diff --git a/app/common/enums.js b/app/common/enums.js index d047240..b7a650c 100644 --- a/app/common/enums.js +++ b/app/common/enums.js @@ -12,8 +12,6 @@ const AD_CATEGORY = { CATEGORY_GARAGE: "GARAGE" }; -const IGNORED_USERNAMES = []; - const AD_STATUS = { STATUS_NORMAL: 1, STATUS_RESERVED: 2, @@ -36,7 +34,6 @@ const CRAWLER_AD_TYPE = { module.exports = { AD_TYPE, - IGNORED_USERNAMES, AD_CATEGORY, AD_STATUS, AD_AGENCY, diff --git a/app/config/appConfig.js b/app/config/appConfig.js index b4144cc..5b06652 100644 --- a/app/config/appConfig.js +++ b/app/config/appConfig.js @@ -6,7 +6,10 @@ const APP_URL = ? process.env.APP_URL || "http://market-alarm" : process.env.APP_URL || `${APP_BASE_URL}:${APP_PORT}`; +const DEFAULT_TIMEZONE = "Europe/Sarajevo"; + module.exports = { APP_PORT, - APP_URL + APP_URL, + DEFAULT_TIMEZONE }; diff --git a/app/crawler/crawl.js b/app/crawler/crawl.js index 0f9dcc9..77d4fc9 100644 --- a/app/crawler/crawl.js +++ b/app/crawler/crawl.js @@ -13,23 +13,28 @@ const PostgresSaver = require("./savers/postgres"); const crawlers = [ new OlxCrawler( - OLX_CONFIG.OLX_START_PAGE, - OLX_CONFIG.OLX_END_PAGE, - OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE, [new PostgresSaver()], OLX_CONFIG.OLX_CRAWLER_AD_TYPE, - OLX_CONFIG.OLX_CRAWLER_AD_CATEGORIES + OLX_CONFIG.OLX_CRAWLER_AD_CATEGORIES, + OLX_CONFIG.OLX_MAX_PAGES, + OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE, + OLX_CONFIG.OLX_IGNORED_USERNAMES, + OLX_CONFIG.OLX_DELAY_BETWEEN_PAGES ) ]; async function crawlAll() { for (let crawler of crawlers) { try { - await crawler.crawl(); + const newRealEstates = await crawler.crawl(); + + console.log("Number of new real estates : ", newRealEstates.length); } catch (e) { console.log("Error crawling. Trying next crawler! 
", e); } } } -crawlAll(); +(async () => { + await crawlAll(); +})(); diff --git a/app/crawler/crawlerConfig.js b/app/crawler/crawlerConfig.js index d524fce..eb9133a 100644 --- a/app/crawler/crawlerConfig.js +++ b/app/crawler/crawlerConfig.js @@ -2,29 +2,37 @@ require("dotenv").config({ path: "../../.env" }); const { CRAWLER_AD_TYPE, AD_CATEGORY } = require("../common/enums"); -const crawlerAdType = +const olxCrawlerAdType = process.env.OLX_CRAWLER_AD_TYPE !== undefined ? CRAWLER_AD_TYPE[process.env.OLX_CRAWLER_AD_TYPE] : null; -const parsedCrawlerAdCategories = +const olxParsedCrawlerAdCategories = process.env.OLX_CRAWLER_AD_CATEGORIES !== undefined ? process.env.OLX_CRAWLER_AD_CATEGORIES.split(",").map(category => category.trim() ) : ["CATEGORY_FLAT", "CATEGORY_HOUSE"]; -const transformedCrawlerAdCategories = parsedCrawlerAdCategories +const olxIgnoredUsernames = + process.env.OLX_IGNORED_USERNAMES !== undefined + ? process.env.OLX_IGNORED_USERNAMES.split(",").map(username => + username.trim() + ) + : []; + +const transformedCrawlerAdCategories = olxParsedCrawlerAdCategories .map(categoryName => AD_CATEGORY[categoryName]) .filter(category => !!category); const OLX_CONFIG = { - OLX_START_PAGE: parseInt(process.env.OLX_START_PAGE) || 1, - OLX_END_PAGE: parseInt(process.env.OLX_END_PAGE) || 10, + OLX_MAX_PAGES: parseInt(process.env.OLX_MAX_PAGES) || 500, OLX_MAX_RESULTS_PER_PAGE: parseInt(process.env.OLX_MAX_RESULTS_PER_PAGE) || 50, - OLX_CRAWLER_AD_TYPE: crawlerAdType || CRAWLER_AD_TYPE.NONE, - OLX_CRAWLER_AD_CATEGORIES: transformedCrawlerAdCategories + OLX_CRAWLER_AD_TYPE: olxCrawlerAdType || CRAWLER_AD_TYPE.NONE, + OLX_CRAWLER_AD_CATEGORIES: transformedCrawlerAdCategories, + OLX_IGNORED_USERNAMES: olxIgnoredUsernames || [], + OLX_DELAY_BETWEEN_PAGES: parseInt(process.env.OLX_DELAY_BETWEEN_PAGES) || 1000 }; module.exports = { diff --git a/app/crawler/savers/postgres.js b/app/crawler/savers/postgres.js index 5aa0c3a..9ba3391 100644 --- 
a/app/crawler/savers/postgres.js +++ b/app/crawler/savers/postgres.js @@ -1,3 +1,5 @@ +const moment = require("moment"); + const { bulkUpsertRealEstates } = require("../../helpers/db/realEstate"); class PostgresSaver { @@ -9,7 +11,33 @@ class PostgresSaver { async save(results) { console.log("[POSTGRES] Saving..."); - await bulkUpsertRealEstates(results); + + const savedRecords = await bulkUpsertRealEstates(results); + + if (Array.isArray(savedRecords)) { + const newRealEstates = []; + const existingRealEstates = []; + + for (const savedRecord of savedRecords) { + const { createdAt, updatedAt } = savedRecord; + + const createdAtMoment = moment.utc(createdAt); + const updatedAtMoment = moment.utc(updatedAt); + + if (createdAtMoment.isSame(updatedAtMoment, "second")) { + newRealEstates.push(savedRecord); + } else { + existingRealEstates.push(savedRecord); + } + } + + return { + newRecords: newRealEstates, + existingRecords: existingRealEstates + }; + } else { + throw { message: "[POSTGRES] Failed to save records" }; + } } close() { diff --git a/app/crawler/specific/olx.js b/app/crawler/specific/olx.js index 0bf0a35..b10f55a 100644 --- a/app/crawler/specific/olx.js +++ b/app/crawler/specific/olx.js @@ -1,102 +1,170 @@ "use strict"; -let fetch = require("node-fetch"); -let cheerio = require("cheerio"); +const fetch = require("node-fetch"); +const cheerio = require("cheerio"); +const Promise = require("bluebird"); +const moment = require("moment-timezone"); const { AD_TYPE, AD_CATEGORY, - IGNORED_USERNAMES, AD_AGENCY, AD_STATUS, CRAWLER_AD_TYPE } = require("../../common/enums"); +const { DEFAULT_TIMEZONE } = require("../../config/appConfig"); + const OLX_ENUMS = { - OLX_AD_TYPE: {}, - OLX_AD_CATEGORY: {}, - MAX_DETAIL_FIELDS: 30 + OLX_AD_TYPE: { + [CRAWLER_AD_TYPE.ALL]: "", + [CRAWLER_AD_TYPE.ONLY_SELL]: "&vrsta=samoprodaja", + [CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje" + }, + OLX_AD_CATEGORY: { + [AD_CATEGORY.CATEGORY_FLAT]: "&kategorija=23", + 
[AD_CATEGORY.CATEGORY_HOUSE]: "&kategorija=24", + [AD_CATEGORY.CATEGORY_LAND]: "&kategorija=29", + [AD_CATEGORY.CATEGORY_OFFICE]: "&kategorija=25", + [AD_CATEGORY.CATEGORY_APARTMENT]: "&kategorija=27", + [AD_CATEGORY.CATEGORY_GARAGE]: "&kategorija=30" + }, + MAX_DETAIL_FIELDS: 30, + OLX_PUBLISHED_DATE_FORMAT: "DD.MM.YYYY. u HH:mm", + OLX_RENEWED_DATE_FORMAT: "DD.MM.YYYY. u HH:mm" }; -OLX_ENUMS.OLX_AD_TYPE[CRAWLER_AD_TYPE.ALL] = ""; -OLX_ENUMS.OLX_AD_TYPE[CRAWLER_AD_TYPE.ONLY_SELL] = "&vrsta=samoprodaja"; -OLX_ENUMS.OLX_AD_TYPE[CRAWLER_AD_TYPE.ONLY_RENT] = "&vrsta=samoizdavanje"; - -OLX_ENUMS.OLX_AD_CATEGORY[AD_CATEGORY.CATEGORY_FLAT] = "&kategorija=23"; -OLX_ENUMS.OLX_AD_CATEGORY[AD_CATEGORY.CATEGORY_HOUSE] = "&kategorija=24"; -OLX_ENUMS.OLX_AD_CATEGORY[AD_CATEGORY.CATEGORY_LAND] = "&kategorija=29"; -OLX_ENUMS.OLX_AD_CATEGORY[AD_CATEGORY.CATEGORY_OFFICE] = "&kategorija=25"; -OLX_ENUMS.OLX_AD_CATEGORY[AD_CATEGORY.CATEGORY_APARTMENT] = "&kategorija=27"; -OLX_ENUMS.OLX_AD_CATEGORY[AD_CATEGORY.CATEGORY_GARAGE] = "&kategorija=30"; - class OlxCrawler { constructor( - fromPage = 1, - toPage = 10, - maxResults = 1000, savers = [], crawlerAdTypes = CRAWLER_AD_TYPE.ALL, crawlerAdCategories = [ AD_CATEGORY.CATEGORY_FLAT, AD_CATEGORY.CATEGORY_HOUSE - ] + ], + maxPages = 1000, + maxResultsPerPage = 100, + ignoredUsernames = [], + delayBetweenPages = 1000 ) { - this.fromPage = fromPage; - this.toPage = toPage; - this.maxResults = maxResults; this.savers = savers; this.baseUrl = "https://www.olx.ba/pretraga?sort_order=desc&sort_po=datum"; this.crawlerAdTypes = crawlerAdTypes; this.crawlerAdCategories = crawlerAdCategories; + this.maxPages = maxPages; + this.maxResultsPerPage = maxResultsPerPage; + this.ignoredUsernames = ignoredUsernames; + this.delayBetweenPages = delayBetweenPages; } async crawl() { console.log("[OLX] Crawler started"); - const crawlAdTypes = this.crawlerAdTypes; const crawlAdCategories = this.crawlerAdCategories; - const urlWithAdTypeFilter = 
`${this.baseUrl}${OLX_ENUMS.OLX_AD_TYPE[crawlAdTypes]}`; + const newRealEstates = []; - if (crawlAdCategories && crawlAdTypes) { - const asyncPagesIndexingByCategory = []; + if (crawlAdCategories) { + const indexGenerators = []; for (const adCategory of crawlAdCategories) { - asyncPagesIndexingByCategory.push( - this.indexPages( - `${urlWithAdTypeFilter}${OLX_ENUMS.OLX_AD_CATEGORY[adCategory]}` - ) - ); + indexGenerators.push(this.categoryIndexer(adCategory)); } - await Promise.all(asyncPagesIndexingByCategory); + let done = false; + while (!done) { + const categoryIndexerPromises = []; + const generatorsToRemove = []; + for (const indexGenerator of indexGenerators) { + categoryIndexerPromises.push(indexGenerator.next()); + generatorsToRemove.push(false); + } + + const singlePageResults = await Promise.all(categoryIndexerPromises); + const entries = singlePageResults.entries(); + + for (const [index, { value: singlePageResult }] of entries) { + if (singlePageResult) { + const saveResults = await this.saveCrawledResults(singlePageResult); + const { newRecords, existingRecords } = saveResults; + + newRealEstates.push(...newRecords); + + for (const existingRecord of existingRecords) { + const { publishedDate, renewedDate } = existingRecord; + + const publishedDateMoment = moment.utc(publishedDate); + const renewedDateMoment = moment.utc(renewedDate); + + const stopCrawlingThisCategory = publishedDateMoment.isSame( + renewedDateMoment, + "minute" + ); + + if (stopCrawlingThisCategory) { + generatorsToRemove[index] = true; + // console.log("\tGenerator ", index + 1, "has no more new ads"); + break; + } + } + } else { + //Generator returned undefined, remove this generator from array + generatorsToRemove[index] = true; + // console.log("Generator ", index + 1, "has no more pages"); + } + } + + // console.log("Generators state : ", generatorsToRemove); + for (let i = generatorsToRemove.length - 1; i >= 0; i--) { + if (generatorsToRemove[i]) { + // console.log("\tRemove 
generator ", i + 1); + indexGenerators.splice(i, 1); + } + } + if (indexGenerators.length === 0) { + done = true; + } + + await this.sleep(this.delayBetweenPages); + } } console.log("[OLX] Crawler finished"); + return newRealEstates; } - async indexPages(url) { - const startPage = this.fromPage; - const endPage = this.toPage; - const maxResultsPerPage = this.maxResults; + async *categoryIndexer(adCategory) { + let pageToIndex = 1; - for (let pageNumber = startPage; pageNumber <= endPage; pageNumber++) { - const singlePageResults = await this.indexSinglePage( - url, - pageNumber, - maxResultsPerPage - ); - await this.saveCrawledResults(singlePageResults); - await this.sleep(5000); + const urlAdTypePart = OLX_ENUMS.OLX_AD_TYPE[this.crawlerAdTypes]; + const urlCategoryPart = OLX_ENUMS.OLX_AD_CATEGORY[adCategory]; + if (urlAdTypePart && urlCategoryPart) { + while (true) { + const urlPageToCrawl = `${this.baseUrl}${urlAdTypePart}${urlCategoryPart}&stranica=${pageToIndex}`; + const singlePageResults = await this.indexSinglePage( + urlPageToCrawl, + this.maxResultsPerPage + ); + + if (Array.isArray(singlePageResults) && singlePageResults.length > 0) { + yield singlePageResults; + } else { + return undefined; + } + + ++pageToIndex; + if (pageToIndex === this.maxPages) { + return undefined; + } + } + } else { + return undefined; } } - async indexSinglePage(urlWithoutPageNumber, pageNumber, maxResultsPerPage) { + async indexSinglePage(url, maxResultsPerPage) { try { - const url = `${urlWithoutPageNumber}&stranica=${pageNumber}`; - const res = await fetch(url); const body = await res.text(); const $ = cheerio.load(body); let hrefs = []; - const singlePageResults = []; $("#rezultatipretrage") .find(".listitem") @@ -113,50 +181,60 @@ class OlxCrawler { let actualNoOfResults = hrefs.length <= maxResultsPerPage ? 
hrefs.length : maxResultsPerPage; + const asyncScraping = []; for (let i = 0; i < actualNoOfResults; i++) { - console.log(`Scraping : ${hrefs[i]}`); - - const adData = await this.scrapeAd(hrefs[i]); - - if (adData) { - singlePageResults.push(adData); - } - await this.sleep(500); + asyncScraping.push(this.scrapeAd(hrefs[i])); } - return singlePageResults; + const scrapedData = await Promise.all(asyncScraping); + const filteredScrapedData = scrapedData.filter(adData => !!adData); + return filteredScrapedData; } catch (e) { console.error("Exception caught:" + e); + return []; } } async scrapeAd(url) { + //console.log("Scraping : ", url); try { const adPageSource = await fetch(url); const body = await adPageSource.text(); const $ = cheerio.load(body); let status = AD_STATUS.STATUS_NORMAL; - const username = $( - "#lg > div.desno2.profil > div:nth-child(2) > div.vrsta1.vrsta_desno > a > div.username > span" - ).text(); + const propertySelectors = { + username: + "#lg > div.desno2.profil > div:nth-child(2) > div.vrsta1.vrsta_desno > a > div.username > span", + title: "#naslovartikla", + descriptions: ".artikal_detaljniopis_tekst", + category: + "#artikal_glavni_div > div.artikal_lijevo > div:nth-child(3) > div > span:nth-child(3) > a > span" + }; - if (IGNORED_USERNAMES.includes((username || "").toLowerCase())) { + const username = $(propertySelectors.username) + .text() + .trim(); + if (this.ignoredUsernames.includes((username || "").toLowerCase())) { return null; } - const title = $("#naslovartikla").text(); - const descriptions = $(".artikal_detaljniopis_tekst"); - const category = $( - "#artikal_glavni_div > div.artikal_lijevo > div:nth-child(3) > div > span:nth-child(3) > a > span" - ).text(); + const title = $(propertySelectors.title) + .text() + .trim(); + const descriptions = $(propertySelectors.descriptions); + const category = $(propertySelectors.category) + .text() + .trim(); //====== PRICE DETECTION AND EXTRACTION ===== let price = null; const 
normalPriceValue = $("#pc > p:nth-child(2)").text(); const urgentPriceValue = $( "#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p" - ).text(); + ) + .text() + .trim(); if (normalPriceValue && normalPriceValue.length > 0) { price = normalPriceValue; @@ -208,6 +286,39 @@ class OlxCrawler { } const olxIdFieldSelector = `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${otherInformationDivId}) > div:nth-child(4)`; + const publishedDateValueSelector = `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${otherInformationDivId}) > div:nth-child(3) > div.df2.neanimiraj > time`; + const renewedDateFullValueSelector = `#artikal_glavni_div > div.artikal_lijevo > div.op.ob.pop`; + + const publishedDate = $(publishedDateValueSelector) + .text() + .trim(); + + const publishedDateMoment = moment.tz( + publishedDate, + OLX_ENUMS.OLX_PUBLISHED_DATE_FORMAT, + DEFAULT_TIMEZONE + ); + + if (!publishedDateMoment.isValid()) { + throw { message: "Invalid published date ! Check parsing format" }; + } + + const renewedDate = $(renewedDateFullValueSelector) + .data("content") + .trim(); + + const renewedDateMoment = moment.tz( + renewedDate, + OLX_ENUMS.OLX_RENEWED_DATE_FORMAT, + DEFAULT_TIMEZONE + ); + + if (!renewedDateMoment) { + throw { + message: + "Invalid renewed date ! 
Check how parser parsed renewed date text" + }; + } adType = $( `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${otherInformationDivId}) > div:nth-child(2) > div.df2` @@ -262,7 +373,9 @@ class OlxCrawler { const time = $("time").attr("datetime"); const numberOfViews = $( "#artikal_glavni_div > div.artikal_lijevo > div:nth-child(18) > div:nth-child(6) > div.df2" - ).text(); + ) + .text() + .trim(); //=========================================== //========================================= @@ -300,8 +413,14 @@ class OlxCrawler { price: parsedPrice, area: parsedArea, gardenSize: parsedGardenSize, - shortDescription: descriptions.first().text(), - longDescription: descriptions.last().text(), + shortDescription: descriptions + .first() + .text() + .trim(), + longDescription: descriptions + .last() + .text() + .trim(), streetNumber: 0, streetName: "", locality: "", @@ -312,7 +431,9 @@ class OlxCrawler { country: "", locationLat, locationLong, - adStatus: status + adStatus: status, + publishedDate: publishedDateMoment.toISOString(), + renewedDate: renewedDateMoment.toISOString() }; return data; @@ -334,6 +455,8 @@ class OlxCrawler { return AD_CATEGORY.CATEGORY_HOUSE; case "Poslovni prostori": return AD_CATEGORY.CATEGORY_OFFICE; + case "Apartmani": + return AD_CATEGORY.CATEGORY_APARTMENT; default: return undefined; } @@ -370,6 +493,58 @@ class OlxCrawler { return parseFloat(formattedPriceText); } + parseRenewedDate(renewedDateText) { + const currentMoment = moment.tz(DEFAULT_TIMEZONE); + + if (renewedDateText.includes("Prije mjesec dana")) { + return currentMoment.add(-1, "month"); + } + + if (renewedDateText.includes("Jučer")) { + return currentMoment.add(-1, "day"); + } + + if (renewedDateText.includes("Prije sat")) { + return currentMoment.add(-1, "hour"); + } + + if (renewedDateText.includes("dan")) { + // format for this case should be "Prije N dana" or "Prije N dan" + const dateParts = renewedDateText.split(" "); + if (dateParts[0] === "Prije") { + const 
numberOfDays = parseInt(dateParts[1]); + return currentMoment.add(-1 * numberOfDays, "days"); + } else { + return undefined; + } + } + + if (renewedDateText.includes("sat")) { + const dateParts = renewedDateText.split(" "); + const parsedHours = + dateParts && dateParts.length > 2 ? parseInt(dateParts[1]) : undefined; + if (!parsedHours) { + return undefined; + } + return currentMoment.add(-1 * parsedHours, "hours"); + } + + const todayVariations = ["min", "sekund", "maloprije"]; + for (const todayVariation of todayVariations) { + if (renewedDateText.includes(todayVariation)) { + return currentMoment; + } + } + + const renewedDateMoment = moment.tz( + renewedDateText, + OLX_ENUMS.OLX_RENEWED_DATE_FORMAT, + DEFAULT_TIMEZONE + ); + + return renewedDateMoment.isValid() ? renewedDateMoment : undefined; + } + async sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } @@ -377,9 +552,13 @@ class OlxCrawler { async saveCrawledResults(results) { const savers = this.savers; - for (const saver of savers) { - await saver.save(results); - } + // for (const saver of savers) { + // await saver.save(results); + // } + + //For now, we use only Postgres saver, so ... 
+ return await savers[0].save(results); + //so that we can use some sequelize options and information when data is inserted } } diff --git a/app/helpers/db/realEstate.js b/app/helpers/db/realEstate.js index 3f32b7c..2443fbb 100644 --- a/app/helpers/db/realEstate.js +++ b/app/helpers/db/realEstate.js @@ -23,10 +23,13 @@ const bulkUpsertRealEstates = async realEstateData => { "longDescription", "gardenSize", "adStatus", - "updatedAt" + "updatedAt", + "renewedDate" ]; + return await db.RealEstate.bulkCreate(realEstateData, { - updateOnDuplicate: fieldsToUpdateIfDuplicate + updateOnDuplicate: fieldsToUpdateIfDuplicate, + returning: true }); } catch (e) { console.log("Error bulk upserting realEstates : ", e); diff --git a/app/migrations/20190923185802-add-published-renewed-dates-to-realEstates.js b/app/migrations/20190923185802-add-published-renewed-dates-to-realEstates.js new file mode 100644 index 0000000..88ae358 --- /dev/null +++ b/app/migrations/20190923185802-add-published-renewed-dates-to-realEstates.js @@ -0,0 +1,21 @@ +"use strict"; + +module.exports = { + up: (queryInterface, Sequelize) => { + return Promise.all([ + queryInterface.addColumn("RealEstates", "publishedDate", { + type: Sequelize.DATE + }), + queryInterface.addColumn("RealEstates", "renewedDate", { + type: Sequelize.DATE + }) + ]); + }, + + down: (queryInterface, Sequelize) => { + return Promise.all([ + queryInterface.removeColumn("RealEstates", "renewedDate"), + queryInterface.removeColumn("RealEstates", "publishedDate") + ]); + } +}; diff --git a/app/models/realEstate.js b/app/models/realEstate.js index 72b76ec..48c85d2 100644 --- a/app/models/realEstate.js +++ b/app/models/realEstate.js @@ -43,14 +43,12 @@ module.exports = (sequelize, DataTypes) => { country: DataTypes.TEXT, locationLat: DataTypes.REAL, locationLong: DataTypes.REAL, - lastTimeCrawled: { - type: DataTypes.DATE, - allowNull: false - }, title: DataTypes.TEXT, shortDescription: DataTypes.TEXT, longDescription: DataTypes.TEXT, - 
adStatus: DataTypes.INTEGER + adStatus: DataTypes.INTEGER, + publishedDate: DataTypes.DATE, + renewedDate: DataTypes.DATE }); RealEstate.associate = models => { diff --git a/development.env b/development.env index f998e9d..2f2fdc0 100644 --- a/development.env +++ b/development.env @@ -16,8 +16,9 @@ SOURCE_EMAIL=info@saburly.com #=============== CRAWLER SETTINGS===============# #==OLX== -OLX_START_PAGE=Crawler starts from this page -OLX_END_PAGE=Crawler ends with this page (including this page) +OLX_MAX_PAGES=Restrict crawler to this number of pages OLX_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved OLX_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values OLX_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values +OLX_IGNORED_USERNAMES=comma separated list of usernames to ignore +OLX_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page diff --git a/package-lock.json b/package-lock.json index cac150b..ad66538 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2697,9 +2697,9 @@ "integrity": "sha512-bV7f+6l2QigeBBZSM/6yTNq4P2fNpSWj/0e7jQcy87A8e7o2nAfP/34/2ky5Vw4B9S446EtIhodAzkFCcR4dQg==" }, "moment-timezone": { - "version": "0.5.25", - "resolved": "https://registry.npmjs.org/moment-timezone/-/moment-timezone-0.5.25.tgz", - "integrity": "sha512-DgEaTyN/z0HFaVcVbSyVCUU6HeFdnNC3vE4c9cgu2dgMTvjBUBdBzWfasTBmAW45u5OIMeCJtU8yNjM22DHucw==", + "version": "0.5.26", + "resolved": "https://registry.npmjs.org/moment-timezone/-/moment-timezone-0.5.26.tgz", + "integrity": "sha512-sFP4cgEKTCymBBKgoxZjYzlSovC20Y6J7y3nanDc5RoBIXKlZhoYwBoZGe3flwU6A372AcRwScH8KiwV6zjy1g==", "requires": { "moment": ">= 2.9.0" } diff --git a/package.json b/package.json index a1447c3..f55a068 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,8 @@ "express": "^4.16.4", "express-ejs-layouts": 
"^2.5.0", "express-layout": "^0.1.0", + "moment": "^2.24.0", + "moment-timezone": "^0.5.26", "node-fetch": "^2.3.0", "node-schedule": "^1.3.2", "pg": "^7.10.0",