Compare commits

..

4 Commits

Author SHA1 Message Date
Naida Vatric
d7fcb2a278 Merge branch 'master' after user-agent change into email-density 2020-02-21 14:26:33 +01:00
Naida Vatric
6bad24d735 New query for search req search. 2020-02-21 14:25:10 +01:00
Naida Vatric
7302edceec Changed queries logic again. 2020-02-18 15:04:26 +01:00
Naida Vatric
bd33a6b80e Logs for query check. 2020-02-17 23:24:55 +01:00
16 changed files with 118 additions and 631 deletions

View File

@@ -45,11 +45,6 @@ const USER_AGENT =
process.env.USER_AGENT ||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";
/**
 * Interprets an on/off environment flag.
 *
 * Environment variables are always strings, so a plain `value || fallback`
 * keeps "0" (a truthy string) and the flag can never be switched off.
 *
 * @param {string|undefined} rawValue - Raw value read from `process.env`.
 * @param {number} defaultValue - Value used when the variable is unset or empty.
 * @returns {number} 0 when the value parses to the integer 0, 1 for any other
 *   provided value, `defaultValue` when nothing was provided.
 */
const parseEnvFlag = (rawValue, defaultValue) => {
  if (rawValue === undefined || rawValue === "") {
    return defaultValue;
  }
  return parseInt(rawValue, 10) === 0 ? 0 : 1;
};
// Scraper API is used by default; USE_SCRAPER_API=0 turns it off.
const USE_SCRAPER_API = parseEnvFlag(process.env.USE_SCRAPER_API, 1);
const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || "";
// The value is used as a batch size, so anything that is not a positive
// integer (unset, non-numeric, zero, negative) falls back to 10.
const parsedConcurrentReqScraperApi = parseInt(
  process.env.NUMBER_OF_CONCURRENT_REQ_SCRAPER_API,
  10
);
const NUMBER_OF_CONCURRENT_REQ_SCRAPER_API =
  parsedConcurrentReqScraperApi > 0 ? parsedConcurrentReqScraperApi : 10;
module.exports = {
APP_PORT,
APP_URL,
@@ -64,8 +59,5 @@ module.exports = {
STAGING,
CHECK_UP_DAYS,
PROSTOR_LOGIN,
USER_AGENT,
USE_SCRAPER_API,
SCRAPER_API_KEY,
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
USER_AGENT
};

View File

@@ -18,9 +18,7 @@ const {
const {
DEFAULT_TIMEZONE,
PRINT_CRAWLER_DEBUG,
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API,
SCRAPER_API_KEY
PRINT_CRAWLER_DEBUG
} = require("../../config/appConfig");
const OLX_ENUMS = {
@@ -46,8 +44,6 @@ const OLX_ENUMS = {
const { OLX_FORCE_CRAWL } = require("../specificConfigs/olx");
const scraperapiClient = require("scraperapi-sdk")(SCRAPER_API_KEY);
class OlxCrawler {
constructor(
savers = [],
@@ -194,40 +190,12 @@ class OlxCrawler {
let actualNoOfResults =
hrefs.length <= maxResultsPerPage ? hrefs.length : maxResultsPerPage;
const scrapedData = [];
for (
let i = 0;
i <= actualNoOfResults;
i = i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
) {
const concurrentUrlsToScrape = hrefs.slice(
i,
i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
);
//Before sending n requests to Scraper API, send a preflight request to check that enough concurrent request slots are available.
//No "real" requests are sent until the preflight check confirms there is capacity.
let availableConcurrentReqSlots = false;
do {
availableConcurrentReqSlots = await this.checkAvailableConcurrentReqSlots(
concurrentUrlsToScrape.length
);
} while (availableConcurrentReqSlots !== true);
//
console.log(
`OLX - Sending requests from ${i} to ${i +
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API}.`
);
console.log(`OLX - Urls sent to scrape: `, concurrentUrlsToScrape);
//
const concurrentReqScraperApi = concurrentUrlsToScrape.map(url =>
this.scrapeAd(url)
);
const concurrentReqData = await Promise.all(concurrentReqScraperApi);
concurrentReqData.forEach(reqData => scrapedData.push(reqData));
const asyncScraping = [];
for (let i = 0; i < actualNoOfResults; i++) {
asyncScraping.push(this.scrapeAd(hrefs[i]));
}
const scrapedData = await Promise.all(asyncScraping);
const filteredScrapedData = scrapedData.filter(adData => !!adData);
return filteredScrapedData;
} catch (e) {
@@ -238,7 +206,6 @@ class OlxCrawler {
async scrapeAd(url) {
// console.log("Scraping : ", url);
try {
const adPageSource = await fetch(url);
const body = await adPageSource.text();
@@ -271,28 +238,15 @@ class OlxCrawler {
//====== PRICE DETECTION AND EXTRACTION =====
let price = null;
let normalPrice = null;
let urgentPrice = null;
const normalPriceValue = $("#pc > p:nth-child(2)")
.text()
.trim();
const normalPriceValue = $("#pc > p:nth-child(2)").text();
const urgentPriceValue = $(
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p"
)
.text()
.trim();
//For cases where price is given in discount manner - different from default parsing
const discountPriceValue = $(
"#artikal_glavni_div > div.artikal_lijevo > div.op.pop > p"
)
.text()
.trim();
if (normalPriceValue && normalPriceValue.length > 0) {
normalPrice = normalPriceValue
.replace(/\r\n|\n|\r/gm, "")
.replace("KM", "")
.trim();
price = normalPriceValue;
if (
$("#pc > p.n")
.text()
@@ -302,35 +256,21 @@ class OlxCrawler {
} else {
status = AD_STATUS.STATUS_NORMAL;
}
} else if (discountPriceValue && discountPriceValue.length > 0) {
status = AD_STATUS.STATUS_URGENT;
const priceValues = discountPriceValue.split("KM");
normalPrice = priceValues[0].trim();
} else {
console.log("Body:", body);
throw { message: "Can't find normal price" };
}
if (urgentPriceValue && urgentPriceValue.length > 0) {
const priceValues = urgentPriceValue.replace("Cijena", "").split("KM");
} else if (urgentPriceValue && urgentPriceValue.length > 0) {
const priceValues = urgentPriceValue.split("KM");
//priceValues will contain values like ["100000", "90000", ...], second element is urgent price
if (priceValues.length > 0) {
if (priceValues[0].trim().indexOf("Hitno") != -1) {
urgentPrice = priceValues[0].replace("Hitno", "").trim();
status = AD_STATUS.STATUS_URGENT;
} else {
urgentPrice = priceValues[0].trim();
}
} else if (discountPriceValue && discountPriceValue.length > 0) {
status = AD_STATUS.STATUS_URGENT;
const priceValues = discountPriceValue.split("KM");
urgentPrice = priceValues[1].trim();
if (priceValues.length > 1) {
price = priceValues[1].trim();
status = AD_STATUS.STATUS_DISCOUNTED;
} else {
throw { message: "Can't find urgent price" };
}
} else {
throw {
message: "Can't find price (it is not normal nor urgent price ?)"
};
}
price = status === AD_STATUS.STATUS_URGENT ? urgentPrice : normalPrice;
//====== OTHER AD INFORMATION ===============
let adType = null;
let olxId = null;
@@ -338,7 +278,7 @@ class OlxCrawler {
let otherInformationDivId;
//We need to locate DIV ID where other information are stored
for (let possibleId = 1; possibleId <= 30; possibleId++) {
for (let possibleId = 10; possibleId <= 20; possibleId++) {
const adTypeFieldTitle = $(
`#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${possibleId}) > div:nth-child(2) > div.df1`
)
@@ -715,7 +655,6 @@ class OlxCrawler {
} catch (e) {
console.error("Exception caught: " + e.message, "\r\nURL:", url);
}
return null;
}
@@ -829,9 +768,6 @@ class OlxCrawler {
if (!priceText) {
return NaN;
}
if (priceText === "Po dogovoru") {
return null;
}
const formattedPriceText = priceText.replace(".", "").replace(",", ".");
return parseFloat(formattedPriceText);
}
@@ -931,28 +867,8 @@ class OlxCrawler {
console.log("sprat = NEPOZNATO [", floorText, "]");
return null;
}
async checkAvailableConcurrentReqSlots(numberOfNeededConcurrentReq) {
try {
const scraperApiAccountInfo = await scraperapiClient.account();
const numberOfUsedConcurrentReq =
scraperApiAccountInfo.concurrentRequests;
const limitOfConcurrentReq = scraperApiAccountInfo.concurrencyLimit;
//Buffer of requests to prevent errors with prefligh requests
const bufferNumberOfReq = 3;
const numberOfAvailableConcurrentReq =
limitOfConcurrentReq - bufferNumberOfReq - numberOfUsedConcurrentReq;
if (numberOfNeededConcurrentReq <= numberOfAvailableConcurrentReq) {
return true;
} else {
return false;
}
} catch (err) {
return false;
}
}
async sleep(ms) {
// console.log("Sleep for:", ms);
return new Promise(resolve => setTimeout(resolve, ms));
}

View File

@@ -4,7 +4,6 @@ const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio");
const moment = require("moment-timezone");
const FormData = require("form-data");
const nodeFetch = require("node-fetch");
const {
AD_TYPE,
@@ -198,7 +197,7 @@ class ProstorCrawler {
// console.log("[PROSTOR] Scraping : ", url);
try {
const adPageSource = await nodeFetch(url, {
const adPageSource = await fetch(url, {
headers: { Cookie: prostorCookie }
});
const body = await adPageSource.text();
@@ -428,7 +427,7 @@ class ProstorCrawler {
}
try {
const res = await nodeFetch(url, {
const res = await fetch(url, {
headers: { Cookie: prostorCookie }
});
const body = await res.text();
@@ -592,7 +591,7 @@ class ProstorCrawler {
formData.append("email", PROSTOR_LOGIN.EMAIL);
formData.append("password", PROSTOR_LOGIN.PASSWORD);
return nodeFetch("https://prostor.ba/moj-prostor/prijava", {
return fetch("https://prostor.ba/moj-prostor/prijava", {
method: "POST",
body: formData,
headers: { Cookie: prostorCookie }
@@ -619,12 +618,9 @@ class ProstorCrawler {
});
}
async getCookies() {
const getResponse = await nodeFetch(
"https://prostor.ba/moj-prostor/prijava",
{
headers: { Cookie: "" }
}
);
const getResponse = await fetch("https://prostor.ba/moj-prostor/prijava", {
headers: { Cookie: "" }
});
const raw = getResponse.headers.raw()["set-cookie"];
const cookie = raw
.map(datastring => {

View File

@@ -16,8 +16,7 @@ const {
const {
PRINT_CRAWLER_DEBUG,
DEFAULT_TIMEZONE,
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
DEFAULT_TIMEZONE
} = require("../../config/appConfig");
const { SALJIC_FORCE_CRAWL } = require("../specificConfigs/saljic");
@@ -85,7 +84,6 @@ class SaljicCrawler {
for (const [index, { value: singlePageResult }] of entries) {
if (singlePageResult) {
const saveResults = await this.saveCrawledResults(singlePageResult);
const { newRecords } = saveResults;
newRealEstates.push(...newRecords);
@@ -205,32 +203,13 @@ class SaljicCrawler {
? hrefsAbs.length
: maxResultsPerPage;
const scrapedData = [];
for (
let i = 0;
i <= actualNoOfResults;
i = i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
) {
const concurrentUrlsToScrape = hrefsAbs.slice(
i,
i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
);
const concurrentAdTypesOfReq = adTypes.slice(
i,
i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
);
const concurrentReqScraperApi = concurrentUrlsToScrape.map(
(url, index) => this.scrapeAd(url, concurrentAdTypesOfReq[index])
);
const concurrentReqData = await Promise.all(concurrentReqScraperApi);
concurrentReqData.forEach(reqData => scrapedData.push(reqData));
const asyncScraping = [];
for (let i = 0; i < actualNoOfResults; i++) {
asyncScraping.push(this.scrapeAd(hrefsAbs[i], adTypes[i]));
}
const scrapedData = await Promise.all(asyncScraping);
const filteredScrapedData = scrapedData.filter(adData => !!adData);
return filteredScrapedData;
} catch (e) {
console.error("[SALJIC] Exception caught:" + e);
@@ -238,28 +217,22 @@ class SaljicCrawler {
}
}
async scrapeAd(url, adTypeAttribute) {
//console.log("[SALJIC] Scraping : ", url);
async scrapeAd(url, adType) {
// console.log("[SALJIC] Scraping : ", url);
try {
const adPageSource = await fetch(url);
const body = await adPageSource.text();
const $ = cheerio.load(body);
//Throws error if req to Scraper API proxy wasn't succesful and responds with error
if (body.indexOf("<html>") === -1) {
throw { message: "Scraper API server error." };
}
// No information for status ex. PRODAN
const status = AD_STATUS.STATUS_NORMAL;
//Extracting agency ID from url
const agencyObjectId = url
? parseInt(url.substring(46, url.length))
: null;
const agencyObjectId = parseInt(url.substring(46, url.length));
//Extracting main properties
const propertySelectors = {
title:
"div.content-wrap > div.container.clearfix.wpc > div.col-md-8.nobottommargin > div.single-post.nobottommargin > div.entry.clearfix > div.entry-title > h2",
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-title > h2",
price:
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.topmargin-sm.single-product > div.product > div.product-price > ins",
streetName:
@@ -270,7 +243,6 @@ class SaljicCrawler {
latAndLong:
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > div.gmap.bottommargin > iframe"
};
const title = $(propertySelectors.title)
.text()
.replace(/(\r\n|\n|\r)/gm, "")
@@ -300,15 +272,14 @@ class SaljicCrawler {
.trim();
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
let tmpLatLong;
let latText;
let longText;
if (latAndLongSrc && latAndLongSrc.indexOf("openstreetmap") !== -1) {
tmpLatLong = latAndLongSrc.split("marker=")[1];
latText = tmpLatLong.split("%2C")[0];
longText = tmpLatLong.split("%2C")[1];
}
const latText = latAndLongSrc.substring(
latAndLongSrc.indexOf("marker=") + 7,
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker="))
);
const longText = latAndLongSrc.substring(
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3,
latAndLongSrc.length
);
const locationLat = parseFloat(latText) || null;
const locationLong = parseFloat(longText) || null;
@@ -357,11 +328,11 @@ class SaljicCrawler {
let numberOfViewsKivi = null;
let streetNumber = 0;
let adStatus = status;
let adType = adTypeAttribute;
let shortDescription = descriptions
? descriptions.substring(0, descriptions.indexOf("."))
: "";
let longDescription = descriptions || "";
let shortDescription = descriptions.substring(
0,
descriptions.indexOf(".")
);
let longDescription = descriptions;
//Extracting data - Glavne karakteristike
let mainFieldIndex = 1;
do {
@@ -372,14 +343,10 @@ class SaljicCrawler {
.replace(/[\n\r\t]/gm, "")
.trim();
const mainFieldTitle = mainField
? mainField.substring(0, mainField.indexOf(" "))
: "";
const mainFieldTitle = mainField.substring(0, mainField.indexOf(" "));
const mainFieldValue = mainField
? mainField
.substring(mainField.indexOf(" "), mainField.length)
.trim()
: "";
.substring(mainField.indexOf(" "), mainField.length)
.trim();
switch (mainFieldTitle) {
case "Površina":
@@ -441,7 +408,6 @@ class SaljicCrawler {
additionalField.length
)
.trim();
realEstateType = this.getAdCategoryId(categoryTmp);
} else {
switch (additionalField) {
@@ -532,11 +498,6 @@ class SaljicCrawler {
const region = "";
const entity = "";
const country = "";
//Throws error if realEstateType is null - not read. Still dont know why?
if (realEstateType === null) {
console.log("Body:", body);
throw { message: "Couldn't read real estate type." };
}
const data = {
url,
@@ -606,7 +567,6 @@ class SaljicCrawler {
} catch (e) {
console.error("Exception caught: " + e.message, "\r\nURL:", url);
}
return null;
}

View File

@@ -3,7 +3,6 @@ const db = require("../../models/index");
const sequelize = require("sequelize");
const Op = sequelize.Op;
const { AD_CATEGORY } = require("../../common/enums");
const { CHECK_UP_DAYS } = require("../../config/appConfig");
const getSearchRequest = async searchRequestId => {
try {
@@ -17,22 +16,6 @@ const getSearchRequest = async searchRequestId => {
/**
 * Stores a new search request.
 *
 * @param {Object} [searchRequestFields={}] - Field values passed straight to
 *   `db.SearchRequest.create`.
 * @returns {Promise<*>} Whatever `db.SearchRequest.create` resolves to.
 */
const createSearchRequest = async (searchRequestFields = {}) => {
  const createdSearchRequest = await db.SearchRequest.create(
    searchRequestFields
  );
  return createdSearchRequest;
};
/**
 * Finds the search requests whose `notifiedAt` is at least CHECK_UP_DAYS days
 * in the past.
 *
 * @returns {Promise<*>} Whatever `db.SearchRequest.findAll` resolves to for
 *   the `notifiedAt <= cutoff` filter.
 */
const findAllRequestsForCheckUp = async () => {
  const millisecondsPerDay = 24 * 60 * 60 * 1000;
  // Cutoff moment: now minus CHECK_UP_DAYS days.
  const cutoff = new Date(Date.now() - millisecondsPerDay * CHECK_UP_DAYS);
  return db.SearchRequest.findAll({
    where: {
      notifiedAt: {
        [Op.lte]: cutoff
      }
    }
  });
};
const findSearchRequestsForRealEstate = async realEstate => {
const {
@@ -173,33 +156,11 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else {
// If the real estate has no defined number of rooms (e.g. null)
//It returns requests that didn't choose number of rooms - also null
//Or ones that picked some values but also picked to includeIncomplete ads (or default)
//It returns all search requests except the ones that don't want incomplete ads
numberOfRoomsQuery = {
[Op.or]: [
{
[Op.and]: [
{
numberOfRoomsMin: {
[Op.is]: null
}
},
{
numberOfRoomsMax: {
[Op.is]: null
}
}
]
},
{
includeIncompleteAds: {
[Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
}
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
@@ -229,30 +190,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else {
numberOfFloorsQuery = {
[Op.or]: [
{
[Op.and]: [
{
numberOfFloorsMin: {
[Op.is]: null
}
},
{
numberOfFloorsMax: {
[Op.is]: null
}
}
]
},
{
includeIncompleteAds: {
[Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
}
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
@@ -281,30 +221,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else {
floorQuery = {
[Op.or]: [
{
[Op.and]: [
{
floorMin: {
[Op.is]: null
}
},
{
floorMax: {
[Op.is]: null
}
}
]
},
{
includeIncompleteAds: {
[Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
}
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
@@ -313,7 +232,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
//If the user doesn't tick a checkbox (e.g. elevator), it does not mean they only want real estates without one
//If the real estate characteristic = true, find all requests (those that want the characteristic and those that don't care) - no query needed
//If the real estate characteristic = false, find all requests except the ones that want the characteristic to be true
//If real estate characteristic = null, dont know if true or false, find req that dont care or want char and want incomplete ads
//If real estate characteristic = null, dont know if true or false, find all req except ones that dont want incomplete ads
let balconyQuery = {};
if (realEstateTypeObject.hasBalconyProp && balcony !== true) {
if (balcony === false) {
@@ -324,30 +243,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else if (balcony === null) {
balconyQuery = {
[Op.or]: [
{
balcony: {
[Op.ne]: true
}
},
{
[Op.and]: [
{
balcony: {
[Op.eq]: true
}
},
{
includeIncompleteAds: {
[Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
}
}
]
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
@@ -361,30 +259,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else if (newBuilding === null) {
newBuildingQuery = {
[Op.or]: [
{
newBuilding: {
[Op.ne]: true
}
},
{
[Op.and]: [
{
newBuilding: {
[Op.eq]: true
}
},
{
includeIncompleteAds: {
[Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
}
}
]
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
@@ -398,33 +275,13 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else if (elevator === null) {
elevatorQuery = {
[Op.or]: [
{
elevator: {
[Op.ne]: true
}
},
{
[Op.and]: [
{
elevator: {
[Op.eq]: true
}
},
{
includeIncompleteAds: {
[Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
}
}
]
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
//General query consists of each individual query
const query = {
adType,
@@ -476,6 +333,5 @@ const findSearchRequestsForRealEstate = async realEstate => {
module.exports = {
getSearchRequest,
createSearchRequest,
findSearchRequestsForRealEstate,
findAllRequestsForCheckUp
findSearchRequestsForRealEstate
};

View File

@@ -2,6 +2,7 @@
const db = require("../../models/index");
const sequelize = require("sequelize");
const Op = sequelize.Op;
const { CHECK_UP_DAYS } = require("../../config/appConfig");
const findRealEstatesForSearchRequest = async searchRequestId => {
const query = {
@@ -42,6 +43,42 @@ const findNotNotifiedMatches = async () => {
return matchingRecords;
};
/**
 * Finds subscribed search requests that should receive a check-up.
 *
 * Search requests that have a match created within the last CHECK_UP_DAYS days
 * are excluded; every other subscribed search request is returned.
 *
 * @returns {Promise<*>} Whatever `db.SearchRequest.findAll` resolves to for
 *   the `subscribed` + "id not in excluded list" filter.
 */
const findAllRequestsForCheckUp = async () => {
  // Cutoff moment: today's date moved back by CHECK_UP_DAYS days.
  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - CHECK_UP_DAYS);

  // Matches created on or after the cutoff identify the requests to exclude.
  const recentMatches = await db.SearchRequestMatch.findAll({
    attributes: ["searchRequestId"],
    where: {
      createdAt: {
        [Op.gte]: cutoff
      }
    },
    order: [["searchRequestId", "ASC"]]
  });

  // A Set drops duplicate ids, so each request id is listed only once.
  const excludedRequestIds = new Set();
  for (const recentMatch of recentMatches) {
    excludedRequestIds.add(recentMatch.dataValues.searchRequestId);
  }

  return db.SearchRequest.findAll({
    where: {
      subscribed: true,
      id: {
        [Op.notIn]: Array.from(excludedRequestIds)
      }
    }
  });
};
const addMatches = async matchingRecords => {
return await db.SearchRequestMatch.bulkCreate(matchingRecords, {
@@ -52,5 +89,6 @@ const addMatches = async matchingRecords => {
module.exports = {
findRealEstatesForSearchRequest,
addMatches,
findNotNotifiedMatches
findNotNotifiedMatches,
findAllRequestsForCheckUp
};

View File

@@ -1,9 +1,5 @@
const nodeFetch = require("node-fetch");
const {
USER_AGENT,
USE_SCRAPER_API,
SCRAPER_API_KEY
} = require("../config/appConfig");
const { USER_AGENT } = require("../config/appConfig");
const fetch = async (url, options = {}) => {
const newOptions = Object.assign({}, options);
@@ -11,14 +7,7 @@ const fetch = async (url, options = {}) => {
newOptions["headers"] = {};
}
newOptions["headers"]["User-Agent"] = USER_AGENT;
const urlAdaptedForScraping = USE_SCRAPER_API
? `http://api.scraperapi.com/?api_key=${SCRAPER_API_KEY}&url=${url}`
: url;
//
// console.log("Url for scraping:", urlAdaptedForScraping);
return nodeFetch(urlAdaptedForScraping, newOptions);
return nodeFetch(url, newOptions);
};
module.exports = fetch;

View File

@@ -1,14 +0,0 @@
"use strict";
module.exports = {
up: (queryInterface, Sequelize) => {
return queryInterface.addColumn("SearchRequests", "notifiedAt", {
type: Sequelize.DATE,
defaultValue: new Date()
});
},
down: (queryInterface, Sequelize) => {
return queryInterface.removeColumn("SearchRequests", "notifiedAt");
}
};

View File

@@ -82,11 +82,7 @@ module.exports = (sequelize, DataTypes) => {
floorMin: DataTypes.INTEGER,
floorMax: DataTypes.INTEGER,
accessRoadType: DataTypes.TEXT,
heatingType: DataTypes.TEXT,
notifiedAt: {
type: DataTypes.DATE,
defaultValue: new Date()
}
heatingType: DataTypes.TEXT
});
return SearchRequest;

View File

@@ -15,10 +15,9 @@ const {
} = require("../helpers/emailContentGenerator");
const {
findNotNotifiedMatches,
findAllRequestsForCheckUp,
findRealEstatesForSearchRequest
} = require("../helpers/db/searchRequestMatch");
const { findAllRequestsForCheckUp } = require("../helpers/db/searchRequest");
const { sendEmail } = require("../services/emailService");
const notifyForNewRealEstates = async newRealEstates => {
@@ -36,7 +35,7 @@ const notifyForNewSearchRequest = async searchRequest => {
matchingRealEstates
);
const { email } = searchRequest;
//In case of the new search req, notifiedAt column is populated with default value - now (moment of creation)
await sendEmail(
email,
`${stagingTag} Kivi - novi zahtjev za pretragu`,
@@ -77,10 +76,6 @@ const notifyMatches = async (matches, dailyNotification = false) => {
sendEmailPromise.catch(err =>
console.log("[Email Sending Failed]", err)
);
//Change time of notified At for searchReq
searchRequest.notifiedAt = new Date();
searchRequest.save();
}
}
}
@@ -148,12 +143,8 @@ const checkUpNotify = async () => {
const sendEmailPromise = sendEmail(email, emailSubject, emailContent);
asyncSendEmailActions.push(sendEmailPromise);
sendEmailPromise.catch(err => console.log("[Email Sending Failed]", err));
//Change time of notified At for searchReq
searchRequest.notifiedAt = new Date();
searchRequest.save();
}
await Promise.all(asyncSendEmailActions);*/
await Promise.all(asyncSendEmailActions); */
};
module.exports = {

View File

@@ -22,11 +22,6 @@ GA_ID=Google Analytics ID
#=============== GOOGLE MAPS =============#
API_MAP_KEY=(your-key-here)
#=============== SCRAPER API SUPPORT =============#
USE_SCRAPER_API=Set to 1 to turn Scraper API on, or 0 to turn it off
SCRAPER_API_KEY=Key for Scraper API
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API=Number of requests to send concurrently to the Scraper API proxy
#=============== AWS SDK EMAIL SETTINGS =======#
AWS_KEY_ID=(your-key-here)
AWS_SECRET_ACCESS_KEY=(your-key-here)
@@ -37,7 +32,6 @@ SOURCE_EMAIL=info@saburly.com
CRAWLER_INTERVAL=Interval to run crawler(s), in seconds
STOP_CRAWLER=Non-zero value will skip crawler execution
PRINT_CRAWLER_DEBUG_INFO=Non-zero value will print crawler debugging info to the server console
#==OLX==
OLX_MAX_PAGES=Restrict crawler to this number of pages
OLX_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
@@ -46,7 +40,6 @@ OLX_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be
OLX_IGNORED_USERNAMES=comma separated list of usernames to ignore
OLX_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
OLX_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
#==RENTAL==
RENTAL_MAX_PAGES=Restrict crawler to this number of pages
RENTAL_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved

187
package-lock.json generated
View File

@@ -147,14 +147,6 @@
}
}
},
"argparse": {
"version": "1.0.10",
"resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
"integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==",
"requires": {
"sprintf-js": "~1.0.2"
}
},
"arr-diff": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz",
@@ -203,21 +195,6 @@
"integrity": "sha1-WWZ/QfrdTyDMvCu5a41Pf3jsA2c=",
"dev": true
},
"async": {
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
"requires": {
"lodash": "^4.17.14"
},
"dependencies": {
"lodash": {
"version": "4.17.15",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
"integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
}
}
},
"async-each": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/async-each/-/async-each-1.0.3.tgz",
@@ -648,11 +625,6 @@
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU="
},
"colors": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/colors/-/colors-1.0.3.tgz",
"integrity": "sha1-BDP0TYCWgP3rYO0mDxsMJi6CpAs="
},
"combined-stream": {
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.7.tgz",
@@ -758,25 +730,6 @@
"resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz",
"integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac="
},
"coveralls": {
"version": "3.0.9",
"resolved": "https://registry.npmjs.org/coveralls/-/coveralls-3.0.9.tgz",
"integrity": "sha512-nNBg3B1+4iDox5A5zqHKzUTiwl2ey4k2o0NEcVZYvl+GOSJdKBj4AJGKLv6h3SvWch7tABHePAQOSZWM9E2hMg==",
"requires": {
"js-yaml": "^3.13.1",
"lcov-parse": "^1.0.0",
"log-driver": "^1.2.7",
"minimist": "^1.2.0",
"request": "^2.88.0"
},
"dependencies": {
"minimist": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ="
}
}
},
"create-error-class": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/create-error-class/-/create-error-class-3.0.2.tgz",
@@ -829,11 +782,6 @@
"resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz",
"integrity": "sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg=="
},
"cycle": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/cycle/-/cycle-1.0.3.tgz",
"integrity": "sha1-IegLK+hYD5i0aPN5QwZisEbDStI="
},
"d": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/d/-/d-1.0.1.tgz",
@@ -1112,11 +1060,6 @@
"prettier-linter-helpers": "^1.0.0"
}
},
"esprima": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz",
"integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A=="
},
"etag": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
@@ -1331,11 +1274,6 @@
"resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz",
"integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU="
},
"eyes": {
"version": "0.1.8",
"resolved": "https://registry.npmjs.org/eyes/-/eyes-0.1.8.tgz",
"integrity": "sha1-Ys8SAjTGg3hdkCNIqADvPgzCC8A="
},
"fast-deep-equal": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz",
@@ -2537,15 +2475,6 @@
"nopt": "~4.0.1"
}
},
"js-yaml": {
"version": "3.13.1",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.13.1.tgz",
"integrity": "sha512-YfbcO7jXDdyj0DGxYVSlSeQNHbD7XPWvrVWeVUujrQEoZzWJIRrCPoyk6kL6IAjAG2IolMK4T0hNUe0HOUs5Jw==",
"requires": {
"argparse": "^1.0.7",
"esprima": "^4.0.0"
}
},
"jsbn": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
@@ -2608,11 +2537,6 @@
"invert-kv": "^2.0.0"
}
},
"lcov-parse": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/lcov-parse/-/lcov-parse-1.0.0.tgz",
"integrity": "sha1-6w1GtUER68VhrLTECO+TY73I9+A="
},
"locate-path": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-3.0.0.tgz",
@@ -2627,11 +2551,6 @@
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz",
"integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg=="
},
"log-driver": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/log-driver/-/log-driver-1.2.7.tgz",
"integrity": "sha512-U7KCmLdqsGHBLeWqYlFA0V0Sl6P08EE1ZrmA9cxjUE0WVqT9qnyVDPz1kzpFEP0jdJuFnasWIfSd7fsaNXkpbg=="
},
"long-timeout": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/long-timeout/-/long-timeout-0.1.1.tgz",
@@ -3302,20 +3221,6 @@
"integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==",
"dev": true
},
"promise-request-retry": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/promise-request-retry/-/promise-request-retry-1.0.2.tgz",
"integrity": "sha512-zZmu19chRtC6TYeAZaELF8s+Zotl48M6bRnIVjcUrObEjpI4wk+2VpGVRaRgCG6isOqsK4c5IMY7t59Ff2ia0A==",
"requires": {
"async": "^2.6.0",
"bluebird": "^3.5.1",
"coveralls": "^3.0.0",
"req-cwd": "^2.0.0",
"request": "^2.85.0",
"request-promise": "^4.2.2",
"winston": "^2.4.0"
}
},
"proto-list": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/proto-list/-/proto-list-1.2.4.tgz",
@@ -3510,22 +3415,6 @@
"integrity": "sha1-jcrkcOHIirwtYA//Sndihtp15jc=",
"dev": true
},
"req-cwd": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/req-cwd/-/req-cwd-2.0.0.tgz",
"integrity": "sha1-1AgrTURZgDZkD7c93qAe1T20nrw=",
"requires": {
"req-from": "^2.0.0"
}
},
"req-from": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/req-from/-/req-from-2.0.0.tgz",
"integrity": "sha1-10GI5H+TeW9Kpx327jWuaJ8+DnA=",
"requires": {
"resolve-from": "^3.0.0"
}
},
"request": {
"version": "2.88.0",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz",
@@ -3565,32 +3454,6 @@
}
}
},
"request-promise": {
"version": "4.2.5",
"resolved": "https://registry.npmjs.org/request-promise/-/request-promise-4.2.5.tgz",
"integrity": "sha512-ZgnepCykFdmpq86fKGwqntyTiUrHycALuGggpyCZwMvGaZWgxW6yagT0FHkgo5LzYvOaCNvxYwWYIjevSH1EDg==",
"requires": {
"bluebird": "^3.5.0",
"request-promise-core": "1.1.3",
"stealthy-require": "^1.1.1",
"tough-cookie": "^2.3.3"
}
},
"request-promise-core": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.3.tgz",
"integrity": "sha512-QIs2+ArIGQVp5ZYbWD5ZLCY29D5CfWizP8eWnm8FoGD1TX61veauETVQbrV60662V0oFBkrDOuaBI8XgtuyYAQ==",
"requires": {
"lodash": "^4.17.15"
},
"dependencies": {
"lodash": {
"version": "4.17.15",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
"integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
}
}
},
"require-directory": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
@@ -3609,11 +3472,6 @@
"path-parse": "^1.0.6"
}
},
"resolve-from": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-3.0.0.tgz",
"integrity": "sha1-six699nWiBvItuZTM17rywoYh0g="
},
"resolve-url": {
"version": "0.2.1",
"resolved": "https://registry.npmjs.org/resolve-url/-/resolve-url-0.2.1.tgz",
@@ -3658,16 +3516,6 @@
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.1.tgz",
"integrity": "sha1-e45lYZCyKOgaZq6nSEgNgozS03o="
},
"scraperapi-sdk": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/scraperapi-sdk/-/scraperapi-sdk-1.0.3.tgz",
"integrity": "sha512-wFzdVptJHAA13HWMxR6DxsesA95cx0eBvylh2CHH9UmzBYor7N54jxgL473IW1VZEferSCNpwlW2R/B3zTPDsQ==",
"requires": {
"promise-request-retry": "^1.0.2",
"request": "^2.88.0",
"request-promise": "^4.2.5"
}
},
"semver": {
"version": "5.6.0",
"resolved": "https://registry.npmjs.org/semver/-/semver-5.6.0.tgz",
@@ -3990,11 +3838,6 @@
"extend-shallow": "^3.0.0"
}
},
"sprintf-js": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz",
"integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw="
},
"sshpk": {
"version": "1.16.1",
"resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz",
@@ -4011,11 +3854,6 @@
"tweetnacl": "~0.14.0"
}
},
"stack-trace": {
"version": "0.0.10",
"resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz",
"integrity": "sha1-VHxws0fo0ytOEI6hoqFZ5f3eGcA="
},
"static-extend": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/static-extend/-/static-extend-0.1.2.tgz",
@@ -4042,11 +3880,6 @@
"resolved": "https://registry.npmjs.org/statuses/-/statuses-1.4.0.tgz",
"integrity": "sha512-zhSCtt8v2NDrRlPQpCNtw/heZLtfUDqxBM1udqikb/Hbk52LK4nQSwr10u77iopCW5LsyHpuXS0GnEc48mLeew=="
},
"stealthy-require": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/stealthy-require/-/stealthy-require-1.1.1.tgz",
"integrity": "sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks="
},
"string-width": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz",
@@ -4518,26 +4351,6 @@
"string-width": "^2.1.1"
}
},
"winston": {
"version": "2.4.4",
"resolved": "https://registry.npmjs.org/winston/-/winston-2.4.4.tgz",
"integrity": "sha512-NBo2Pepn4hK4V01UfcWcDlmiVTs7VTB1h7bgnB0rgP146bYhMxX0ypCz3lBOfNxCO4Zuek7yeT+y/zM1OfMw4Q==",
"requires": {
"async": "~1.0.0",
"colors": "1.0.x",
"cycle": "1.0.x",
"eyes": "0.1.x",
"isstream": "0.1.x",
"stack-trace": "0.0.x"
},
"dependencies": {
"async": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/async/-/async-1.0.0.tgz",
"integrity": "sha1-+PwEyjoTeErenhZBr5hXjPvWR6k="
}
}
},
"wkx": {
"version": "0.4.8",
"resolved": "https://registry.npmjs.org/wkx/-/wkx-0.4.8.tgz",

View File

@@ -17,9 +17,7 @@
"checkup-notify": "cd app/npmScripts && node npmCheckUpNotify.js",
"test-search": "cd test && node searchTest.js",
"test-olx-scraper": "cd test && node olxScrapeTest.js",
"test-saljic-scraper": "cd test && node saljicScrapeTest.js",
"test-rental-scraper": "cd test && node rentalScrapeTest.js",
"test-scraper-api": "cd test && node scraperAPITest.js"
"test-rental-scraper": "cd test && node rentalScrapeTest.js"
},
"repository": {
"type": "git",
@@ -52,7 +50,6 @@
"pg": "^7.10.0",
"prettier": "^1.19.1",
"react-step-wizard": "^5.1.0",
"scraperapi-sdk": "^1.0.3",
"sequelize": "^5.18.4",
"sequelize-cli": "^5.5.0"
},

View File

@@ -9,7 +9,7 @@ if (urlToScrape) {
(async () => {
const data = await crawler.scrapeAd(urlToScrape);
console.log("Scraped data:", data);
console.log(data);
})();
} else {
console.log("No URL to scrape. Use like this : ");

View File

@@ -1,17 +0,0 @@
"use strict";

// Manual check: scrapes the single Saljic ad URL given as the first
// command-line argument and prints what the crawler returns.
const SaljicCrawler = require("../app/crawler/specificCrawlers/saljic");

const urlToScrape = process.argv[2] || undefined;

// Explains how the script is meant to be invoked.
const printUsage = () => {
  console.log("No URL to scrape. Use like this : ");
  console.log("npm run test-saljic-scraper -- URL_TO_SCRAPE");
};

// Scrapes `url` with the given crawler and logs whatever `scrapeAd` resolves to.
const scrapeAndPrint = async (crawler, url) => {
  const data = await crawler.scrapeAd(url);
  console.log("Scraped data:", data);
};

if (!urlToScrape) {
  printUsage();
} else {
  scrapeAndPrint(new SaljicCrawler(), urlToScrape);
}

View File

@@ -1,19 +0,0 @@
// Manual monitoring script: once per second, prints how many concurrent
// requests the Scraper API account reports.
const { SCRAPER_API_KEY } = require("../app/config/appConfig");
const scraperapiClient = require("scraperapi-sdk")(SCRAPER_API_KEY);

const LOG_INTERVAL_MS = 1000;

/**
 * Reads the account info and logs its `concurrentRequests` value together
 * with the local date/time. On failure only the error message is logged.
 *
 * @returns {Promise<void>}
 */
const logUsedConcurrentReq = async () => {
  try {
    const accountInfo = await scraperapiClient.account();
    const timestamp = new Date().toLocaleString();
    const usedConcurrentReq = accountInfo.concurrentRequests;
    console.log(
      timestamp,
      " Number of concurrent requests: ",
      usedConcurrentReq
    );
  } catch (err) {
    console.log(err.message);
  }
};

setInterval(logUsedConcurrentReq, LOG_INTERVAL_MS);