Compare commits
11 Commits
prostor-vi
...
scraper-ap
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ce857ddce9 | ||
|
|
148b2ea863 | ||
|
|
d436d4a37b | ||
|
|
6791a509d0 | ||
|
|
edc6e2bbf7 | ||
|
|
4f230020d7 | ||
|
|
f62a7200c7 | ||
|
|
cff7cc2e9c | ||
|
|
df2a962d0f | ||
|
|
be4508ebea | ||
|
|
81fa3f046d |
@@ -41,6 +41,13 @@ const PROSTOR_LOGIN = {
|
|||||||
PASSWORD: process.env.PROSTOR_LOGIN_PASS
|
PASSWORD: process.env.PROSTOR_LOGIN_PASS
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const USER_AGENT =
|
||||||
|
process.env.USER_AGENT ||
|
||||||
|
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";
|
||||||
|
|
||||||
|
const USE_SCRAPER_API = process.env.USE_SCRAPER_API || 1; //Default to use
|
||||||
|
const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || "";
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
APP_PORT,
|
APP_PORT,
|
||||||
APP_URL,
|
APP_URL,
|
||||||
@@ -54,5 +61,8 @@ module.exports = {
|
|||||||
API_MAP_KEY,
|
API_MAP_KEY,
|
||||||
STAGING,
|
STAGING,
|
||||||
CHECK_UP_DAYS,
|
CHECK_UP_DAYS,
|
||||||
PROSTOR_LOGIN
|
PROSTOR_LOGIN,
|
||||||
|
USER_AGENT,
|
||||||
|
USE_SCRAPER_API,
|
||||||
|
SCRAPER_API_KEY
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
const fetch = require("node-fetch");
|
const fetch = require("../../helpers/fetchWrapper");
|
||||||
const cheerio = require("cheerio");
|
const cheerio = require("cheerio");
|
||||||
const Promise = require("bluebird");
|
const Promise = require("bluebird");
|
||||||
const moment = require("moment-timezone");
|
const moment = require("moment-timezone");
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
const fetch = require("node-fetch");
|
const fetch = require("../../helpers/fetchWrapper");
|
||||||
const cheerio = require("cheerio");
|
const cheerio = require("cheerio");
|
||||||
const Promise = require("bluebird");
|
const Promise = require("bluebird");
|
||||||
const moment = require("moment-timezone");
|
const moment = require("moment-timezone");
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
const fetch = require("node-fetch");
|
const fetch = require("../../helpers/fetchWrapper");
|
||||||
const cheerio = require("cheerio");
|
const cheerio = require("cheerio");
|
||||||
const moment = require("moment-timezone");
|
const moment = require("moment-timezone");
|
||||||
const FormData = require("form-data");
|
const FormData = require("form-data");
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
const fetch = require("node-fetch");
|
const fetch = require("../../helpers/fetchWrapper");
|
||||||
const cheerio = require("cheerio");
|
const cheerio = require("cheerio");
|
||||||
const Promise = require("bluebird");
|
const Promise = require("bluebird");
|
||||||
const moment = require("moment-timezone");
|
const moment = require("moment-timezone");
|
||||||
@@ -399,7 +399,9 @@ class RentalCrawler {
|
|||||||
);
|
);
|
||||||
if (!publishedDateMoment.isValid()) {
|
if (!publishedDateMoment.isValid()) {
|
||||||
throw {
|
throw {
|
||||||
message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}`
|
message: `Invalid published date : ${
|
||||||
|
extractedData["re_realEstates_inserted"]
|
||||||
|
}`
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -410,7 +412,9 @@ class RentalCrawler {
|
|||||||
);
|
);
|
||||||
if (!renewedDateMoment.isValid()) {
|
if (!renewedDateMoment.isValid()) {
|
||||||
throw {
|
throw {
|
||||||
message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}`
|
message: `Invalid renewed date : ${
|
||||||
|
extractedData["re_realEstates_edited"]
|
||||||
|
}`
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
const fetch = require("node-fetch");
|
const fetch = require("../../helpers/fetchWrapper");
|
||||||
const cheerio = require("cheerio");
|
const cheerio = require("cheerio");
|
||||||
const moment = require("moment-timezone");
|
const moment = require("moment-timezone");
|
||||||
|
|
||||||
|
|||||||
@@ -332,10 +332,14 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//When includeIncompleteAds is not defined (null or undefined), it is treated as true
|
||||||
const order = [["updatedAt", "desc"]];
|
const order = [["updatedAt", "desc"]];
|
||||||
|
|
||||||
return db.RealEstate.findAll({
|
return db.RealEstate.findAll({
|
||||||
where: includeIncompleteAds ? queryIncludeIncomplete : query,
|
where:
|
||||||
|
includeIncompleteAds || includeIncompleteAds == null
|
||||||
|
? queryIncludeIncomplete
|
||||||
|
: query,
|
||||||
limit: maxResults,
|
limit: maxResults,
|
||||||
order
|
order
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -157,7 +157,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
|||||||
} else {
|
} else {
|
||||||
// If real estate dont have defined number of rooms ex. null
|
// If real estate dont have defined number of rooms ex. null
|
||||||
//It returns requests that didn't choose number of rooms - also null
|
//It returns requests that didn't choose number of rooms - also null
|
||||||
//Or ones that picked some values but also picked to includeIncomplete ads
|
//Or ones that picked some values but also picked to includeIncomplete ads (or default)
|
||||||
numberOfRoomsQuery = {
|
numberOfRoomsQuery = {
|
||||||
[Op.or]: [
|
[Op.or]: [
|
||||||
{
|
{
|
||||||
@@ -176,7 +176,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
includeIncompleteAds: {
|
includeIncompleteAds: {
|
||||||
[Op.eq]: true
|
[Op.or]: {
|
||||||
|
[Op.eq]: true,
|
||||||
|
[Op.is]: null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -226,7 +229,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
includeIncompleteAds: {
|
includeIncompleteAds: {
|
||||||
[Op.eq]: true
|
[Op.or]: {
|
||||||
|
[Op.eq]: true,
|
||||||
|
[Op.is]: null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -275,7 +281,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
includeIncompleteAds: {
|
includeIncompleteAds: {
|
||||||
[Op.eq]: true
|
[Op.or]: {
|
||||||
|
[Op.eq]: true,
|
||||||
|
[Op.is]: null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -313,7 +322,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
includeIncompleteAds: {
|
includeIncompleteAds: {
|
||||||
[Op.eq]: true
|
[Op.or]: {
|
||||||
|
[Op.eq]: true,
|
||||||
|
[Op.is]: null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -347,7 +359,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
includeIncompleteAds: {
|
includeIncompleteAds: {
|
||||||
[Op.eq]: true
|
[Op.or]: {
|
||||||
|
[Op.eq]: true,
|
||||||
|
[Op.is]: null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -381,7 +396,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
includeIncompleteAds: {
|
includeIncompleteAds: {
|
||||||
[Op.eq]: true
|
[Op.or]: {
|
||||||
|
[Op.eq]: true,
|
||||||
|
[Op.is]: null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -423,10 +441,13 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
|||||||
[Op.eq]: "ANY"
|
[Op.eq]: "ANY"
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
//Tag to check if incomplete ads are accepted in query
|
//Tag to check if incomplete ads are accepted in query which is default
|
||||||
if (checkForIncompleteWanted) {
|
if (checkForIncompleteWanted) {
|
||||||
query.includeIncompleteAds = {
|
query.includeIncompleteAds = {
|
||||||
[Op.eq]: true
|
[Op.or]: {
|
||||||
|
[Op.eq]: true,
|
||||||
|
[Op.is]: null
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
21
app/helpers/fetchWrapper.js
Normal file
21
app/helpers/fetchWrapper.js
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
const nodeFetch = require("node-fetch");
|
||||||
|
const {
|
||||||
|
USER_AGENT,
|
||||||
|
USE_SCRAPER_API,
|
||||||
|
SCRAPER_API_KEY
|
||||||
|
} = require("../config/appConfig");
|
||||||
|
|
||||||
|
/**
 * Wrapper around node-fetch that the crawlers require in its place.
 *
 * It forces the configured User-Agent header on every request and, when the
 * scraper API is enabled, routes the request through api.scraperapi.com with
 * the original URL passed as the `url` query parameter.
 *
 * @param {string} url - Address of the page to request.
 * @param {Object} [options={}] - node-fetch options; never mutated.
 * @returns {Promise} Whatever node-fetch resolves/rejects with.
 */
const fetch = async (url, options = {}) => {
  // Build a fresh Headers object instead of writing into options.headers:
  // the caller's object stays untouched, and a Headers instance or a list of
  // pairs passed as `headers` is handled correctly as well.
  const headers = new nodeFetch.Headers(options.headers);
  headers.set("User-Agent", USER_AGENT);
  const newOptions = Object.assign({}, options, { headers });

  // The flag comes from an environment variable, so "off" arrives as the
  // string "0", which is truthy. Normalize before deciding.
  const scraperFlag = String(USE_SCRAPER_API == null ? "" : USE_SCRAPER_API)
    .trim()
    .toLowerCase();
  const scraperApiEnabled = !["", "0", "false"].includes(scraperFlag);

  // The target URL must be percent-encoded, otherwise its own query string
  // (?, &, #) is parsed as parameters of the scraper API request.
  const urlAdaptedForScraping = scraperApiEnabled
    ? `http://api.scraperapi.com/?api_key=${encodeURIComponent(
        SCRAPER_API_KEY
      )}&url=${encodeURIComponent(url)}`
    : url;

  return nodeFetch(urlAdaptedForScraping, newOptions);
};
|
||||||
|
|
||||||
|
module.exports = fetch;
|
||||||
@@ -15,15 +15,7 @@ module.exports = (sequelize, DataTypes) => {
|
|||||||
allowNull: false,
|
allowNull: false,
|
||||||
defaultValue: {
|
defaultValue: {
|
||||||
type: "Polygon",
|
type: "Polygon",
|
||||||
coordinates: [
|
coordinates: [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
|
||||||
[
|
|
||||||
[0, 0],
|
|
||||||
[0, 0],
|
|
||||||
[0, 0],
|
|
||||||
[0, 0],
|
|
||||||
[0, 0]
|
|
||||||
]
|
|
||||||
],
|
|
||||||
crs: { type: "name", properties: { name: "EPSG:4326" } }
|
crs: { type: "name", properties: { name: "EPSG:4326" } }
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -131,7 +131,7 @@ const notifyRequestsWithDailyOption = async () => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const checkUpNotify = async () => {
|
const checkUpNotify = async () => {
|
||||||
/* const searchRequestsForCheckUp = await findAllRequestsForCheckUp();
|
/* const searchRequestsForCheckUp = await findAllRequestsForCheckUp();
|
||||||
|
|
||||||
const asyncSendEmailActions = [];
|
const asyncSendEmailActions = [];
|
||||||
|
|
||||||
@@ -144,7 +144,7 @@ const checkUpNotify = async () => {
|
|||||||
asyncSendEmailActions.push(sendEmailPromise);
|
asyncSendEmailActions.push(sendEmailPromise);
|
||||||
sendEmailPromise.catch(err => console.log("[Email Sending Failed]", err));
|
sendEmailPromise.catch(err => console.log("[Email Sending Failed]", err));
|
||||||
}
|
}
|
||||||
await Promise.all(asyncSendEmailActions); */
|
await Promise.all(asyncSendEmailActions); */
|
||||||
};
|
};
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|||||||
@@ -61,9 +61,8 @@
|
|||||||
<p class="distinguished">
|
<p class="distinguished">
|
||||||
<label class="checkbox-label">
|
<label class="checkbox-label">
|
||||||
<input type="checkbox" class="filled-in" name="includeIncompleteAds"
|
<input type="checkbox" class="filled-in" name="includeIncompleteAds"
|
||||||
<% if (includeIncompleteAds) { %>
|
|
||||||
checked
|
checked
|
||||||
<% } %>>
|
>
|
||||||
<span>Uključi i oglase bez potpunih informacija</span>
|
<span>Uključi i oglase bez potpunih informacija</span>
|
||||||
</label>
|
</label>
|
||||||
</p>
|
</p>
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ APP_BASE_URL=base url for the app
|
|||||||
|
|
||||||
ENVIRONMENT=Variable to denote development, staging and production
|
ENVIRONMENT=Variable to denote development, staging and production
|
||||||
|
|
||||||
|
USER_AGENT=User agent header to send in fetch requests
|
||||||
|
|
||||||
MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email, others will be truncated and URL with full list will be shown
|
MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email, others will be truncated and URL with full list will be shown
|
||||||
MAX_REAL_ESTATES_IN_FIRST_EMAIL=Max number of real estates that will be shown in first (welcome) email
|
MAX_REAL_ESTATES_IN_FIRST_EMAIL=Max number of real estates that will be shown in first (welcome) email
|
||||||
@@ -21,6 +22,10 @@ GA_ID=Google Analytics ID
|
|||||||
#=============== GOOGLE MAPS =============#
|
#=============== GOOGLE MAPS =============#
|
||||||
API_MAP_KEY=(your-key-here)
|
API_MAP_KEY=(your-key-here)
|
||||||
|
|
||||||
|
#=============== SCRAPER API SUPPORT =============#
|
||||||
|
USE_SCRAPER_API= To turn it on (1) or off (0)
|
||||||
|
SCRAPER_API_KEY= Key for Scraper api
|
||||||
|
|
||||||
#=============== AWS SDK EMAIL SETTINGS =======#
|
#=============== AWS SDK EMAIL SETTINGS =======#
|
||||||
AWS_KEY_ID=(your-key-here)
|
AWS_KEY_ID=(your-key-here)
|
||||||
AWS_SECRET_ACCESS_KEY=(your-key-here)
|
AWS_SECRET_ACCESS_KEY=(your-key-here)
|
||||||
@@ -69,4 +74,4 @@ AKTIDO_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without
|
|||||||
SALJIC_MAX_RESULTS_PER_PAGE=For Saljic crawler, this represents how many ads are crawled at once
|
SALJIC_MAX_RESULTS_PER_PAGE=For Saljic crawler, this represents how many ads are crawled at once
|
||||||
SALJIC_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values
|
SALJIC_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values
|
||||||
SALJIC_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
|
SALJIC_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
|
||||||
SALJIC_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
|
SALJIC_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
|
||||||
|
|||||||
Reference in New Issue
Block a user