84 lines
2.2 KiB
JavaScript
84 lines
2.2 KiB
JavaScript
"use strict";
|
|
|
|
const fetch = require("node-fetch");
|
|
const cheerio = require("cheerio");
|
|
const moment = require("moment-timezone");
|
|
|
|
const {
|
|
AD_TYPE,
|
|
AD_CATEGORY,
|
|
AD_AGENCY,
|
|
AD_STATUS,
|
|
CRAWLER_AD_TYPE,
|
|
FURNISHING_TYPE,
|
|
HEATING_TYPE
|
|
} = require("../../common/enums");
|
|
|
|
const {
|
|
PRINT_CRAWLER_DEBUG,
|
|
DEFAULT_TIMEZONE
|
|
} = require("../../config/appConfig");
|
|
const { SALJIC_FORCE_CRAWL } = require("../specificConfigs/saljic");
|
|
|
|
/**
 * Lookup tables that translate the project's generic crawler enums into the
 * query-string fragments used by the Saljic search form.
 *
 * - SALJIC_AD_TYPE: keyed by CRAWLER_AD_TYPE values, yields the
 *   "input_vrsta" fragment.
 * - SALJIC_AD_CATEGORY: keyed by AD_CATEGORY ids, yields the
 *   "input_kategorija" fragment.
 *
 * NOTE(review): the code that consumes these fragments is not visible in this
 * file; presumably they are appended to the search URL — confirm against the
 * caller.
 */
const SALJIC_ENUMS = (() => {
  // Ad-type filter: an empty value means "no filter".
  const adTypeFragments = {
    [CRAWLER_AD_TYPE.ALL]: "&input_vrsta=",
    [CRAWLER_AD_TYPE.ONLY_SELL]: "&input_vrsta=1",
    [CRAWLER_AD_TYPE.ONLY_RENT]: "&input_vrsta=2"
  };

  // Category filter: an empty value means "no filter".
  const adCategoryFragments = {
    [AD_CATEGORY.ALL.id]: "&input_kategorija=",
    [AD_CATEGORY.FLAT.id]: "&input_kategorija=15",
    [AD_CATEGORY.HOUSE.id]: "&input_kategorija=9",
    // Site categories 3 and 4 are also building land ("gradjevinsko"),
    // but only category 5 is mapped here.
    [AD_CATEGORY.LAND.id]: "&input_kategorija=5",
    [AD_CATEGORY.OFFICE.id]: "&input_kategorija=8",
    [AD_CATEGORY.APARTMENT.id]: "&input_kategorija=1",
    [AD_CATEGORY.GARAGE.id]: "&input_kategorija=2"
    // AD_CATEGORY.COTTAGE has no mapping defined yet.
  };

  return {
    SALJIC_AD_TYPE: adTypeFragments,
    SALJIC_AD_CATEGORY: adCategoryFragments
  };
})();
|
|
|
|
/**
 * Crawler for the saljicnekretnine.ba real-estate search.
 *
 * The instance holds the crawl configuration and the list of savers used to
 * persist results. The crawl routine itself is currently a stub that only
 * logs the base URL.
 */
class SaljicCrawler {
  /**
   * @param {Array<{save: Function}>} [savers=[]] - Persistence back-ends.
   *   Only the first one is used by `saveCrawledResults`.
   * @param {*} [crawlerAdTypes=CRAWLER_AD_TYPE.ALL] - Which ad types to crawl.
   * @param {Array} [crawlerAdCategories=[AD_CATEGORY.FLAT, AD_CATEGORY.HOUSE]]
   *   Ad categories to crawl.
   * @param {number} [maxPages=5000] - Upper bound on pages to crawl.
   * @param {number} [maxResultsPerPage=5000] - Upper bound on results per page.
   * @param {string[]} [ignoredUsernames=[]] - Usernames to skip.
   *   NOTE(review): nothing in this file consumes this list yet.
   * @param {number} [delayBetweenPages=1000] - Pause between page requests;
   *   presumably milliseconds, matching `sleep(ms)` — confirm.
   */
  constructor(
    savers = [],
    crawlerAdTypes = CRAWLER_AD_TYPE.ALL,
    crawlerAdCategories = [AD_CATEGORY.FLAT, AD_CATEGORY.HOUSE],
    maxPages = 5000,
    maxResultsPerPage = 5000,
    ignoredUsernames = [],
    delayBetweenPages = 1000
  ) {
    this.savers = savers;
    this.baseUrl = "https://www.saljicnekretnine.ba/v2/nekretnine_search";
    this.crawlerAdTypes = crawlerAdTypes;
    this.crawlerAdCategories = crawlerAdCategories;
    // Every accepted option is stored, so a caller-supplied page limit or
    // ignore list is never silently discarded.
    this.maxPages = maxPages;
    this.maxResultsPerPage = maxResultsPerPage;
    this.ignoredUsernames = ignoredUsernames;
    this.delayBetweenPages = delayBetweenPages;
  }

  /**
   * Entry point of the crawl.
   *
   * TODO: crawling logic is not implemented yet; only the target URL is logged.
   *
   * @returns {Promise<void>}
   */
  async crawl() {
    console.log("Saljic URL: ", this.baseUrl);
  }

  //======= HELPER FUNCTIONS =============

  /**
   * Resolves after the given delay.
   *
   * @param {number} ms - Delay in milliseconds (passed to `setTimeout`).
   * @returns {Promise<void>}
   */
  async sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Persists crawled results through the first configured saver and returns
   * whatever that saver's `save` returns, so callers can inspect the
   * insertion outcome (e.g. sequelize information).
   *
   * Only the first saver is used on purpose: for now Postgres is the only
   * saver in use, so the remaining entries of `savers` are ignored.
   *
   * @param {*} results - Crawled data handed to the saver unchanged.
   * @returns {Promise<*>} The result of `savers[0].save(results)`.
   * @throws {Error} (as a rejection) when no saver is configured.
   */
  async saveCrawledResults(results) {
    const primarySaver = this.savers[0];

    // Fail with a descriptive message instead of an opaque
    // "Cannot read properties of undefined" when the saver list is empty.
    if (primarySaver === undefined || primarySaver === null) {
      throw new Error(
        "SaljicCrawler: no saver configured, cannot save crawled results"
      );
    }

    return primarySaver.save(results);
  }
}
|
|
|
|
module.exports = SaljicCrawler;
|