Files
old-web/app/crawler/specificCrawlers/saljic.js
2020-01-29 01:09:53 +01:00

84 lines
2.2 KiB
JavaScript

"use strict";
const fetch = require("node-fetch");
const cheerio = require("cheerio");
const moment = require("moment-timezone");
const {
AD_TYPE,
AD_CATEGORY,
AD_AGENCY,
AD_STATUS,
CRAWLER_AD_TYPE,
FURNISHING_TYPE,
HEATING_TYPE
} = require("../../common/enums");
const {
PRINT_CRAWLER_DEBUG,
DEFAULT_TIMEZONE
} = require("../../config/appConfig");
const { SALJIC_FORCE_CRAWL } = require("../specificConfigs/saljic");
// Lookup tables translating the application's own enum values into the
// "&name=value" query-string fragments used for the Saljic site
// (presumably appended to the search URL — the call site is not in this file).
const SALJIC_ENUMS = {
  // Ad type (all / sale / rent) -> "input_vrsta" fragment.
  SALJIC_AD_TYPE: Object.fromEntries([
    [CRAWLER_AD_TYPE.ALL, "&input_vrsta="],
    [CRAWLER_AD_TYPE.ONLY_SELL, "&input_vrsta=1"],
    [CRAWLER_AD_TYPE.ONLY_RENT, "&input_vrsta=2"]
  ]),
  // Ad category id -> "input_kategorija" fragment.
  SALJIC_AD_CATEGORY: Object.fromEntries([
    [AD_CATEGORY.ALL.id, "&input_kategorija="],
    [AD_CATEGORY.FLAT.id, "&input_kategorija=15"],
    [AD_CATEGORY.HOUSE.id, "&input_kategorija=9"],
    // Site categories 3 and 4 are also "gradjevinsko" (building land).
    [AD_CATEGORY.LAND.id, "&input_kategorija=5"],
    [AD_CATEGORY.OFFICE.id, "&input_kategorija=8"],
    [AD_CATEGORY.APARTMENT.id, "&input_kategorija=1"],
    [AD_CATEGORY.GARAGE.id, "&input_kategorija=2"]
    // AD_CATEGORY.COTTAGE.id intentionally has no mapping yet.
  ])
};
/**
 * Crawler for the saljicnekretnine.ba real-estate search.
 *
 * NOTE: `crawl()` is currently a stub that only logs the base URL; the
 * configuration stored by the constructor is not consumed yet in this file.
 */
class SaljicCrawler {
  /**
   * @param {Array<{save: Function}>} [savers=[]] - Persistence back-ends.
   *   Only the first one is used by `saveCrawledResults`.
   * @param {*} [crawlerAdTypes=CRAWLER_AD_TYPE.ALL] - Which ad types to crawl.
   * @param {Array} [crawlerAdCategories] - Ad categories to crawl; defaults to
   *   flats and houses.
   * @param {number} [maxPages=5000] - Upper bound on pages to crawl.
   * @param {number} [maxResultsPerPage=5000] - Upper bound on results per page.
   * @param {Array<string>} [ignoredUsernames=[]] - Usernames whose ads should
   *   be skipped (presumably advertiser names — TODO confirm against caller).
   * @param {number} [delayBetweenPages=1000] - Pause between page requests
   *   (presumably milliseconds, matching `sleep(ms)` — TODO confirm).
   */
  constructor(
    savers = [],
    crawlerAdTypes = CRAWLER_AD_TYPE.ALL,
    crawlerAdCategories = [AD_CATEGORY.FLAT, AD_CATEGORY.HOUSE],
    maxPages = 5000,
    maxResultsPerPage = 5000,
    ignoredUsernames = [],
    delayBetweenPages = 1000
  ) {
    this.savers = savers;
    this.baseUrl = "https://www.saljicnekretnine.ba/v2/nekretnine_search";
    this.crawlerAdTypes = crawlerAdTypes;
    this.crawlerAdCategories = crawlerAdCategories;
    // maxPages and ignoredUsernames used to be accepted but silently dropped;
    // keep them on the instance so the crawl logic can honour them.
    this.maxPages = maxPages;
    this.maxResultsPerPage = maxResultsPerPage;
    this.ignoredUsernames = ignoredUsernames;
    this.delayBetweenPages = delayBetweenPages;
  }

  /**
   * Entry point of the crawl. Not implemented yet: it only logs the base URL.
   *
   * @returns {Promise<void>}
   */
  async crawl() {
    console.log("Saljic URL: ", this.baseUrl);
  }

  //======= HELPER FUNCTIONS =============

  /**
   * Resolves after the given delay.
   *
   * @param {number} ms - Delay in milliseconds, passed to `setTimeout`.
   * @returns {Promise<void>}
   */
  async sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Persists crawled results through the first configured saver.
   *
   * Only `savers[0]` is used on purpose: for now the Postgres saver is the
   * only one in use, and returning its result directly lets callers use the
   * sequelize options and information produced when the data is inserted.
   *
   * @param {*} results - Crawled results, handed to the saver unchanged.
   * @returns {Promise<*>} Whatever the saver's `save` resolves to.
   * @throws {Error} If no saver is configured.
   */
  async saveCrawledResults(results) {
    const primarySaver = this.savers[0];
    if (primarySaver == null) {
      // Fail with a clear message instead of an opaque
      // "cannot read property 'save' of undefined".
      throw new Error(
        "SaljicCrawler: no saver configured; cannot save crawled results"
      );
    }
    return primarySaver.save(results);
  }
}
// The crawler class is this module's only export.
module.exports = SaljicCrawler;