Compare commits

...

10 Commits

Author SHA1 Message Date
Naida Vatric
c6f0e039a5 Added price history log. 2020-01-21 23:12:04 +01:00
Naida Vatric
8d3f001678 WIP Added model, migration and bulk upsert fnc. 2020-01-21 16:28:47 +01:00
Naida Vatric
42eddb3aa5 WIP Bulk create not working. 2020-01-21 01:19:35 +01:00
Naida Vatric
0a181f742f WIP Added model and migration for new table priceHistory. 2020-01-20 23:09:02 +01:00
Naida Vatric
259799144e Merge branch 'rental-crawler-fix' into 'master'
Rental crawler fix

See merge request saburly/marketalarm/web!80
2020-01-06 23:12:52 +00:00
Naida Vatric
bc73d4159d Merge branch 'master' into 'rental-crawler-fix'
# Conflicts:
#   .gitignore
2020-01-06 23:12:40 +00:00
Naida Vatric
37ad32fe76 Merge branch 'edit-location-start' into 'master'
Edit location start

See merge request saburly/marketalarm/web!79
2020-01-06 23:10:16 +00:00
Naida Vatric
94875a0fa3 Merge branch 'add-currency-to-price-filters' into 'master'
Add currency to price filters

See merge request saburly/marketalarm/web!78
2020-01-06 23:09:40 +00:00
Naida Vatric
0c2d218d29 Changed floor numbers and basement-attic tag. 2020-01-02 00:10:31 +01:00
Naida Vatric
fed2dc00dc Changed number of rooms. 2019-12-29 23:42:39 +01:00
10 changed files with 210 additions and 11 deletions

2
.gitignore vendored
View File

@@ -2,4 +2,4 @@ node_modules/
.env
.idea/
.eslintrc
.vscode/
.vscode/

View File

@@ -1,6 +1,7 @@
const moment = require("moment");
const { bulkUpsertRealEstates } = require("../../helpers/db/realEstate");
const { bulkUpsertPriceHistory } = require("../../helpers/db/priceHistory");
class PostgresSaver {
connect() {
@@ -11,6 +12,21 @@ class PostgresSaver {
async save(results) {
const savedRecords = await bulkUpsertRealEstates(results);
//Extruding data for price history table
const resultPrices = savedRecords.map(realEstate => {
//Null values canot be recognized by ignore duplicates in sequalize
//Value price = 0 indicates 'cijena na upit'
const priceTmp =
realEstate.dataValues.price === null ? 0 : realEstate.dataValues.price;
return {
realEstateId: realEstate.dataValues.id,
price: priceTmp,
createdAt: realEstate.dataValues.createdAt,
updatedAt: realEstate.dataValues.updatedAt
};
});
const savedPrices = await bulkUpsertPriceHistory(resultPrices);
if (Array.isArray(savedRecords)) {
const newRealEstates = [];

View File

@@ -312,7 +312,7 @@ class RentalCrawler {
let numberOfRooms =
parseInt(extractedData["re_realEstates_roomsNO"]) +
parseInt(extractedData["re_realEstates_bedroomNO"]) || null,
parseInt(extractedData["re_realEstates_bedNO"]) || null,
numberOfFloors =
parseInt(extractedData["re_realEstates_floorsNO"]) ||
this.getNumberOfFloorsFromFloorId(extractedData["re_floorNO_id"]),
@@ -352,7 +352,9 @@ class RentalCrawler {
realEstatePropertiesFromInfrastructure.phoneConnection,
cableTV = realEstatePropertiesFromInfrastructure.cableTV,
internet = realEstatePropertiesFromInfrastructure.internet,
basementAttic = realEstatePropertiesFromSpaces.basementAttic,
basementAttic =
realEstatePropertiesFromSpaces.basementAttic ||
this.checkBasemAtticFromFloors(extractedData["re_floorNO_id"]),
storeRoom = realEstatePropertiesFromSpaces.storeRoom,
videoSurveillance =
realEstatePropertiesFromDescriptions.videoSurveillance ||
@@ -397,9 +399,7 @@ class RentalCrawler {
);
if (!publishedDateMoment.isValid()) {
throw {
message: `Invalid published date : ${
extractedData["re_realEstates_inserted"]
}`
message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}`
};
}
@@ -410,9 +410,7 @@ class RentalCrawler {
);
if (!renewedDateMoment.isValid()) {
throw {
message: `Invalid renewed date : ${
extractedData["re_realEstates_edited"]
}`
message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}`
};
}
@@ -782,8 +780,42 @@ class RentalCrawler {
if (floorIds.length === 0) {
return null;
}
let noOfFloors = floorIds.length;
// Floors of 'suteren', 'podrum', 'tavan' and 'potkrovlje' are not counted
floorIds.forEach(id => {
if (
parseInt(id) === 1 ||
parseInt(id) === 2 ||
parseInt(id) === 12 ||
parseInt(id) === 14
) {
noOfFloors--;
}
});
return noOfFloors;
}
return floorIds.length;
checkBasemAtticFromFloors(floorsIdText) {
// floorIdText can be array of numbers, separated by comma or number
const floorIds = floorsIdText.split(",");
let check = false;
if (floorIds.length === 0) {
check = false;
}
//If floors 'suteren', 'podrum', 'tavan' and 'potkrovlje' exists then tag for basement-attic is true
floorIds.forEach(id => {
if (
parseInt(id) === 1 ||
parseInt(id) === 2 ||
parseInt(id) === 12 ||
parseInt(id) === 14
) {
check = true;
}
});
return check;
}
async sleep(ms) {

View File

@@ -0,0 +1,20 @@
"use strict";
const db = require("../../models/index");
const sequelize = require("sequelize");
const bulkUpsertPriceHistory = async priceHistoryData => {
try {
const order = [["realEstateId", "desc"]];
return await db.PriceHistory.bulkCreate(priceHistoryData, {
order,
ignoreDuplicates: true
});
} catch (e) {
console.log("Error bulk upserting priceHistory : ", e);
}
};
module.exports = {
bulkUpsertPriceHistory
};

View File

@@ -0,0 +1,42 @@
"use strict";
module.exports = {
up: (queryInterface, Sequelize) => {
const tableFields = {
id: {
type: Sequelize.BIGINT,
autoIncrement: true,
allowNull: false,
primaryKey: true
},
realEstateId: {
type: Sequelize.BIGINT,
allowNull: false,
unique: "uniquePriceRealEstate",
references: {
model: "RealEstates",
key: "id"
},
onUpdate: "CASCADE",
onDelete: "SET NULL"
},
price: {
type: Sequelize.REAL,
unique: "uniquePriceRealEstate"
},
createdAt: {
type: Sequelize.DATE,
defaultValue: Sequelize.literal("NOW()")
},
updatedAt: {
type: Sequelize.DATE,
defaultValue: Sequelize.literal("NOW()")
}
};
return queryInterface.createTable("PriceHistory", tableFields);
},
down: queryInterface => {
return queryInterface.dropTable("PriceHistory", {});
}
};

View File

@@ -0,0 +1,10 @@
"use strict";
module.exports = {
up: (queryInterface, Sequelize) =>
queryInterface.addConstraint("PriceHistory", ["realEstateId", "price"], {
type: "unique",
name: "uniquePriceRealEstate"
}),
down: queryInterface =>
queryInterface.removeConstraint("PriceHistory", "uniquePriceRealEstate")
};

View File

@@ -0,0 +1,44 @@
"use strict";
module.exports = (sequalize, DataTypes) => {
const PriceHistory = sequalize.define(
"PriceHistory",
{
id: {
type: DataTypes.BIGINT,
autoIncrement: true,
primaryKey: true,
allowNull: false
},
realEstateId: {
type: DataTypes.BIGINT,
allowNull: false,
unique: "uniquePriceRealEstate",
references: {
model: "RealEstates",
key: "id"
},
onUpdate: "CASCADE",
onDelete: "SET NULL"
},
price: {
type: DataTypes.REAL,
unique: "uniquePriceRealEstate"
}
},
{
freezeTableName: true
}
);
PriceHistory.associate = models => {
PriceHistory.hasMany(models.RealEstate, {
foreignKey: "id",
sourceKey: "realEstateId",
targetKey: "id",
as: "realEstates"
});
};
return PriceHistory;
};

View File

@@ -0,0 +1,34 @@
"use strict";
const { RealEstate } = require("../models");
module.exports = {
async up(queryInterface, Sequelize) {
//Reading initial data from RealEstate table in db
const realEstateInitialData = await RealEstate.findAll();
//Extruding data for table PriceHistory
const priceHistoryInitialData = realEstateInitialData.map(realEstate => {
//Null values canot be recognized by ignore duplicates in sequalize
//Value price = 0 indicates 'cijena na upit'
const priceTmp =
realEstate.dataValues.price === null ? 0 : realEstate.dataValues.price;
return {
realEstateId: realEstate.dataValues.id,
price: priceTmp,
createdAt: realEstate.dataValues.createdAt,
updatedAt: realEstate.dataValues.updatedAt
};
});
return queryInterface.bulkInsert(
"PriceHistory",
priceHistoryInitialData,
{}
);
},
async down(queryInterface, Sequelize) {
return queryInterface.bulkDelete("PriceHistory", null, {});
}
};

View File

@@ -8,6 +8,7 @@
"start": "node ./index.js",
"start-mon": "nodemon ./index.js",
"migrate": "cd app && npx sequelize db:migrate",
"seed": "cd app && npx sequelize db:seed:all",
"setup": "docker build -t marketalerts . && docker run -e POSTGRES_USER=docker -e POSTGRES_PASSWORD=docker -e POSTGRES_DB=marketalerts --name pg_marketalerts -d -p 5432:5432 marketalerts && sleep 10 && npm run migrate",
"docker-start": "docker start pg_marketalerts",
"docker-stop": "docker stop pg_marketalerts",

View File

@@ -13,5 +13,5 @@ if (urlToScrape) {
})();
} else {
console.log("No URL to scrape. Use like this : ");
console.log("npm run test-olx-scraper -- URL_TO_SCRAPE");
console.log("npm run test-rental-scraper -- URL_TO_SCRAPE");
}