Files
old-domene-svedska/crawler/crawler.js

65 lines
2.2 KiB
JavaScript
Raw Normal View History

2017-12-07 12:36:41 +01:00
var config = require('./config');
const links = require('./helper/links');
2017-12-12 22:08:02 +01:00
const database = require('./helper/database');
2017-12-07 12:36:41 +01:00
var MongoClient = require ('mongodb').MongoClient;
var ObjectID = require ('mongodb').ObjectID;
var fs = require('fs');
2017-12-12 22:08:02 +01:00
/*
 * Crawler entry point.
 *
 * Connects to MongoDB, rotates the `today` collection into `yesterday`, loads
 * the word list into `config.words`, downloads every list in
 * `config.domainList`, stores the results, runs the expired-domain cleanup and
 * check, and finally closes the connection.
 *
 * Every asynchronous step is returned into a single promise chain so that the
 * connection is closed only after all outstanding work has settled, and is
 * closed on failure as well as on success.
 */

/**
 * Runs one database setup step and logs, rather than propagates, its failure.
 *
 * The setup commands can legitimately fail (for example dropping a collection
 * that does not exist yet), so a failure here must not abort the run.
 *
 * @param {string} label - Short description used in the log line.
 * @param {function(): *} step - Function that starts the step; may return a promise.
 * @returns {Promise<void>} Always resolves.
 */
function bestEffort(label, step) {
  return Promise.resolve()
    .then(step)
    .catch(err => {
      console.log(label + ' failed: ' + err);
    });
}

/**
 * Issues the database setup commands one after another, in the order they
 * appear below, each one starting only after the previous one has settled.
 *
 * @param {object} db - Connected database handle.
 * @returns {Promise<void>} Resolves once every step has settled.
 */
function prepareCollections(db) {
  const steps = [
    ['enable text search', () => db.executeDbAdminCommand({ setParameter: true, textSearchEnabled: true })],
    ['create expired_list index', () => db.collection('expired_list').createIndex({ domainName: 'text' }, { unique: true })],
    ['drop yesterday', () => db.collection('yesterday').drop()],
    ['rename today to yesterday', () => db.collection('today').rename('yesterday')],
    ['create today', () => db.createCollection('today')]
  ];
  return steps.reduce(
    (chain, step) => chain.then(() => bestEffort(step[0], step[1])),
    Promise.resolve()
  );
}

/**
 * Gets the word list into memory as lower-cased lines in `config.words`.
 *
 * On a read error the error is logged and `config.words` is set to an empty
 * array; the crawl is then skipped, as it was before this rewrite.
 *
 * @returns {Promise<boolean>} `true` when the list was read, `false` otherwise.
 */
function loadWordList() {
  return new Promise(resolve => {
    fs.readFile(config.wordList, 'utf8', (err, data) => {
      if (err) {
        console.log(err);
        config.words = [];
        resolve(false);
        return;
      }
      config.words = data.split('\n').map(word => word.toLowerCase());
      resolve(true);
    });
  });
}

/**
 * Waits until every promise has settled, then rejects with the first failure
 * (if any). Unlike a bare `Promise.all`, nothing is still running when the
 * returned promise settles, so it is safe to close shared resources afterwards.
 *
 * @param {Array<Promise>} promises - Promises to wait for.
 * @returns {Promise<void>}
 */
function waitForAll(promises) {
  let failed = false;
  let firstError;
  const settled = promises.map(promise =>
    promise.catch(err => {
      if (!failed) {
        failed = true;
        firstError = err;
      }
    })
  );
  return Promise.all(settled).then(() => {
    if (failed) {
      throw firstError;
    }
  });
}

/**
 * Processes one domain-list URL: downloads it, inserts the result through
 * `database.insertTodayDomains`, then calls `database.cleanExpired` for every
 * entry returned by `database.getYesterdayDomains`.
 *
 * Failures while reading or cleaning yesterday's domains are logged and do not
 * fail the URL; download and insert failures reject the returned promise.
 *
 * NOTE(review): the cleanup pass runs once per URL, possibly before the other
 * URLs have been inserted. That ordering is kept from the original code;
 * confirm against `cleanExpired` whether it should instead run once after all
 * inserts.
 *
 * @param {object} db - Connected database handle.
 * @param {string} url - Entry of `config.domainList`.
 * @returns {Promise<void>}
 */
function crawlDomainList(db, url) {
  const logAndContinue = e => {
    console.log(e);
  };
  return links.getDomainList(url)
    .then(res => {
      console.log("One promise done");
      return database.insertTodayDomains(db, res);
    })
    .then(() => {
      return database.getYesterdayDomains(db)
        .then(result => Promise.all(result.map(domain =>
          // Each cleanup is settled individually so that one failure neither
          // hides the others nor lets the chain continue while they still run.
          Promise.resolve(database.cleanExpired(db, domain)).catch(logAndContinue)
        )))
        .catch(logAndContinue);
    });
}

/**
 * Crawls every configured list and then runs the expired-domain check.
 *
 * @param {object} db - Connected database handle.
 * @returns {Promise<*>} Settles when `links.checkExpiredDomains` has finished.
 */
function runCrawl(db) {
  const pending = config.domainList.map(url => crawlDomainList(db, url));
  return waitForAll(pending)
    .then(() => {
      console.log("All promises done");
      return database.getExpiredDomains(db);
    })
    .then(result => links.checkExpiredDomains(db, result));
}

/**
 * Closes the connection, logging instead of propagating a close failure.
 *
 * @param {object} db - Connected database handle.
 * @returns {Promise<void>} Always resolves.
 */
function closeQuietly(db) {
  return Promise.resolve()
    .then(() => db.close())
    .catch(err => {
      console.log(err);
    });
}

MongoClient.connect(config.databaseURL)
  .then(db => {
    return prepareCollections(db)
      .then(() => loadWordList())
      .then(loaded => (loaded ? runCrawl(db) : undefined))
      .then(
        () => closeQuietly(db),
        // Close on failure too, then hand the original error to the final catch.
        err => closeQuietly(db).then(() => {
          throw err;
        })
      );
  })
  .catch(reason => {
    console.log("Error : " + reason);
  });