Files
old-domene-svedska/crawler/crawler.js
2017-12-14 02:45:51 +01:00

59 lines
1.9 KiB
JavaScript

var config = require('./config');
const links = require('./helper/links');
const database = require('./helper/database');
var MongoClient = require ('mongodb').MongoClient;
var ObjectID = require ('mongodb').ObjectID;
var fs = require('fs');
/**
 * Crawler entry point.
 *
 * Flow:
 *   1. Connect to MongoDB at `config.databaseURL`.
 *   2. Prepare the database: enable text search, ensure the text index on
 *      `expired_list.domainName`, then rotate collections
 *      (drop `yesterday`, rename `today` -> `yesterday`, create `today`).
 *   3. Load the word list from `config.wordList` into `config.words`
 *      (lower-cased, one entry per line).
 *   4. For every URL in `config.domainList`, fetch the domain list, store it
 *      as today's domains, read yesterday's domains and record the expired ones.
 *   5. Check the expired domains and close the connection.
 *
 * The connection is closed on every path (success, word-list read failure,
 * crawl failure) so the process can exit.
 */
MongoClient.connect(config.databaseURL).then(mongoDatabase => {
    const db = mongoDatabase;

    // Runs a single setup action and logs (rather than propagates) its failure.
    // Setup is best-effort: e.g. dropping `yesterday` or renaming `today` is
    // expected to fail on a first run when those collections do not exist yet.
    const runSetupStep = (label, action) =>
        Promise.resolve()
            .then(action)
            .catch(e => {
                console.log(`Setup step '${label}' failed : ${e}`);
            });

    // Executes the setup steps strictly one after another; the rotation
    // (drop -> rename -> create) is only meaningful in this order.
    const prepareDatabase = () =>
        runSetupStep('enable text search', () =>
            db.executeDbAdminCommand({ setParameter: true, textSearchEnabled: true }))
            .then(() => runSetupStep('create expired_list index', () =>
                db.collection('expired_list').createIndex({ domainName: 'text' }, { unique: true })))
            .then(() => runSetupStep('drop yesterday', () =>
                db.collection('yesterday').drop()))
            .then(() => runSetupStep('rename today to yesterday', () =>
                db.collection('today').rename('yesterday')))
            .then(() => runSetupStep('create today', () =>
                db.createCollection('today')));

    //Get word list into memory
    // Resolves to true when the list was loaded, false when reading failed
    // (in which case `config.words` is reset to an empty list and the crawl
    // is skipped).
    const loadWordList = () => new Promise(resolve => {
        fs.readFile(config.wordList, 'utf8', (err, data) => {
            if (err) {
                console.log(err);
                config.words = [];
                resolve(false);
                return;
            }
            config.words = data.split('\n').map(word => word.toLowerCase());
            resolve(true);
        });
    });

    // Processes one source URL. Failures are logged per URL so that one bad
    // source does not abort the others.
    const crawlDomainList = url =>
        Promise.resolve()
            .then(() => links.getDomainList(url))
            .then(res => database.insertTodayDomains(db, res))
            .then(() => database.getYesterdayDomains(db))
            .then(result => database.insertExpired(db, result))
            .catch(e => {
                console.log(`Err : ${e}`);
            });

    // Processes all source URLs concurrently, then checks the expired domains.
    const crawl = () =>
        Promise.all(config.domainList.map(crawlDomainList))
            .then(() => database.getExpiredDomains(db))
            .then(result => links.checkExpiredDomains(db, result))
            .then(() => {
                console.log("Done - closing");
            });

    return prepareDatabase()
        .then(loadWordList)
        .then(wordListLoaded => (wordListLoaded ? crawl() : undefined))
        .catch(e => {
            console.log(`Err : ${e}`);
        })
        // Always release the connection, whatever happened above.
        .then(() => db.close());
}).catch(reason => {
    console.log(`Error : ${reason}`);
});