// Files
// old-domene-svedska/crawler/crawler.js
// 2017-12-11 18:22:06 +01:00
//
// 88 lines
// 4.0 KiB
// JavaScript

var config = require('./config');
const links = require('./helper/links');
var MongoClient = require ('mongodb').MongoClient;
var ObjectID = require ('mongodb').ObjectID;
var fs = require('fs');
var datetime = require('node-datetime');
var http = require('http');
const punycode = require('punycode');
/*
 * Crawler entry point.
 *
 * Flow of one run:
 *   1. Rotate collections: 'today' becomes 'yesterday', a fresh 'today' is created.
 *   2. Load the word list into config.words (lower-cased).
 *   3. Fetch every list in config.domainList and insert it into 'today'.
 *   4. Copy into 'expired_list' every domain that was in 'yesterday', is
 *      missing from 'today', and whose expirationDate equals the current date.
 *   5. Query the availability service for every entry in 'expired_list' and
 *      remove the ones that are not reported as 'free'.
 *   6. Close the database and exit.
 *
 * Every step waits for the previous one. Previously the steps were fired
 * without waiting and process.exit(0) ran before any availability response
 * (or pending database write) could complete.
 */

/**
 * Logs a non-fatal error using the same "Error : " prefix as the rest of the file.
 * @param {string} context - Short description of the step that failed.
 * @param {*} err - The error or rejection reason.
 */
function logError(context, err) {
  console.log("Error : " + context + " - " + err);
}

/**
 * Runs a setup step whose failure must not abort the crawl (for example
 * dropping a collection that does not exist yet on the very first run).
 * @param {string} context - Description used when logging a failure.
 * @param {function(): Promise} step - Function starting the operation.
 * @returns {Promise} Always resolves; failures are logged.
 */
function bestEffort(context, step) {
  // Promise.resolve().then(...) also captures synchronous throws from step().
  return Promise.resolve().then(step).catch(err => logError(context, err));
}

/**
 * Prepares the database: enables text search, ensures the index on
 * 'expired_list', and rotates 'today' into 'yesterday'.
 * @param {Object} db - Connected database handle.
 * @returns {Promise} Resolves once the empty 'today' collection exists.
 */
function rotateCollections(db) {
  return bestEffort('enable text search', () =>
    db.executeDbAdminCommand({ setParameter: true, textSearchEnabled: true })
  )
    .then(() => bestEffort('create expired_list index', () =>
      db.collection('expired_list').createIndex({ domainName: 'text' }, { unique: true })
    ))
    // drop must finish before rename, otherwise the rename target may still exist.
    .then(() => bestEffort('drop yesterday', () => db.collection('yesterday').drop()))
    .then(() => bestEffort('rename today to yesterday', () =>
      db.collection('today').rename('yesterday')
    ))
    .then(() => db.createCollection('today'));
}

/**
 * Reads config.wordList and stores the lower-cased lines in config.words.
 * On failure config.words is set to an empty array and the promise rejects,
 * so no crawl is performed without a word list (as before).
 * @returns {Promise} Resolves when config.words is populated.
 */
function loadWordList() {
  return new Promise((resolve, reject) => {
    fs.readFile(config.wordList, 'utf8', (err, data) => {
      if (err) {
        config.words = [];
        reject(err);
        return;
      }
      config.words = data.split('\n').map(word => word.toLowerCase());
      resolve();
    });
  });
}

/**
 * Fetches one domain list and inserts it into 'today'.
 * NOTE(review): links.getDomainList only reports a result through its
 * callback; if it never calls back for a URL, the run will not finish —
 * confirm the helper's failure behaviour.
 * @param {Object} db - Connected database handle.
 * @param {string} url - Entry of config.domainList.
 * @returns {Promise} Resolves once the list has been stored.
 */
function importDomainList(db, url) {
  return new Promise(resolve => links.getDomainList(url, resolve)).then(domains => {
    // Nothing to store; inserting an empty batch is an error in the driver.
    if (!domains || (Array.isArray(domains) && domains.length === 0)) {
      return null;
    }
    return db.collection('today').insert(domains);
  });
}

/**
 * Copies to 'expired_list' every domain from 'yesterday' that expires on the
 * current date and is no longer present in 'today'.
 * @param {Object} db - Connected database handle.
 * @returns {Promise} Resolves when all lookups and inserts have completed.
 */
function recordExpiredDomains(db) {
  // Computed once instead of once per domain.
  const currentDate = datetime.create().format('Y-m-d');
  return db.collection('yesterday').find({}).toArray().then(previous => {
    // Filter on the date first so only real candidates cost a database lookup.
    const candidates = previous.filter(domain => domain.expirationDate === currentDate);
    return Promise.all(candidates.map(domain =>
      db.collection('today').findOne({ domainName: domain.domainName }).then(match => {
        if (match !== null) {
          return null;
        }
        // A rejected insert (e.g. the domain is already listed) must not
        // abort processing of the remaining candidates.
        return db.collection('expired_list').insert(domain)
          .catch(err => logError('insert ' + domain.domainName, err));
      })
    ));
  });
}

/**
 * Returns the availability-check URL prefix for a TLD.
 * @param {string} tld - Top-level domain of the entry.
 * @returns {string} URL prefix from config, or '' for an unsupported TLD.
 */
function availabilityEndpoint(tld) {
  switch (tld) {
    case 'se':
      return config.seDomainCheck;
    case 'nu':
      return config.nuDomainCheck;
    default:
      return '';
  }
}

/**
 * Requests the availability status of a domain.
 * @param {string} checkLink - URL prefix of the availability service.
 * @param {string} fullName - Domain name including TLD (may contain non-ASCII).
 * @returns {Promise<string>} First space-separated token of the response body.
 */
function fetchStatus(checkLink, fullName) {
  return new Promise((resolve, reject) => {
    http.get(checkLink + punycode.toASCII(fullName), res => {
      if (res.statusCode !== 200) {
        // An error page is not an answer about the domain; drain and report.
        res.resume();
        reject(new Error('unexpected HTTP status ' + res.statusCode));
        return;
      }
      res.setEncoding("utf8");
      let body = "";
      res.on("data", chunk => {
        body += chunk;
      });
      res.on("end", () => resolve(body.split(' ')[0]));
      res.on("error", reject);
    }).on('error', reject);
  });
}

/**
 * Removes a domain from 'expired_list' unless the availability service
 * reports it as 'free'. Entries that cannot be checked (unsupported TLD,
 * network or HTTP failure) are kept and the problem is logged.
 * @param {Object} db - Connected database handle.
 * @param {Object} domain - Document from 'expired_list'.
 * @returns {Promise} Always resolves.
 */
function verifyDomain(db, domain) {
  const checkLink = availabilityEndpoint(domain.tld);
  const fullName = domain.domainName + '.' + domain.tld;
  if (!checkLink) {
    logError('availability check', 'unsupported tld for ' + fullName);
    return Promise.resolve();
  }
  return fetchStatus(checkLink, fullName)
    .then(status => {
      if (status === 'free') {
        return null;
      }
      return db.collection('expired_list').remove({ domainName: domain.domainName });
    })
    .catch(err => logError('availability check for ' + fullName, err));
}

/**
 * Re-checks every entry of 'expired_list' against the availability service.
 * @param {Object} db - Connected database handle.
 * @returns {Promise} Resolves when every entry has been processed.
 */
function verifyExpiredList(db) {
  return db.collection('expired_list').find({}).toArray()
    .then(entries => Promise.all(entries.map(domain => verifyDomain(db, domain))));
}

/**
 * Executes the complete crawl on an open database connection.
 * @param {Object} db - Connected database handle.
 * @returns {Promise} Resolves when the crawl has finished.
 */
function runCrawl(db) {
  return rotateCollections(db)
    .then(() => loadWordList())
    // All lists must be stored before comparing, otherwise domains from a
    // list that is still being fetched look as if they had been dropped.
    .then(() => Promise.all(config.domainList.map(url => importDomainList(db, url))))
    .then(() => recordExpiredDomains(db))
    .then(() => verifyExpiredList(db));
}

MongoClient.connect(config.databaseURL).then(database => {
  const db = database;
  const closeDb = () => db.close().catch(err => logError('close database', err));
  // Close the connection on success and on failure, then pass the outcome on.
  return runCrawl(db).then(
    () => closeDb(),
    err => closeDb().then(() => {
      throw err;
    })
  );
}).then(() => {
  process.exit(0);
}).catch(reason => {
  console.log("Error : " + reason);
  // Signal the failure to the caller without cutting off pending output.
  process.exitCode = 1;
});