Files
old-domene-svedska/crawler/crawler.js
2017-12-08 10:57:47 +01:00

36 lines
1.1 KiB
JavaScript

var config = require('./config');
const links = require('./helper/links');
var MongoClient = require ('mongodb').MongoClient;
var ObjectID = require ('mongodb').ObjectID;
var fs = require('fs');
/**
 * Crawler entry point.
 *
 * Flow:
 *  1. Connect to MongoDB at `config.databaseURL`.
 *  2. Read the word list file at `config.wordList` and store its lines,
 *     lower-cased, on `config.words`.
 *  3. For every URL in `config.domainList`, call `links.getDomainList` and
 *     append the `domainName` of each returned entry to `izlaz.txt`,
 *     one per line.
 *
 * A connection failure (or any error thrown synchronously inside the `then`
 * handler) is logged by the trailing `catch`.
 */
MongoClient.connect(config.databaseURL).then(database => {
  // NOTE(review): `database` is not used anywhere in this script and the
  // connection is never closed here — confirm whether a helper depends on
  // the open connection before adding a close().
  //db.collection ('yesterday').createIndex ({domainName: 'text'});

  // Get word list into memory.
  fs.readFile(config.wordList, 'utf8', (err, data) => {
    if (err) {
      // Without a word list the crawl is skipped; leave an empty list behind.
      console.log(err);
      config.words = [];
      return;
    }

    // One word per line, normalised to lower case.
    config.words = data.split('\n').map(word => word.toLowerCase());

    config.domainList.forEach(url => {
      // Get domain list from url.
      links.getDomainList(url, res => {
        res.forEach(obj => {
          // appendFileSync has no callback parameter: failures are thrown,
          // so they must be caught here to be logged.
          try {
            fs.appendFileSync('izlaz.txt', obj.domainName + '\n');
          } catch (appendErr) {
            console.log('er:' + appendErr);
          }
        });
      });
    });
  });
}).catch(reason => {
  console.log("Error : " + reason);
});