full filter

This commit is contained in:
GotPPay
2017-12-08 10:57:47 +01:00
parent 100299b327
commit c69b45b632
8 changed files with 184323 additions and 78507 deletions

61700
crawler/bardate_domains.txt Normal file

File diff suppressed because it is too large Load Diff

46129
crawler/bardate_domains_nu.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +1,18 @@
var config = {}; var config = {};
/*
config.domainList = [ config.domainList = [
'https://www.iis.se/data/bardate_domains_nu.txt', 'https://www.iis.se/data/bardate_domains_nu.txt',
'https://www.iis.se/data/bardate_domains.txt']; 'https://www.iis.se/data/bardate_domains.txt'];
*/
config.domainList = ['/home/bilal/Saburly/domene-svedska/crawler/bardate_domains.txt'];
config.wordList = __dirname + '/words.txt'; config.wordList = __dirname + '/words.txt';
config.lettersOnlyRegex = /^[A-Za-z]+$/; config.lettersOnlyRegex = /^[A-Za-z]+$/;
config.swedishLettersOnly = /^[A-Za-zÅåÄäÖöüÜáÁèÈàÀéÉëËíÍÆæøØçÇ]+$/;
config.databaseURL = 'mongodb://localhost:27017/kivi'; config.databaseURL = 'mongodb://localhost:27017/kivi';

View File

@@ -15,17 +15,22 @@ MongoClient.connect(config.databaseURL).then(database => {
config.words=[]; config.words=[];
}else{ }else{
config.words = data.split('\n'); config.words = data.split('\n');
let tmpWords = config.words.map((word,index)=>{
return word.toLowerCase();
});
config.words = tmpWords;
config.domainList.map(url=>{
//get domain list from url
links.getDomainList(url, (res)=>{
res.map(obj =>{
fs.appendFileSync('izlaz.txt',obj.domainName+'\n',err=>{console.log('er:' + err)})
});
});
});
} }
}); });
config.domainList.map(url=>{
//get domain list from url
links.getDomainList(url, (res)=>{
res.map(obj =>{fs.appendFile('izlaz.txt',obj.domainName+'\n',err=>{console.log('er:' + err)})});
});
});
}).catch(reason=>{ }).catch(reason=>{
console.log("Error : " + reason); console.log("Error : " + reason);
}); });

View File

@@ -1,15 +1,19 @@
const config = require('../config'); const config = require('../config');
const https = require("https"); const https = require("https");
const punycode = require('punycode');
var fs = require('fs');
module.exports = { module.exports = {
getDomainList : function(url, callback){ getDomainList : function(url, callback){
getRawDomainList(url,(raw)=>{ getRawDomainList(url,(raw)=>{
let result = []; let result = [];
raw.split('\n').map(domain=>{ raw.split('\n').map(domain=>{
let dot = domain.indexOf('.'); let unicodeDomain = punycode.toUnicode(domain);
let dot = unicodeDomain.indexOf('.');
if (dot !== -1){ if (dot !== -1){
let domainName = domain.substring(0,dot); let domainName = unicodeDomain.substring(0,dot);
if (domainName.match(config.lettersOnlyRegex)){ if (domainName.match(config.swedishLettersOnly)){
//domain name contains only letters //domain name contains only letters
//line in domain list is formatted as follows : [domain name]\t[expiration date] //line in domain list is formatted as follows : [domain name]\t[expiration date]
result.push({domainName: domainName, expirationDate: domain.split('\t')[1]}); result.push({domainName: domainName, expirationDate: domain.split('\t')[1]});
@@ -17,6 +21,7 @@ module.exports = {
} }
}); });
applyFilter(result, callback); applyFilter(result, callback);
console.log("Result Len : " + result.length);
}); });
} }
}; };
@@ -34,14 +39,25 @@ var applyFilter = function (domains, callback){
} }
var getRawDomainList = function (url, callback) { var getRawDomainList = function (url, callback) {
https.get(url, res => { if (url[0]==='/'){
res.setEncoding("utf8"); //it's local file
let body = ""; fs.readFile(url,'utf8',(err,data)=>{
res.on("data", data => { if (err){
body += data; console.log("err : " + err);
}else{
callback(data);
}
}); });
res.on("end", () => { }else{
callback(body); https.get(url, res => {
res.setEncoding("utf8");
let body = "";
res.on("data", data => {
body += data;
});
res.on("end", () => {
callback(body);
});
}); });
}); }
} }

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,6 @@
{ {
"dependencies": { "dependencies": {
"mongodb": "^2.2.33" "mongodb": "^2.2.33",
"punycode": "^2.1.0"
} }
} }

File diff suppressed because it is too large Load Diff