new code fix
This commit is contained in:
@@ -1248,7 +1248,7 @@ agag.se 2018-01-15
|
||||
agagil.se 2018-02-13
|
||||
agal.se 2018-01-15
|
||||
agaler.se 2018-02-13
|
||||
agan.se 2017-12-12
|
||||
agan.se 2017-12-13
|
||||
agapolitikern.se 2018-01-08
|
||||
agarum.se 2017-12-30
|
||||
agathared.se 2017-12-29
|
||||
|
||||
63014
crawler/bardate_domains_13_12.txt
Normal file
63014
crawler/bardate_domains_13_12.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -6,7 +6,6 @@ config.domainList = [
|
||||
'https://www.iis.se/data/bardate_domains.txt'];
|
||||
|
||||
|
||||
|
||||
//config.domainList = ['/home/bilal/Saburly/domene-svedska/crawler/bardate_domains_11_12.txt'];
|
||||
|
||||
config.seDomainCheck = 'http://free.iis.se/free?q=';
|
||||
|
||||
@@ -30,32 +30,26 @@ MongoClient.connect(config.databaseURL).then(mongoDatabase => {
|
||||
let getDomainListPromises = [];
|
||||
|
||||
config.domainList.map(url=>{
|
||||
let p = links.getDomainList(url).then(res=>{
|
||||
console.log("One promise done");
|
||||
let p2 = database.insertTodayDomains(db,res).then(()=>{
|
||||
let p3 = database.getYesterdayDomains(db).then((result)=>{
|
||||
getDomainListPromises.push(links.getDomainList(url).then(res=>{
|
||||
return database.insertTodayDomains(db,res);
|
||||
}).then(()=>{
|
||||
return database.getYesterdayDomains(db);
|
||||
}).then(result=>{
|
||||
return database.insertExpired(db,result);
|
||||
}).catch(e=>{
|
||||
console.log("Err : " +e);
|
||||
}));
|
||||
|
||||
result.map((domain)=>{
|
||||
getDomainListPromises.push(database.cleanExpired(db,domain));
|
||||
});
|
||||
|
||||
}).catch((e)=>{
|
||||
console.log(e);
|
||||
});
|
||||
getDomainListPromises.push(p3);
|
||||
});
|
||||
getDomainListPromises.push(p2);
|
||||
});
|
||||
getDomainListPromises.push(p);
|
||||
});
|
||||
|
||||
Promise.all(getDomainListPromises).then(()=>{
|
||||
console.log("All promises done");
|
||||
database.getExpiredDomains(db).then((result)=>{
|
||||
links.checkExpiredDomains(db,result).then(()=>{
|
||||
database.getExpiredDomains(db).then(result=>{
|
||||
return links.checkExpiredDomains(db,result);
|
||||
}).then(()=>{
|
||||
console.log("Done - closing");
|
||||
db.close();
|
||||
});
|
||||
});
|
||||
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
@@ -17,22 +17,32 @@ module.exports = {
|
||||
return getArrayFromDatabase(db, 'expired_list');
|
||||
},
|
||||
|
||||
cleanExpired : function (db, domain){
|
||||
return new Promise((resolve,reject)=>{
|
||||
db.collection('today').findOne({domainName:domain.domainName}, (err,result)=>{
|
||||
if (err){
|
||||
reject(err);
|
||||
}else{
|
||||
if ((result===null) &&(datetime.create().format('Y-m-d')===domain.expirationDate)){
|
||||
db.collection('expired_list').insert(domain);
|
||||
insertExpired : function (db, domains){
|
||||
return new Promise((resolve, reject)=>{
|
||||
let domainsForInsertion = [];
|
||||
let waitingPromises = [];
|
||||
|
||||
domains.map(domain=>{
|
||||
|
||||
waitingPromises.push(checkDate(db,domain).then(res=>{
|
||||
if (res!==null){
|
||||
domainsForInsertion.push(res);
|
||||
}
|
||||
}));
|
||||
|
||||
});
|
||||
|
||||
Promise.all(waitingPromises).then(()=>{
|
||||
if (domainsForInsertion.length>0){
|
||||
db.collection('expired_list').insert(domainsForInsertion, (err,res)=>{
|
||||
resolve();
|
||||
});
|
||||
}else{
|
||||
resolve();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
},
|
||||
};
|
||||
|
||||
const getArrayFromDatabase = function(db, collection){
|
||||
@@ -46,3 +56,16 @@ const getArrayFromDatabase = function(db, collection){
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Decides whether `domain` qualifies for the expired list.
 *
 * The domain is looked up by `domainName` in the `today` collection. It
 * qualifies only when no document is found and its `expirationDate` equals
 * the current date formatted with 'Y-m-d'.
 *
 * @param {Object} db - Database handle exposing `collection(name)`.
 * @param {Object} domain - Record with `domainName` and `expirationDate`.
 * @returns {Promise<Object|null>} The same `domain` object when it
 *     qualifies, otherwise null. Never rejects: a failed lookup is logged
 *     and reported as null so one bad lookup does not abort a whole batch.
 */
const checkDate = function(db, domain){
    return new Promise((resolve)=>{
        db.collection('today').findOne({domainName: domain.domainName}, (err,result)=>{
            if (err){
                // Keep the best-effort contract, but leave a trace of the failure.
                console.log("Err : " + err);
                resolve(null);
                return;
            }
            const expiresToday = (result===null) && (datetime.create().format('Y-m-d')===domain.expirationDate);
            resolve(expiresToday ? domain : null);
        });
    });
};
|
||||
@@ -7,18 +7,17 @@ var http = require('http');
|
||||
module.exports = {
|
||||
getDomainList : function(url){
|
||||
return new Promise((resolve, reject)=>{
|
||||
|
||||
getRawDomainList(url).then(raw=>{
|
||||
processDomains(raw).then(result=>{
|
||||
applyFilter(result).then(result=>{
|
||||
resolve(result);
|
||||
})
|
||||
});
|
||||
resolve(applyFilter(processDomains(raw)));
|
||||
});
|
||||
});
|
||||
},
|
||||
|
||||
checkExpiredDomains : function(db, domains){
|
||||
return new Promise((resolve,reject)=>{
|
||||
let waitingPromises = [];
|
||||
let domainsForRemoval = [];
|
||||
domains.map(domain=>{
|
||||
let checkLink = '';
|
||||
switch(domain.tld){
|
||||
@@ -31,7 +30,39 @@ module.exports = {
|
||||
}
|
||||
|
||||
let fullName = domain.domainName + '.' + domain.tld;
|
||||
http.get(checkLink+punycode.toASCII(fullName), res => {
|
||||
waitingPromises.push(removeOccupiedDomain(db, checkLink+punycode.toASCII(fullName),domain));
|
||||
});
|
||||
|
||||
Promise.all(waitingPromises).then(()=>{
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Asks the availability service at `url` whether a domain is free.
 *
 * The response body is read as UTF-8 and its first space-separated token is
 * compared against 'free'.
 *
 * @param {Object} db - Not used by this function.
 * @param {string} url - Full lookup URL passed to `http.get`.
 * @param {Object} domain - Not used by this function.
 * @returns {Promise<boolean>} true when the first token of the body is
 *     'free', false otherwise. Rejects when the request or the response
 *     stream emits an error, so a network failure no longer leaves the
 *     promise pending forever.
 */
var isDomainFree = function (db, url, domain){
    return new Promise((resolve,reject)=>{
        const request = http.get(url, res => {
            res.setEncoding("utf8");
            let body = "";
            res.on("data", data => {
                body += data;
            });
            res.on("error", reject);
            res.on("end", () => {
                const status = body.split(' ')[0];
                resolve(status === 'free');
            });
        });
        // Connection-level failures (DNS, refused, reset) surface on the request.
        request.on("error", reject);
    });
};
|
||||
|
||||
var removeOccupiedDomain = function (db, url,domain){
|
||||
return new Promise((resolve,reject)=>{
|
||||
http.get(url, res => {
|
||||
res.setEncoding("utf8");
|
||||
let body = "";
|
||||
res.on("data", data => {
|
||||
@@ -40,21 +71,15 @@ module.exports = {
|
||||
res.on("end", () => {
|
||||
let status = body.split(' ')[0];
|
||||
if (status !== 'free'){
|
||||
db.collection('expired_list').remove({domainName:domain.domainName});
|
||||
db.collection('expired_list').remove({domainName:domain.domainName, tld: domain.tld});
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
resolve();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
var applyFilter = function (domains){
|
||||
return new Promise((resolve,reject)=>{
|
||||
//get domain names that only match whole words
|
||||
let result = [];
|
||||
domains.map(domain=>{
|
||||
let index = config.words.indexOf(domain.domainName);
|
||||
@@ -62,12 +87,10 @@ var applyFilter = function (domains){
|
||||
result.push(domain);
|
||||
}
|
||||
});
|
||||
resolve(result);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
var processDomains = function(raw){
|
||||
return new Promise((resolve,reject)=>{
|
||||
let result = [];
|
||||
raw.split('\n').map(domain=>{
|
||||
let unicodeDomain = punycode.toUnicode(domain);
|
||||
@@ -83,8 +106,7 @@ var processDomains = function(raw){
|
||||
}
|
||||
}
|
||||
});
|
||||
resolve(result);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
var getRawDomainList = function (url) {
|
||||
|
||||
Reference in New Issue
Block a user