new code fix
This commit is contained in:
@@ -1248,7 +1248,7 @@ agag.se 2018-01-15
|
|||||||
agagil.se 2018-02-13
|
agagil.se 2018-02-13
|
||||||
agal.se 2018-01-15
|
agal.se 2018-01-15
|
||||||
agaler.se 2018-02-13
|
agaler.se 2018-02-13
|
||||||
agan.se 2017-12-12
|
agan.se 2017-12-13
|
||||||
agapolitikern.se 2018-01-08
|
agapolitikern.se 2018-01-08
|
||||||
agarum.se 2017-12-30
|
agarum.se 2017-12-30
|
||||||
agathared.se 2017-12-29
|
agathared.se 2017-12-29
|
||||||
|
|||||||
63014
crawler/bardate_domains_13_12.txt
Normal file
63014
crawler/bardate_domains_13_12.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -6,7 +6,6 @@ config.domainList = [
|
|||||||
'https://www.iis.se/data/bardate_domains.txt'];
|
'https://www.iis.se/data/bardate_domains.txt'];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//config.domainList = ['/home/bilal/Saburly/domene-svedska/crawler/bardate_domains_11_12.txt'];
|
//config.domainList = ['/home/bilal/Saburly/domene-svedska/crawler/bardate_domains_11_12.txt'];
|
||||||
|
|
||||||
config.seDomainCheck = 'http://free.iis.se/free?q=';
|
config.seDomainCheck = 'http://free.iis.se/free?q=';
|
||||||
|
|||||||
@@ -30,32 +30,26 @@ MongoClient.connect(config.databaseURL).then(mongoDatabase => {
|
|||||||
let getDomainListPromises = [];
|
let getDomainListPromises = [];
|
||||||
|
|
||||||
config.domainList.map(url=>{
|
config.domainList.map(url=>{
|
||||||
let p = links.getDomainList(url).then(res=>{
|
getDomainListPromises.push(links.getDomainList(url).then(res=>{
|
||||||
console.log("One promise done");
|
return database.insertTodayDomains(db,res);
|
||||||
let p2 = database.insertTodayDomains(db,res).then(()=>{
|
}).then(()=>{
|
||||||
let p3 = database.getYesterdayDomains(db).then((result)=>{
|
return database.getYesterdayDomains(db);
|
||||||
|
}).then(result=>{
|
||||||
|
return database.insertExpired(db,result);
|
||||||
|
}).catch(e=>{
|
||||||
|
console.log("Err : " +e);
|
||||||
|
}));
|
||||||
|
|
||||||
result.map((domain)=>{
|
|
||||||
getDomainListPromises.push(database.cleanExpired(db,domain));
|
|
||||||
});
|
|
||||||
|
|
||||||
}).catch((e)=>{
|
|
||||||
console.log(e);
|
|
||||||
});
|
|
||||||
getDomainListPromises.push(p3);
|
|
||||||
});
|
|
||||||
getDomainListPromises.push(p2);
|
|
||||||
});
|
|
||||||
getDomainListPromises.push(p);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
Promise.all(getDomainListPromises).then(()=>{
|
Promise.all(getDomainListPromises).then(()=>{
|
||||||
console.log("All promises done");
|
database.getExpiredDomains(db).then(result=>{
|
||||||
database.getExpiredDomains(db).then((result)=>{
|
return links.checkExpiredDomains(db,result);
|
||||||
links.checkExpiredDomains(db,result).then(()=>{
|
}).then(()=>{
|
||||||
db.close();
|
console.log("Done - closing");
|
||||||
});
|
db.close();
|
||||||
});
|
});
|
||||||
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -17,22 +17,32 @@ module.exports = {
|
|||||||
return getArrayFromDatabase(db, 'expired_list');
|
return getArrayFromDatabase(db, 'expired_list');
|
||||||
},
|
},
|
||||||
|
|
||||||
cleanExpired : function (db, domain){
|
insertExpired : function (db, domains){
|
||||||
return new Promise((resolve,reject)=>{
|
return new Promise((resolve, reject)=>{
|
||||||
db.collection('today').findOne({domainName:domain.domainName}, (err,result)=>{
|
let domainsForInsertion = [];
|
||||||
if (err){
|
let waitingPromises = [];
|
||||||
reject(err);
|
|
||||||
}else{
|
domains.map(domain=>{
|
||||||
if ((result===null) &&(datetime.create().format('Y-m-d')===domain.expirationDate)){
|
|
||||||
db.collection('expired_list').insert(domain);
|
waitingPromises.push(checkDate(db,domain).then(res=>{
|
||||||
|
if (res!==null){
|
||||||
|
domainsForInsertion.push(res);
|
||||||
}
|
}
|
||||||
|
}));
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
Promise.all(waitingPromises).then(()=>{
|
||||||
|
if (domainsForInsertion.length>0){
|
||||||
|
db.collection('expired_list').insert(domainsForInsertion, (err,res)=>{
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
}else{
|
||||||
resolve();
|
resolve();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
},
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const getArrayFromDatabase = function(db, collection){
|
const getArrayFromDatabase = function(db, collection){
|
||||||
@@ -45,4 +55,17 @@ const getArrayFromDatabase = function(db, collection){
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Checks whether a single domain should go on the expired list.
 *
 * Looks the domain up by `domainName` in the `today` collection. The domain is
 * handed back only when no such document exists AND its `expirationDate`
 * equals today's date formatted as 'Y-m-d'.
 *
 * @param {Object} db - database handle exposing `collection(name).findOne(query, callback)`.
 * @param {Object} domain - record with at least `domainName` and `expirationDate`.
 * @returns {Promise<Object|null>} resolves with `domain` when both conditions hold,
 *          otherwise with `null`. The promise is never rejected: a failed lookup
 *          is logged and reported as `null`.
 */
const checkDate = function(db, domain){
    return new Promise((resolve)=>{
        db.collection('today').findOne({domainName: domain.domainName}, (err,result)=>{
            if (err){
                // A failed lookup is still treated as "not expired" (callers wait on
                // this promise without a rejection handler), but it is no longer silent.
                console.log("Err : " + err);
                resolve(null);
                return;
            }
            const today = datetime.create().format('Y-m-d');
            const expiredToday = (result === null) && (today === domain.expirationDate);
            resolve(expiredToday ? domain : null);
        });
    });
}
|
||||||
@@ -7,18 +7,17 @@ var http = require('http');
|
|||||||
module.exports = {
|
module.exports = {
|
||||||
getDomainList : function(url){
|
getDomainList : function(url){
|
||||||
return new Promise((resolve, reject)=>{
|
return new Promise((resolve, reject)=>{
|
||||||
|
|
||||||
getRawDomainList(url).then(raw=>{
|
getRawDomainList(url).then(raw=>{
|
||||||
processDomains(raw).then(result=>{
|
resolve(applyFilter(processDomains(raw)));
|
||||||
applyFilter(result).then(result=>{
|
|
||||||
resolve(result);
|
|
||||||
})
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
|
|
||||||
checkExpiredDomains : function(db, domains){
|
checkExpiredDomains : function(db, domains){
|
||||||
return new Promise((resolve,reject)=>{
|
return new Promise((resolve,reject)=>{
|
||||||
|
let waitingPromises = [];
|
||||||
|
let domainsForRemoval = [];
|
||||||
domains.map(domain=>{
|
domains.map(domain=>{
|
||||||
let checkLink = '';
|
let checkLink = '';
|
||||||
switch(domain.tld){
|
switch(domain.tld){
|
||||||
@@ -31,62 +30,85 @@ module.exports = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let fullName = domain.domainName + '.' + domain.tld;
|
let fullName = domain.domainName + '.' + domain.tld;
|
||||||
http.get(checkLink+punycode.toASCII(fullName), res => {
|
waitingPromises.push(removeOccupiedDomain(db, checkLink+punycode.toASCII(fullName),domain));
|
||||||
res.setEncoding("utf8");
|
});
|
||||||
let body = "";
|
|
||||||
res.on("data", data => {
|
Promise.all(waitingPromises).then(()=>{
|
||||||
body += data;
|
resolve();
|
||||||
});
|
|
||||||
res.on("end", () => {
|
|
||||||
let status = body.split(' ')[0];
|
|
||||||
if (status !== 'free'){
|
|
||||||
db.collection('expired_list').remove({domainName:domain.domainName});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
resolve();
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
var applyFilter = function (domains){
|
var isDomainFree = function (db, url, domain){
|
||||||
return new Promise((resolve,reject)=>{
|
return new Promise((resolve,reject)=>{
|
||||||
//get domain names that only match whole words
|
http.get(url, res => {
|
||||||
let result = [];
|
res.setEncoding("utf8");
|
||||||
domains.map(domain=>{
|
let body = "";
|
||||||
let index = config.words.indexOf(domain.domainName);
|
res.on("data", data => {
|
||||||
if (index !== -1){
|
body += data;
|
||||||
result.push(domain);
|
});
|
||||||
}
|
res.on("end", () => {
|
||||||
|
let status = body.split(' ')[0];
|
||||||
|
if (status === 'free'){
|
||||||
|
resolve(true);
|
||||||
|
}else{
|
||||||
|
resolve(false);
|
||||||
|
}
|
||||||
|
});
|
||||||
});
|
});
|
||||||
resolve(result);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
var processDomains = function(raw){
|
var removeOccupiedDomain = function (db, url,domain){
|
||||||
return new Promise((resolve,reject)=>{
|
return new Promise((resolve,reject)=>{
|
||||||
let result = [];
|
http.get(url, res => {
|
||||||
raw.split('\n').map(domain=>{
|
res.setEncoding("utf8");
|
||||||
let unicodeDomain = punycode.toUnicode(domain);
|
let body = "";
|
||||||
let dot = unicodeDomain.indexOf('.');
|
res.on("data", data => {
|
||||||
let tab = unicodeDomain.indexOf('\t');
|
body += data;
|
||||||
if (dot !== -1){
|
});
|
||||||
let domainName = unicodeDomain.substring(0,dot);
|
res.on("end", () => {
|
||||||
let tld = unicodeDomain.substring(dot+1,tab);
|
let status = body.split(' ')[0];
|
||||||
if (domainName.match(config.swedishLettersOnly)){
|
if (status !== 'free'){
|
||||||
//domain name contains only letters
|
db.collection('expired_list').remove({domainName:domain.domainName, tld: domain.tld});
|
||||||
//line in domain list is formatted as follows : [domain name]\t[expiration date]
|
|
||||||
result.push({domainName: domainName, tld:tld ,expirationDate: domain.split('\t')[1]});
|
|
||||||
}
|
}
|
||||||
}
|
resolve();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
resolve(result);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Keeps only the domains whose whole name appears in `config.words`.
 *
 * @param {Array<Object>} domains - records carrying a `domainName` property.
 * @returns {Array<Object>} a new array with the matching records, in input order.
 */
var applyFilter = function (domains){
    // whole-name lookup: a record passes only if its domainName is itself an entry of config.words
    return domains.filter(entry => config.words.indexOf(entry.domainName) >= 0);
}
|
||||||
|
|
||||||
|
/**
 * Turns the raw text of a domain list into domain records.
 *
 * Each line is expected to look like `[domain name]\t[expiration date]`.
 * The line is converted from punycode to Unicode, the part before the tab is
 * split at its first dot into name and tld, and the record is kept only when
 * the name matches `config.swedishLettersOnly`.
 *
 * @param {string} raw - full text of the list, one entry per line.
 * @returns {Array<{domainName: string, tld: string, expirationDate: (string|undefined)}>}
 *          one record per accepted line, in file order. `expirationDate` is
 *          `undefined` for a line that carries no tab-separated date.
 */
var processDomains = function(raw){
    let result = [];
    raw.split('\n').forEach(rawLine=>{
        // A list saved with CRLF endings would otherwise leave '\r' glued to the
        // date, so it could never equal a 'Y-m-d' formatted date.
        let line = rawLine.replace(/\r$/, '');
        let unicodeLine = punycode.toUnicode(line);
        let tab = unicodeLine.indexOf('\t');
        // Only the text before the tab is the domain. Without a tab the whole line
        // is; substring(dot+1, -1) would swap its arguments and return "name.".
        let host = (tab === -1) ? unicodeLine : unicodeLine.substring(0, tab);
        let dot = host.indexOf('.');
        if (dot === -1){
            // no dot in the domain part (blank or malformed line) - nothing to record
            return;
        }
        let domainName = host.substring(0, dot);
        let tld = host.substring(dot + 1);
        if (domainName.match(config.swedishLettersOnly)){
            // the date is taken from the original (non-decoded) line, after the tab
            result.push({domainName: domainName, tld: tld, expirationDate: line.split('\t')[1]});
        }
    });
    return result;
}
|
||||||
|
|
||||||
var getRawDomainList = function (url) {
|
var getRawDomainList = function (url) {
|
||||||
return new Promise((resolve, reject)=>{
|
return new Promise((resolve, reject)=>{
|
||||||
if (url[0]==='/'){
|
if (url[0]==='/'){
|
||||||
|
|||||||
Reference in New Issue
Block a user