code fix
This commit is contained in:
@@ -1248,7 +1248,7 @@ agag.se 2018-01-15
|
||||
agagil.se 2018-02-13
|
||||
agal.se 2018-01-15
|
||||
agaler.se 2018-02-13
|
||||
agan.se 2018-01-15
|
||||
agan.se 2017-12-12
|
||||
agapolitikern.se 2018-01-08
|
||||
agarum.se 2017-12-30
|
||||
agathared.se 2017-12-29
|
||||
|
||||
@@ -1252,7 +1252,6 @@ agag.se 2018-01-15
|
||||
agagil.se 2018-02-13
|
||||
agal.se 2018-01-15
|
||||
agaler.se 2018-02-13
|
||||
agan.se 2018-01-15
|
||||
agapolitikern.se 2018-01-08
|
||||
agarum.se 2017-12-30
|
||||
agathared.se 2017-12-29
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
var config = require('./config');
|
||||
const links = require('./helper/links');
|
||||
const database = require('./helper/database');
|
||||
var MongoClient = require ('mongodb').MongoClient;
|
||||
var ObjectID = require ('mongodb').ObjectID;
|
||||
var fs = require('fs');
|
||||
var datetime = require('node-datetime');
|
||||
var http = require('http');
|
||||
const punycode = require('punycode');
|
||||
|
||||
MongoClient.connect(config.databaseURL).then(database => {
|
||||
let db = database;
|
||||
|
||||
MongoClient.connect(config.databaseURL).then(mongoDatabase => {
|
||||
let db = mongoDatabase;
|
||||
|
||||
db.executeDbAdminCommand( { setParameter: true, textSearchEnabled : true});
|
||||
db.collection('expired_list').createIndex({domainName: 'text'}, {unique: true});
|
||||
@@ -30,52 +31,27 @@ MongoClient.connect(config.databaseURL).then(database => {
|
||||
|
||||
|
||||
config.domainList.map(url=>{
|
||||
//get domain list from url
|
||||
links.getDomainList(url, (res)=>{
|
||||
db.collection('today').insert(res,()=>{
|
||||
//insertion done, compare domains with yesterday
|
||||
db.collection('yesterday').find({}).toArray((err,result)=>{
|
||||
if (err){
|
||||
console.log("Error : " + err);
|
||||
}else{
|
||||
result.map((domain)=>{
|
||||
db.collection('today').findOne({domainName:domain.domainName}, (err,result)=>{
|
||||
if (result===null){
|
||||
if (datetime.create().format('Y-m-d')===domain.expirationDate){
|
||||
db.collection('expired_list').insert(domain);
|
||||
}
|
||||
}
|
||||
});
|
||||
links.getDomainList(url).then(res=>{
|
||||
database.insertTodayDomains(db,res).then(()=>{
|
||||
database.getYesterdayDomains(db).then((result)=>{
|
||||
|
||||
result.map((domain)=>{
|
||||
db.collection('today').findOne({domainName:domain.domainName}, (err,result)=>{
|
||||
if ((result===null) &&(datetime.create().format('Y-m-d')===domain.expirationDate)){
|
||||
db.collection('expired_list').insert(domain);
|
||||
}
|
||||
});
|
||||
db.collection('expired_list').find({}).toArray((err,result)=>{
|
||||
result.map(domain=>{
|
||||
let checkLink = '';
|
||||
switch(domain.tld){
|
||||
case 'se':
|
||||
checkLink = config.seDomainCheck;
|
||||
break;
|
||||
case 'nu':
|
||||
checkLink = config.nuDomainCheck;
|
||||
break;
|
||||
}
|
||||
let fullName = domain.domainName + '.' + domain.tld;
|
||||
http.get(checkLink+punycode.toASCII(fullName), res => {
|
||||
res.setEncoding("utf8");
|
||||
let body = "";
|
||||
res.on("data", data => {
|
||||
body += data;
|
||||
});
|
||||
res.on("end", () => {
|
||||
let status = body.split(' ')[0];
|
||||
if (status !== 'free'){
|
||||
db.collection('expired_list').remove({domainName:domain.domainName});
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
process.exit(0);
|
||||
});
|
||||
|
||||
database.getExpiredDomains(db).then((result)=>{
|
||||
links.checkExpiredDomains(db,result).then(()=>{
|
||||
console.log("Done");
|
||||
db.close();
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
}).catch((e)=>{
|
||||
console.log(e);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
31
crawler/helper/database.js
Normal file
31
crawler/helper/database.js
Normal file
@@ -0,0 +1,31 @@
|
||||
module.exports = {
|
||||
insertTodayDomains : function (db, domains){
|
||||
return new Promise((resolve,reject)=>{
|
||||
db.collection('today').insert(domains, ()=>{
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
},
|
||||
|
||||
getYesterdayDomains : function (db){
|
||||
return getArrayFromDatabase(db, 'yesterday');
|
||||
},
|
||||
|
||||
getExpiredDomains : function (db){
|
||||
return getArrayFromDatabase(db, 'expired_list');
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
/**
 * Reads every document of one collection into an array.
 *
 * @param {object} db - Database handle exposing `collection(name)`.
 * @param {string} collection - Name of the collection to read.
 * @returns {Promise<Array>} Resolves with the array passed to the `toArray`
 *     callback. Rejects with an Error whose message starts with
 *     "Error reading collection" and whose `cause` property holds the
 *     original error reported by the callback.
 */
const getArrayFromDatabase = function(db, collection){
    return new Promise((resolve,reject)=>{
        db.collection(collection).find({}).toArray((err,result)=>{
            if (err){
                // Keep the underlying reason; a bare string would lose both
                // the stack trace and the driver's own message.
                const reason = err && err.message ? err.message : String(err);
                const failure = new Error(`Error reading collection "${collection}": ${reason}`);
                failure.cause = err;
                reject(failure);
                return;
            }
            resolve(result);
        });
    });
}
|
||||
@@ -4,61 +4,110 @@ const punycode = require('punycode');
|
||||
var fs = require('fs');
|
||||
|
||||
module.exports = {
|
||||
getDomainList : function(url, callback){
|
||||
|
||||
getRawDomainList(url,(raw)=>{
|
||||
let result = [];
|
||||
raw.split('\n').map(domain=>{
|
||||
let unicodeDomain = punycode.toUnicode(domain);
|
||||
let dot = unicodeDomain.indexOf('.');
|
||||
let tab = unicodeDomain.indexOf('\t');
|
||||
if (dot !== -1){
|
||||
let domainName = unicodeDomain.substring(0,dot);
|
||||
let tld = unicodeDomain.substring(dot+1,tab);
|
||||
if (domainName.match(config.swedishLettersOnly)){
|
||||
//domain name contains only letters
|
||||
//line in domain list is formatted as follows : [domain name]\t[expiration date]
|
||||
result.push({domainName: domainName, tld:tld ,expirationDate: domain.split('\t')[1]});
|
||||
}
|
||||
}
|
||||
getDomainList : function(url){
|
||||
return new Promise((resolve, reject)=>{
|
||||
getRawDomainList(url).then(raw=>{
|
||||
processDomains(raw).then(result=>{
|
||||
applyFilter(result).then(result=>{
|
||||
resolve(result);
|
||||
})
|
||||
});
|
||||
});
|
||||
applyFilter(result, callback);
|
||||
});
|
||||
},
|
||||
|
||||
checkExpiredDomains : function(db, domains){
|
||||
return new Promise((resolve,reject)=>{
|
||||
domains.map(domain=>{
|
||||
let checkLink = '';
|
||||
switch(domain.tld){
|
||||
case 'se':
|
||||
checkLink = config.seDomainCheck;
|
||||
break;
|
||||
case 'nu':
|
||||
checkLink = config.nuDomainCheck;
|
||||
break;
|
||||
}
|
||||
|
||||
let fullName = domain.domainName + '.' + domain.tld;
|
||||
http.get(checkLink+punycode.toASCII(fullName), res => {
|
||||
res.setEncoding("utf8");
|
||||
let body = "";
|
||||
res.on("data", data => {
|
||||
body += data;
|
||||
});
|
||||
res.on("end", () => {
|
||||
let status = body.split(' ')[0];
|
||||
if (status !== 'free'){
|
||||
db.collection('expired_list').remove({domainName:domain.domainName});
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
resolve();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
/**
 * Keeps only the domains whose `domainName` is listed in `config.words`
 * and hands the filtered array to `callback`.
 *
 * @param {Array<object>} domains - Records carrying a `domainName` property.
 * @param {function(Array<object>)} callback - Receives the matching records.
 */
var applyFilter = function (domains, callback){
    // A name matches only when it is exactly one of the configured words.
    const isKnownWord = (entry) => config.words.indexOf(entry.domainName) !== -1;
    const matches = domains.filter(isKnownWord);
    callback(matches);
}
|
||||
|
||||
var getRawDomainList = function (url, callback) {
|
||||
if (url[0]==='/'){
|
||||
//it's local file
|
||||
fs.readFile(url,'utf8',(err,data)=>{
|
||||
if (err){
|
||||
console.log("err : " + err);
|
||||
}else{
|
||||
callback(data);
|
||||
var applyFilter = function (domains){
|
||||
return new Promise((resolve,reject)=>{
|
||||
//get domain names that only match whole words
|
||||
let result = [];
|
||||
domains.map(domain=>{
|
||||
let index = config.words.indexOf(domain.domainName);
|
||||
if (index !== -1){
|
||||
result.push(domain);
|
||||
}
|
||||
});
|
||||
}else{
|
||||
https.get(url, res => {
|
||||
res.setEncoding("utf8");
|
||||
let body = "";
|
||||
res.on("data", data => {
|
||||
body += data;
|
||||
});
|
||||
res.on("end", () => {
|
||||
callback(body);
|
||||
});
|
||||
});
|
||||
}
|
||||
resolve(result);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Parses a raw domain list into records.
 *
 * Each input line is expected to look like `[domain].[tld]\t[expiration date]`.
 * A line is kept only when it contains a dot and the part before the first
 * dot matches `config.swedishLettersOnly`.
 *
 * @param {string} raw - Newline-separated list; a trailing `\r` per line is ignored.
 * @returns {Promise<Array<{domainName: string, tld: string, expirationDate: (string|undefined)}>>}
 *     Resolves with the parsed records. `expirationDate` is `undefined` for a
 *     line that has no tab-separated second column.
 */
var processDomains = function(raw){
    return new Promise((resolve,reject)=>{
        const result = [];
        for (const rawLine of raw.split('\n')){
            // Drop the carriage return of CRLF input so it cannot end up in the date.
            const line = rawLine.replace(/\r$/, '');
            // Split off the date first: slicing the TLD up to indexOf('\t')
            // breaks when the tab is missing (a negative end index makes
            // substring swap its arguments).
            const columns = line.split('\t');
            const unicodeDomain = punycode.toUnicode(columns[0]);
            const dot = unicodeDomain.indexOf('.');
            if (dot === -1){
                continue;
            }
            const domainName = unicodeDomain.substring(0,dot);
            const tld = unicodeDomain.substring(dot+1);
            if (domainName.match(config.swedishLettersOnly)){
                //domain name contains only letters
                result.push({domainName: domainName, tld: tld, expirationDate: columns[1]});
            }
        }
        resolve(result);
    });
}
|
||||
|
||||
/**
 * Loads the raw domain list as a UTF-8 string.
 *
 * A `url` starting with "/" is read as a local file; anything else is
 * fetched with `https.get`.
 *
 * @param {string} url - Absolute file path or HTTPS URL.
 * @returns {Promise<string>} Resolves with the file contents or the complete
 *     response body. Rejects with the read error, or with the error emitted
 *     by the request or the response stream.
 */
var getRawDomainList = function (url) {
    return new Promise((resolve, reject)=>{
        if (url[0]==='/'){
            //it's local file
            fs.readFile(url,'utf8',(err,data)=>{
                if (err){
                    reject(err);
                }else{
                    resolve(data);
                }
            });
            return;
        }

        https.get(url, res => {
            res.setEncoding("utf8");
            let body = "";
            res.on("data", chunk => {
                body += chunk;
            });
            res.on("end", () => {
                resolve(body);
            });
            // A broken response stream must settle the promise too.
            res.on("error", reject);
        }).on("error", reject); // without this a connection failure is an unhandled 'error' event
    });
}
|
||||
Reference in New Issue
Block a user