Files
old-domene-svedska/crawler/helper/links.js

136 lines
4.0 KiB
JavaScript
Raw Normal View History

2017-12-07 12:36:41 +01:00
const config = require('../config');
const https = require("https");
2017-12-08 10:57:47 +01:00
const punycode = require('punycode');
var fs = require('fs');
2017-12-13 13:56:47 +01:00
var http = require('http');
2017-12-07 12:36:41 +01:00
module.exports = {
2017-12-12 22:08:02 +01:00
getDomainList : function(url){
return new Promise((resolve, reject)=>{
2017-12-14 02:45:51 +01:00
2017-12-12 22:08:02 +01:00
getRawDomainList(url).then(raw=>{
2017-12-14 02:45:51 +01:00
resolve(applyFilter(processDomains(raw)));
2017-12-12 22:08:02 +01:00
});
});
},
checkExpiredDomains : function(db, domains){
return new Promise((resolve,reject)=>{
2017-12-14 02:45:51 +01:00
let waitingPromises = [];
let domainsForRemoval = [];
2017-12-12 22:08:02 +01:00
domains.map(domain=>{
let checkLink = '';
switch(domain.tld){
case 'se':
checkLink = config.seDomainCheck;
break;
case 'nu':
checkLink = config.nuDomainCheck;
break;
2017-12-07 12:36:41 +01:00
}
2017-12-12 22:08:02 +01:00
let fullName = domain.domainName + '.' + domain.tld;
2017-12-14 02:45:51 +01:00
waitingPromises.push(removeOccupiedDomain(db, checkLink+punycode.toASCII(fullName),domain));
});
Promise.all(waitingPromises).then(()=>{
resolve();
2017-12-07 12:36:41 +01:00
});
});
}
};
2017-12-14 02:45:51 +01:00
/**
 * Queries an availability-check URL and reports whether the domain is free.
 *
 * The response body is expected to start with a status word followed by a
 * space; the domain counts as free only when that first word is exactly 'free'.
 *
 * @param {object} db - Unused here; kept so the signature stays unchanged.
 * @param {string} url - Full check URL (plain HTTP).
 * @param {object} domain - Unused here; kept so the signature stays unchanged.
 * @returns {Promise<boolean>} Resolves true when the status word is 'free',
 *   false otherwise; rejects when the request or the response stream fails.
 */
var isDomainFree = function (db, url, domain) {
    return new Promise((resolve, reject) => {
        const request = http.get(url, res => {
            res.setEncoding("utf8");
            let body = "";
            res.on("data", chunk => {
                body += chunk;
            });
            res.on("error", reject);
            res.on("end", () => {
                const status = body.split(' ')[0];
                resolve(status === 'free');
            });
        });
        // Without this listener a connection failure is emitted as an unhandled
        // 'error' event and the returned promise would never settle.
        request.on("error", reject);
    });
}
2017-12-14 02:45:51 +01:00
/**
 * Queries an availability-check URL and, when the domain is no longer free,
 * removes its record from the 'expired_list' collection.
 *
 * The response body is expected to start with a status word followed by a
 * space; any first word other than 'free' triggers the removal.
 *
 * @param {object} db - Database handle exposing collection(name).remove(query).
 * @param {string} url - Full check URL (plain HTTP).
 * @param {{domainName: string, tld: string}} domain - Record to remove when occupied.
 * @returns {Promise<undefined>} Resolves after the check (and the removal, if
 *   one was needed) has completed; rejects when the request, the response
 *   stream or the removal fails.
 */
var removeOccupiedDomain = function (db, url, domain) {
    return new Promise((resolve, reject) => {
        const request = http.get(url, res => {
            res.setEncoding("utf8");
            let body = "";
            res.on("data", chunk => {
                body += chunk;
            });
            res.on("error", reject);
            res.on("end", () => {
                const status = body.split(' ')[0];
                if (status === 'free') {
                    resolve();
                    return;
                }
                // Wait for the removal before resolving so callers can rely on
                // the record being gone, and so removal errors are reported.
                // Starting from a resolved promise also turns a synchronous
                // throw from the driver into a rejection.
                Promise.resolve()
                    .then(() => db.collection('expired_list').remove({domainName: domain.domainName, tld: domain.tld}))
                    .then(() => resolve(), reject);
            });
        });
        // Without this listener a connection failure is emitted as an unhandled
        // 'error' event and the returned promise would never settle.
        request.on("error", reject);
    });
}
2017-12-14 02:45:51 +01:00
/**
 * Keeps only the domains whose name appears in config.words.
 *
 * @param {Array<{domainName: string}>} domains - Parsed domain records.
 * @returns {Array<object>} New array holding the matching records, in their
 *   original order.
 */
var applyFilter = function (domains) {
    const isListedWord = entry => config.words.indexOf(entry.domainName) !== -1;
    return domains.filter(isListedWord);
}
/**
 * Parses the raw domain list into domain records.
 *
 * Each line is formatted as: [domain name]\t[expiration date]. The domain is
 * converted from punycode to Unicode and split at its first dot into name and
 * TLD. Only names matching config.swedishLettersOnly are kept; lines without
 * a dot in the domain field are skipped.
 *
 * @param {string} raw - Raw list, one entry per line ('\n' or '\r\n').
 * @returns {Array<{domainName: string, tld: string, expirationDate: (string|undefined)}>}
 *   Parsed records in input order; expirationDate is undefined when the line
 *   has no tab-separated date field.
 */
var processDomains = function (raw) {
    const result = [];
    raw.split('\n').forEach(rawLine => {
        // Drop the '\r' left over from CRLF input so it cannot leak into the
        // last field of the line.
        const line = rawLine.replace(/\r$/, '');
        const fields = line.split('\t');
        // Decode only the domain field; the date field is not a host name.
        const unicodeDomain = punycode.toUnicode(fields[0]);
        const dot = unicodeDomain.indexOf('.');
        if (dot === -1) {
            return;
        }
        const domainName = unicodeDomain.substring(0, dot);
        // Everything after the first dot is the TLD. Working on the domain
        // field alone keeps this correct for lines without a tab, where a
        // substring bounded by indexOf('\t') === -1 would return the wrong slice.
        const tld = unicodeDomain.substring(dot + 1);
        // String#match is used (not RegExp#test) so a pattern carrying the
        // global flag does not keep lastIndex state between lines.
        if (domainName.match(config.swedishLettersOnly)) {
            result.push({domainName: domainName, tld: tld, expirationDate: fields[1]});
        }
    });
    return result;
}
2017-12-12 22:08:02 +01:00
/**
 * Loads the raw domain list as a UTF-8 string.
 *
 * @param {string} url - A value starting with '/' is treated as a local file
 *   path and read from disk; anything else is fetched with an HTTPS GET.
 * @returns {Promise<string>} Resolves with the file contents or the response
 *   body; rejects when the file cannot be read or the download fails.
 */
var getRawDomainList = function (url) {
    return new Promise((resolve, reject) => {
        if (url[0] === '/') {
            // it's a local file
            fs.readFile(url, 'utf8', (err, data) => {
                if (err) {
                    reject(err);
                    return;
                }
                resolve(data);
            });
            return;
        }

        const request = https.get(url, res => {
            res.setEncoding("utf8");
            let body = "";
            res.on("data", chunk => {
                body += chunk;
            });
            res.on("error", reject);
            res.on("end", () => {
                resolve(body);
            });
        });
        // Without this listener a connection failure is emitted as an unhandled
        // 'error' event and the returned promise would never settle.
        request.on("error", reject);
    });
}