.
This commit is contained in:
47
crawler/helper/links.js
Normal file
47
crawler/helper/links.js
Normal file
@@ -0,0 +1,47 @@
|
||||
const config = require('../config');
|
||||
const https = require("https");
|
||||
|
||||
module.exports = {
|
||||
getDomainList : function(url, callback){
|
||||
getRawDomainList(url,(raw)=>{
|
||||
let result = [];
|
||||
raw.split('\n').map(domain=>{
|
||||
let dot = domain.indexOf('.');
|
||||
if (dot !== -1){
|
||||
let domainName = domain.substring(0,dot);
|
||||
if (domainName.match(config.lettersOnlyRegex)){
|
||||
//domain name contains only letters
|
||||
//line in domain list is formatted as follows : [domain name]\t[expiration date]
|
||||
result.push({domainName: domainName, expirationDate: domain.split('\t')[1]});
|
||||
}
|
||||
}
|
||||
});
|
||||
applyFilter(result, callback);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Keeps only the entries whose `domainName` is found in `config.words`
 * (i.e. the domain name is a whole word) and passes them to `callback`.
 *
 * @param {Array<{domainName: string}>} domains - Parsed domain entries.
 * @param {function(Array): void} callback - Receives a new array holding the
 *     matching entries, in their original order.
 */
function applyFilter(domains, callback) {
    const wholeWordMatches = domains.filter(
        (entry) => config.words.indexOf(entry.domainName) !== -1
    );
    callback(wholeWordMatches);
}
|
||||
|
||||
/**
 * Fetches `url` over HTTPS and passes the complete response body, decoded as
 * UTF-8, to `callback`.
 *
 * If the request or the response stream emits an error, the failure is logged
 * with `console.error` and `callback` is invoked with an empty string as the
 * body and the error as an additional second argument, instead of letting an
 * unhandled 'error' event crash the process.
 *
 * @param {string} url - HTTPS URL to download.
 * @param {function(string, Error=): void} callback - Called exactly once with
 *     the body (and, on failure only, the error).
 */
function getRawDomainList(url, callback) {
    let settled = false;

    // Ensures the callback fires only once, whichever event arrives first.
    const finish = (body, err) => {
        if (settled) {
            return;
        }
        settled = true;
        if (err) {
            console.error(`getRawDomainList: request to ${url} failed: ${err.message}`);
            callback('', err);
            return;
        }
        callback(body);
    };

    const request = https.get(url, (res) => {
        res.setEncoding('utf8');
        let body = '';
        res.on('data', (chunk) => {
            body += chunk;
        });
        res.on('end', () => finish(body));
        res.on('error', (err) => finish('', err));
    });

    // Without this listener a DNS/connection failure is thrown as an
    // unhandled 'error' event.
    request.on('error', (err) => finish('', err));
}
|
||||
Reference in New Issue
Block a user