Added srpskainfo crawler
This commit is contained in:
@@ -19,8 +19,8 @@ func CrawlKlix() {
|
||||
crHomePage := colly.NewCollector(colly.AllowedDomains("www.klix.ba"))
|
||||
crArticlePage := colly.NewCollector(colly.AllowedDomains("www.klix.ba"))
|
||||
|
||||
setupArticlePageCrawler(crArticlePage)
|
||||
setupHomepageCrawler(crHomePage, crArticlePage)
|
||||
setupKlArticlePageCrawler(crArticlePage)
|
||||
setupKlHomepageCrawler(crHomePage, crArticlePage)
|
||||
|
||||
go visitApprovedPages(crArticlePage)
|
||||
}
|
||||
@@ -33,7 +33,7 @@ func visitApprovedPages(crArticlePage *colly.Collector) {
|
||||
}
|
||||
}
|
||||
|
||||
func setupHomepageCrawler(crHomePage *colly.Collector, crArticlePage *colly.Collector) {
|
||||
func setupKlHomepageCrawler(crHomePage *colly.Collector, crArticlePage *colly.Collector) {
|
||||
|
||||
articleUrlR, _ := regexp.Compile("\\d\\d+$")
|
||||
crHomePage.OnHTML(".container a", func(e *colly.HTMLElement) {
|
||||
@@ -46,21 +46,26 @@ func setupHomepageCrawler(crHomePage *colly.Collector, crArticlePage *colly.Coll
|
||||
|
||||
crHomePage.OnScraped(func(_ *colly.Response) {
|
||||
time.Sleep(5 * time.Second)
|
||||
close(KlixArticles)
|
||||
close(KlixApprovedSites)
|
||||
close(KlixCandidates)
|
||||
terminating := model.ScrapedArticle{}
|
||||
terminating.Title = model.Terminator
|
||||
KlixArticles <- terminating
|
||||
KlixApprovedSites <- model.Terminator
|
||||
KlixCandidates <- model.Terminator
|
||||
})
|
||||
|
||||
crHomePage.OnError(func(_ *colly.Response, _ error) {
|
||||
close(KlixArticles)
|
||||
close(KlixApprovedSites)
|
||||
close(KlixCandidates)
|
||||
time.Sleep(5 * time.Second)
|
||||
terminating := model.ScrapedArticle{}
|
||||
terminating.Title = model.Terminator
|
||||
KlixArticles <- terminating
|
||||
KlixApprovedSites <- model.Terminator
|
||||
KlixCandidates <- model.Terminator
|
||||
})
|
||||
|
||||
go crHomePage.Visit("https://www.klix.ba")
|
||||
}
|
||||
|
||||
func setupArticlePageCrawler(crArticlePage *colly.Collector) {
|
||||
func setupKlArticlePageCrawler(crArticlePage *colly.Collector) {
|
||||
crArticlePage.OnHTML("html", func(e *colly.HTMLElement) {
|
||||
|
||||
url := e.Request.URL.String()
|
||||
|
||||
Reference in New Issue
Block a user