Added srpskainfo crawler

This commit is contained in:
Senad Uka
2022-02-15 07:03:30 +01:00
parent 08d81be857
commit 6648f6754a
7 changed files with 178 additions and 22 deletions

View File

@@ -19,8 +19,8 @@ func CrawlKlix() {
crHomePage := colly.NewCollector(colly.AllowedDomains("www.klix.ba"))
crArticlePage := colly.NewCollector(colly.AllowedDomains("www.klix.ba"))
setupArticlePageCrawler(crArticlePage)
setupHomepageCrawler(crHomePage, crArticlePage)
setupKlArticlePageCrawler(crArticlePage)
setupKlHomepageCrawler(crHomePage, crArticlePage)
go visitApprovedPages(crArticlePage)
}
@@ -33,7 +33,7 @@ func visitApprovedPages(crArticlePage *colly.Collector) {
}
}
func setupHomepageCrawler(crHomePage *colly.Collector, crArticlePage *colly.Collector) {
func setupKlHomepageCrawler(crHomePage *colly.Collector, crArticlePage *colly.Collector) {
articleUrlR, _ := regexp.Compile("\\d\\d+$")
crHomePage.OnHTML(".container a", func(e *colly.HTMLElement) {
@@ -46,21 +46,26 @@ func setupHomepageCrawler(crHomePage *colly.Collector, crArticlePage *colly.Coll
crHomePage.OnScraped(func(_ *colly.Response) {
time.Sleep(5 * time.Second)
close(KlixArticles)
close(KlixApprovedSites)
close(KlixCandidates)
terminating := model.ScrapedArticle{}
terminating.Title = model.Terminator
KlixArticles <- terminating
KlixApprovedSites <- model.Terminator
KlixCandidates <- model.Terminator
})
crHomePage.OnError(func(_ *colly.Response, _ error) {
close(KlixArticles)
close(KlixApprovedSites)
close(KlixCandidates)
time.Sleep(5 * time.Second)
terminating := model.ScrapedArticle{}
terminating.Title = model.Terminator
KlixArticles <- terminating
KlixApprovedSites <- model.Terminator
KlixCandidates <- model.Terminator
})
go crHomePage.Visit("https://www.klix.ba")
}
func setupArticlePageCrawler(crArticlePage *colly.Collector) {
func setupKlArticlePageCrawler(crArticlePage *colly.Collector) {
crArticlePage.OnHTML("html", func(e *colly.HTMLElement) {
url := e.Request.URL.String()