Added srpskainfo crawler
This commit is contained in:
@@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"github.com/lib/pq"
|
||||
"gitlab.com/kbr4/svevijesti/internal/database"
|
||||
"gitlab.com/kbr4/svevijesti/internal/model"
|
||||
"gitlab.com/kbr4/svevijesti/internal/scraper"
|
||||
)
|
||||
|
||||
@@ -15,17 +16,37 @@ func main() {
|
||||
|
||||
defer store.Close()
|
||||
go candidateChecker()
|
||||
go scraper.CrawlSrpskainfo()
|
||||
go scraper.CrawlKlix()
|
||||
|
||||
for article := range scraper.KlixArticles {
|
||||
fmt.Println("Saving ", article.OriginalUrl)
|
||||
err = database.InsertArticle(store, article)
|
||||
if err, ok := err.(*pq.Error); ok {
|
||||
if err.Code.Name() != "unique_violation" {
|
||||
panic(err)
|
||||
} else {
|
||||
fmt.Println("Skipping: ", article.OriginalUrl)
|
||||
article := model.ScrapedArticle{}
|
||||
|
||||
for {
|
||||
select {
|
||||
case article = <-scraper.KlixArticles:
|
||||
if article.Title == model.Terminator {
|
||||
scraper.KlixArticles = nil
|
||||
}
|
||||
case article = <-scraper.SrpskainfoArticles:
|
||||
if article.Title == model.Terminator {
|
||||
scraper.SrpskainfoArticles = nil
|
||||
}
|
||||
}
|
||||
|
||||
if article.Title != model.Terminator {
|
||||
fmt.Println("Saving ", article.OriginalUrl)
|
||||
err = database.InsertArticle(store, article)
|
||||
if err, ok := err.(*pq.Error); ok {
|
||||
if err.Code.Name() != "unique_violation" {
|
||||
panic(err)
|
||||
} else {
|
||||
fmt.Println("Skipping: ", article.OriginalUrl)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if scraper.KlixArticles == nil && scraper.SrpskainfoArticles == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -37,10 +58,31 @@ func candidateChecker() {
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
for url := range scraper.KlixCandidates {
|
||||
if !database.IsSaved(store, url) {
|
||||
scraper.KlixApprovedSites <- url
|
||||
for {
|
||||
select {
|
||||
case url := <-scraper.KlixCandidates:
|
||||
if url == model.Terminator {
|
||||
scraper.KlixCandidates = nil
|
||||
} else {
|
||||
if !database.IsSaved(store, url) {
|
||||
scraper.KlixApprovedSites <- url
|
||||
}
|
||||
}
|
||||
|
||||
case url := <-scraper.SrpskainfoCandidates:
|
||||
if url == model.Terminator {
|
||||
scraper.SrpskainfoCandidates = nil
|
||||
} else {
|
||||
if !database.IsSaved(store, url) {
|
||||
scraper.SrpskainfoApprovedSites <- url
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if scraper.KlixCandidates == nil && scraper.SrpskainfoCandidates == nil {
|
||||
break
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user