Files
old-svevijesti/cmd/spider/spider.go
2022-02-22 21:06:27 +01:00

127 lines
2.6 KiB
Go

package main
import (
"fmt"
"github.com/lib/pq"
"gitlab.com/kbr4/svevijesti/internal/database"
"gitlab.com/kbr4/svevijesti/internal/model"
"gitlab.com/kbr4/svevijesti/internal/scraper"
)
// main connects to the database, starts the candidate checker and the four
// site crawlers, and then stores every article the crawlers emit. It returns
// once each of the four article channels has delivered an article whose Title
// equals model.Terminator.
func main() {
	store, err := database.Connect()
	if err != nil {
		panic(err)
	}
	defer store.Close()

	go candidateChecker()
	go scraper.CrawlSrpskainfo()
	go scraper.CrawlKlix()
	go scraper.CrawlBljesak()
	go scraper.CrawlAvaz()

	article := model.ScrapedArticle{}
	for {
		// A receive from a nil channel never becomes ready, so setting a
		// finished crawler's channel to nil removes it from this select.
		select {
		case article = <-scraper.KlixArticles:
			if article.Title == model.Terminator {
				scraper.KlixArticles = nil
			}
		case article = <-scraper.SrpskainfoArticles:
			if article.Title == model.Terminator {
				scraper.SrpskainfoArticles = nil
			}
		case article = <-scraper.BljesakArticles:
			if article.Title == model.Terminator {
				scraper.BljesakArticles = nil
			}
		case article = <-scraper.AvazArticles:
			if article.Title == model.Terminator {
				scraper.AvazArticles = nil
			}
		}

		// Terminator articles only signal end-of-stream; they are never stored.
		if article.Title != model.Terminator {
			fmt.Println("Saving ", article.OriginalUrl)
			err = database.InsertArticle(store, article)
			if pqErr, ok := err.(*pq.Error); ok {
				// A unique_violation is reported and skipped; any other
				// PostgreSQL error is fatal.
				if pqErr.Code.Name() != "unique_violation" {
					panic(pqErr)
				}
				fmt.Println("Skipping: ", article.OriginalUrl)
				fmt.Println("Title ", article.Title)
				fmt.Println("Error ", pqErr)
			} else if err != nil {
				// Errors that are not *pq.Error used to be dropped silently;
				// report them but keep processing the remaining articles.
				fmt.Println("Failed to save: ", article.OriginalUrl)
				fmt.Println("Error ", err)
			}
		}

		// Stop only when all four *article* channels are exhausted. The
		// Bljesak check must look at BljesakArticles: BljesakCandidates is
		// owned by candidateChecker and says nothing about pending articles.
		if scraper.KlixArticles == nil &&
			scraper.SrpskainfoArticles == nil &&
			scraper.AvazArticles == nil &&
			scraper.BljesakArticles == nil {
			break
		}
	}
}
// candidateChecker opens its own database connection and filters the
// candidate URLs coming from the four crawlers. A candidate for which
// database.IsSaved reports false is forwarded to the matching
// *ApprovedSites channel. A candidate equal to model.Terminator marks the end
// of that crawler's stream. The function returns once all four candidate
// channels have been terminated.
func candidateChecker() {
	db, err := database.Connect()
	if err != nil {
		panic(err)
	}
	defer db.Close()

	for {
		// Terminated streams are set to nil so that select no longer
		// considers them.
		select {
		case link := <-scraper.KlixCandidates:
			switch {
			case link == model.Terminator:
				scraper.KlixCandidates = nil
			case !database.IsSaved(db, link):
				scraper.KlixApprovedSites <- link
			}
		case link := <-scraper.SrpskainfoCandidates:
			switch {
			case link == model.Terminator:
				scraper.SrpskainfoCandidates = nil
			case !database.IsSaved(db, link):
				scraper.SrpskainfoApprovedSites <- link
			}
		case link := <-scraper.BljesakCandidates:
			switch {
			case link == model.Terminator:
				scraper.BljesakCandidates = nil
			case !database.IsSaved(db, link):
				scraper.BljesakApprovedSites <- link
			}
		case link := <-scraper.AvazCandidates:
			switch {
			case link == model.Terminator:
				scraper.AvazCandidates = nil
			case !database.IsSaved(db, link):
				scraper.AvazApprovedSites <- link
			}
		}

		// Keep looping while at least one candidate stream is still open.
		if scraper.KlixCandidates != nil ||
			scraper.SrpskainfoCandidates != nil ||
			scraper.AvazCandidates != nil ||
			scraper.BljesakCandidates != nil {
			continue
		}
		return
	}
}