127 lines
2.6 KiB
Go
127 lines
2.6 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"github.com/lib/pq"
|
|
"gitlab.com/kbr4/svevijesti/internal/database"
|
|
"gitlab.com/kbr4/svevijesti/internal/model"
|
|
"gitlab.com/kbr4/svevijesti/internal/scraper"
|
|
)
|
|
|
|
func main() {
|
|
store, err := database.Connect()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
defer store.Close()
|
|
go candidateChecker()
|
|
go scraper.CrawlSrpskainfo()
|
|
go scraper.CrawlKlix()
|
|
go scraper.CrawlBljesak()
|
|
go scraper.CrawlAvaz()
|
|
|
|
article := model.ScrapedArticle{}
|
|
|
|
for {
|
|
select {
|
|
case article = <-scraper.KlixArticles:
|
|
if article.Title == model.Terminator {
|
|
scraper.KlixArticles = nil
|
|
}
|
|
case article = <-scraper.SrpskainfoArticles:
|
|
if article.Title == model.Terminator {
|
|
scraper.SrpskainfoArticles = nil
|
|
}
|
|
case article = <-scraper.BljesakArticles:
|
|
if article.Title == model.Terminator {
|
|
scraper.BljesakArticles = nil
|
|
}
|
|
case article = <-scraper.AvazArticles:
|
|
if article.Title == model.Terminator {
|
|
scraper.AvazArticles = nil
|
|
}
|
|
|
|
}
|
|
|
|
if article.Title != model.Terminator {
|
|
fmt.Println("Saving ", article.OriginalUrl)
|
|
err = database.InsertArticle(store, article)
|
|
if err, ok := err.(*pq.Error); ok {
|
|
if err.Code.Name() != "unique_violation" {
|
|
panic(err)
|
|
} else {
|
|
fmt.Println("Skipping: ", article.OriginalUrl)
|
|
fmt.Println("Title ", article.Title)
|
|
fmt.Println("Error ", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
if scraper.KlixArticles == nil &&
|
|
scraper.SrpskainfoArticles == nil &&
|
|
scraper.AvazArticles == nil &&
|
|
scraper.BljesakCandidates == nil {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
func candidateChecker() {
|
|
store, err := database.Connect()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
defer store.Close()
|
|
|
|
for {
|
|
select {
|
|
case url := <-scraper.KlixCandidates:
|
|
if url == model.Terminator {
|
|
scraper.KlixCandidates = nil
|
|
} else {
|
|
if !database.IsSaved(store, url) {
|
|
scraper.KlixApprovedSites <- url
|
|
}
|
|
}
|
|
|
|
case url := <-scraper.SrpskainfoCandidates:
|
|
if url == model.Terminator {
|
|
scraper.SrpskainfoCandidates = nil
|
|
} else {
|
|
if !database.IsSaved(store, url) {
|
|
scraper.SrpskainfoApprovedSites <- url
|
|
}
|
|
}
|
|
|
|
case url := <-scraper.BljesakCandidates:
|
|
if url == model.Terminator {
|
|
scraper.BljesakCandidates = nil
|
|
} else {
|
|
if !database.IsSaved(store, url) {
|
|
scraper.BljesakApprovedSites <- url
|
|
}
|
|
}
|
|
|
|
case url := <-scraper.AvazCandidates:
|
|
if url == model.Terminator {
|
|
scraper.AvazCandidates = nil
|
|
} else {
|
|
if !database.IsSaved(store, url) {
|
|
scraper.AvazApprovedSites <- url
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
if scraper.KlixCandidates == nil &&
|
|
scraper.SrpskainfoCandidates == nil &&
|
|
scraper.AvazCandidates == nil &&
|
|
scraper.BljesakCandidates == nil {
|
|
break
|
|
}
|
|
|
|
}
|
|
|
|
}
|