This commit is contained in:
2023-12-18 16:51:47 +01:00
commit 88741b2303
36 changed files with 1490 additions and 0 deletions

126
cmd/spider/spider.go Normal file
View File

@@ -0,0 +1,126 @@
package main
import (
"fmt"
"github.com/lib/pq"
"gitlab.com/kbr4/svevijesti/internal/database"
"gitlab.com/kbr4/svevijesti/internal/model"
"gitlab.com/kbr4/svevijesti/internal/scraper"
)
// main connects to the database, starts candidateChecker and the four
// scraper.Crawl* goroutines, and then stores every article received on the
// scraper article channels. An article whose Title equals model.Terminator
// marks the end of its channel; the loop ends once all four article channels
// have delivered their terminator.
func main() {
	store, err := database.Connect()
	if err != nil {
		panic(err)
	}
	defer store.Close()

	go candidateChecker()
	go scraper.CrawlSrpskainfo()
	go scraper.CrawlKlix()
	go scraper.CrawlBljesak()
	go scraper.CrawlAvaz()

	article := model.ScrapedArticle{}
	for {
		// Setting a finished channel to nil removes it from the select:
		// a receive from a nil channel never becomes ready.
		select {
		case article = <-scraper.KlixArticles:
			if article.Title == model.Terminator {
				scraper.KlixArticles = nil
			}
		case article = <-scraper.SrpskainfoArticles:
			if article.Title == model.Terminator {
				scraper.SrpskainfoArticles = nil
			}
		case article = <-scraper.BljesakArticles:
			if article.Title == model.Terminator {
				scraper.BljesakArticles = nil
			}
		case article = <-scraper.AvazArticles:
			if article.Title == model.Terminator {
				scraper.AvazArticles = nil
			}
		}

		if article.Title != model.Terminator {
			fmt.Println("Saving ", article.OriginalUrl)
			err = database.InsertArticle(store, article)
			if pqErr, ok := err.(*pq.Error); ok {
				// Duplicates are expected and skipped; any other database
				// error is fatal.
				if pqErr.Code.Name() != "unique_violation" {
					panic(pqErr)
				}
				fmt.Println("Skipping: ", article.OriginalUrl)
				fmt.Println("Title ", article.Title)
				fmt.Println("Error ", pqErr)
			} else if err != nil {
				// Errors that are not *pq.Error used to be dropped silently;
				// report them so a failed save is visible.
				fmt.Println("Failed to save: ", article.OriginalUrl)
				fmt.Println("Error ", err)
			}
		}

		// Test the article channels only. The candidate channels are owned
		// and nil-ed by candidateChecker and say nothing about whether all
		// articles have been received.
		if scraper.KlixArticles == nil &&
			scraper.SrpskainfoArticles == nil &&
			scraper.AvazArticles == nil &&
			scraper.BljesakArticles == nil {
			break
		}
	}
}
// candidateChecker opens its own database connection and drains the four
// scraper candidate channels. Each received URL that database.IsSaved does
// not report as saved is forwarded to the matching approved-sites channel.
// A value equal to model.Terminator ends that channel's stream; the function
// returns once all four candidate channels have been terminated.
func candidateChecker() {
	db, err := database.Connect()
	if err != nil {
		panic(err)
	}
	defer db.Close()

	for {
		// A terminated channel is set to nil so that the select never
		// picks it again.
		select {
		case candidate := <-scraper.KlixCandidates:
			switch {
			case candidate == model.Terminator:
				scraper.KlixCandidates = nil
			case !database.IsSaved(db, candidate):
				scraper.KlixApprovedSites <- candidate
			}
		case candidate := <-scraper.SrpskainfoCandidates:
			switch {
			case candidate == model.Terminator:
				scraper.SrpskainfoCandidates = nil
			case !database.IsSaved(db, candidate):
				scraper.SrpskainfoApprovedSites <- candidate
			}
		case candidate := <-scraper.BljesakCandidates:
			switch {
			case candidate == model.Terminator:
				scraper.BljesakCandidates = nil
			case !database.IsSaved(db, candidate):
				scraper.BljesakApprovedSites <- candidate
			}
		case candidate := <-scraper.AvazCandidates:
			switch {
			case candidate == model.Terminator:
				scraper.AvazCandidates = nil
			case !database.IsSaved(db, candidate):
				scraper.AvazApprovedSites <- candidate
			}
		}

		// Keep looping while at least one candidate stream is still open.
		if scraper.KlixCandidates != nil ||
			scraper.SrpskainfoCandidates != nil ||
			scraper.AvazCandidates != nil ||
			scraper.BljesakCandidates != nil {
			continue
		}
		return
	}
}

23
cmd/web/web.go Normal file
View File

@@ -0,0 +1,23 @@
package main
import (
"gitlab.com/kbr4/svevijesti/internal/server"
"log"
"net/http"
"time"
)
// main builds the application routes with server.CreateRoutes and serves
// them over HTTP on 127.0.0.1:8080. ListenAndServe blocks; whatever error it
// returns is passed to log.Fatal, which logs it and exits the process.
func main() {
	// Read and write deadlines applied to every connection.
	const ioTimeout = 15 * time.Second

	router := server.CreateRoutes()

	// The router is registered on the default mux and is also handed
	// directly to the server below as its Handler.
	http.Handle("/", router)

	httpServer := &http.Server{
		Addr:         "127.0.0.1:8080",
		Handler:      router,
		ReadTimeout:  ioTimeout,
		WriteTimeout: ioTimeout,
	}

	err := httpServer.ListenAndServe()
	log.Fatal(err)
}