Fresh
This commit is contained in:
49
.gitlab-ci.yml
Normal file
49
.gitlab-ci.yml
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
# This file is a template, and might need editing before it works on your project.
|
||||||
|
# This is a sample GitLab CI/CD configuration file that should run without any modifications.
|
||||||
|
# It demonstrates a basic 3 stage CI/CD pipeline. Instead of real tests or scripts,
|
||||||
|
# it uses echo commands to simulate the pipeline execution.
|
||||||
|
#
|
||||||
|
# A pipeline is composed of independent jobs that run scripts, grouped into stages.
|
||||||
|
# Stages run in sequential order, but jobs within stages run in parallel.
|
||||||
|
#
|
||||||
|
# For more information, see: https://docs.gitlab.com/ee/ci/yaml/index.html#stages
|
||||||
|
#
|
||||||
|
# You can copy and paste this template into a new `.gitlab-ci.yml` file.
|
||||||
|
# You should not add this template to an existing `.gitlab-ci.yml` file by using the `include:` keyword.
|
||||||
|
#
|
||||||
|
# To contribute improvements to CI/CD templates, please follow the Development guide at:
|
||||||
|
# https://docs.gitlab.com/ee/development/cicd/templates.html
|
||||||
|
# This specific template is located at:
|
||||||
|
# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Getting-Started.gitlab-ci.yml
|
||||||
|
|
||||||
|
# Stages execute in the order listed; jobs that share a stage run in parallel.
stages:
  - build
  - test
  - deploy

# Build stage: runs first.
build-job:
  stage: build
  script:
    - echo "Compiling the code..."
    - echo "Compile complete."

# Test stage: starts only after the build stage has completed successfully.
unit-test-job:
  stage: test
  script:
    - echo "Running unit tests... This will take about 60 seconds."
    - sleep 60
    - echo "Code coverage is 90%"

# Test stage: can run at the same time as unit-test-job.
lint-test-job:
  stage: test
  script:
    - echo "Linting code... This will take about 10 seconds."
    - sleep 10
    - echo "No lint issues found."

# Deploy stage: runs only when *both* test-stage jobs have succeeded.
deploy-job:
  stage: deploy
  environment: production
  script:
    - echo "Deploying application..."
    - echo "Application successfully deployed."
|
||||||
10
Makefile
Normal file
10
Makefile
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Connection string used by the migration targets. It is defined once so the
# two targets cannot drift apart, and with ?= so it can be overridden from the
# environment or the command line (make migrateup DB_URL=...) instead of
# relying on the credentials committed here.
DB_URL ?= postgresql://svevijesti:salmonela%20pljusti%20221%20hamo@localhost:5432/svevijestiweb?sslmode=disable

# Apply all pending migrations from db/migrations.
migrateup:
	migrate -path db/migrations -database "$(DB_URL)" -verbose up

# Roll back the migrations from db/migrations.
migratedown:
	migrate -path db/migrations -database "$(DB_URL)" -verbose down

# Run the server installation script.
installserver:
	bash ./scripts/install_server.sh

# None of the targets produce a file of the same name.
.PHONY: migrateup migratedown installserver
|
||||||
126
cmd/spider/spider.go
Normal file
126
cmd/spider/spider.go
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/lib/pq"
|
||||||
|
"gitlab.com/kbr4/svevijesti/internal/database"
|
||||||
|
"gitlab.com/kbr4/svevijesti/internal/model"
|
||||||
|
"gitlab.com/kbr4/svevijesti/internal/scraper"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
store, err := database.Connect()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
defer store.Close()
|
||||||
|
go candidateChecker()
|
||||||
|
go scraper.CrawlSrpskainfo()
|
||||||
|
go scraper.CrawlKlix()
|
||||||
|
go scraper.CrawlBljesak()
|
||||||
|
go scraper.CrawlAvaz()
|
||||||
|
|
||||||
|
article := model.ScrapedArticle{}
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case article = <-scraper.KlixArticles:
|
||||||
|
if article.Title == model.Terminator {
|
||||||
|
scraper.KlixArticles = nil
|
||||||
|
}
|
||||||
|
case article = <-scraper.SrpskainfoArticles:
|
||||||
|
if article.Title == model.Terminator {
|
||||||
|
scraper.SrpskainfoArticles = nil
|
||||||
|
}
|
||||||
|
case article = <-scraper.BljesakArticles:
|
||||||
|
if article.Title == model.Terminator {
|
||||||
|
scraper.BljesakArticles = nil
|
||||||
|
}
|
||||||
|
case article = <-scraper.AvazArticles:
|
||||||
|
if article.Title == model.Terminator {
|
||||||
|
scraper.AvazArticles = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if article.Title != model.Terminator {
|
||||||
|
fmt.Println("Saving ", article.OriginalUrl)
|
||||||
|
err = database.InsertArticle(store, article)
|
||||||
|
if err, ok := err.(*pq.Error); ok {
|
||||||
|
if err.Code.Name() != "unique_violation" {
|
||||||
|
panic(err)
|
||||||
|
} else {
|
||||||
|
fmt.Println("Skipping: ", article.OriginalUrl)
|
||||||
|
fmt.Println("Title ", article.Title)
|
||||||
|
fmt.Println("Error ", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if scraper.KlixArticles == nil &&
|
||||||
|
scraper.SrpskainfoArticles == nil &&
|
||||||
|
scraper.AvazArticles == nil &&
|
||||||
|
scraper.BljesakCandidates == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func candidateChecker() {
|
||||||
|
store, err := database.Connect()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer store.Close()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case url := <-scraper.KlixCandidates:
|
||||||
|
if url == model.Terminator {
|
||||||
|
scraper.KlixCandidates = nil
|
||||||
|
} else {
|
||||||
|
if !database.IsSaved(store, url) {
|
||||||
|
scraper.KlixApprovedSites <- url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
case url := <-scraper.SrpskainfoCandidates:
|
||||||
|
if url == model.Terminator {
|
||||||
|
scraper.SrpskainfoCandidates = nil
|
||||||
|
} else {
|
||||||
|
if !database.IsSaved(store, url) {
|
||||||
|
scraper.SrpskainfoApprovedSites <- url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
case url := <-scraper.BljesakCandidates:
|
||||||
|
if url == model.Terminator {
|
||||||
|
scraper.BljesakCandidates = nil
|
||||||
|
} else {
|
||||||
|
if !database.IsSaved(store, url) {
|
||||||
|
scraper.BljesakApprovedSites <- url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
case url := <-scraper.AvazCandidates:
|
||||||
|
if url == model.Terminator {
|
||||||
|
scraper.AvazCandidates = nil
|
||||||
|
} else {
|
||||||
|
if !database.IsSaved(store, url) {
|
||||||
|
scraper.AvazApprovedSites <- url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if scraper.KlixCandidates == nil &&
|
||||||
|
scraper.SrpskainfoCandidates == nil &&
|
||||||
|
scraper.AvazCandidates == nil &&
|
||||||
|
scraper.BljesakCandidates == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
23
cmd/web/web.go
Normal file
23
cmd/web/web.go
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"gitlab.com/kbr4/svevijesti/internal/server"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
r := server.CreateRoutes()
|
||||||
|
http.Handle("/", r)
|
||||||
|
|
||||||
|
srv := &http.Server{
|
||||||
|
Handler: r,
|
||||||
|
Addr: "127.0.0.1:8080",
|
||||||
|
// Good practice: enforce timeouts for servers you create!
|
||||||
|
WriteTimeout: 15 * time.Second,
|
||||||
|
ReadTimeout: 15 * time.Second,
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Fatal(srv.ListenAndServe())
|
||||||
|
}
|
||||||
1
db/development.conf
Normal file
1
db/development.conf
Normal file
@@ -0,0 +1 @@
|
|||||||
|
postgresql://svevijesti:salmonela%20pljusti%20221%20hamo@localhost:5432/svevijestiweb?sslmode=disable
|
||||||
1
db/migrations/20220206054902_create_articles.down.sql
Normal file
1
db/migrations/20220206054902_create_articles.down.sql
Normal file
@@ -0,0 +1 @@
|
|||||||
|
-- Remove the articles table. IF EXISTS keeps the rollback idempotent, so
-- re-running it after a partial failure does not abort.
DROP TABLE IF EXISTS articles;
|
||||||
16
db/migrations/20220206054902_create_articles.up.sql
Normal file
16
db/migrations/20220206054902_create_articles.up.sql
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
-- Create the articles table that stores scraped articles.
BEGIN;

CREATE TABLE "articles" (
    "id"           bigint      GENERATED ALWAYS AS IDENTITY,
    "title"        text        NOT NULL UNIQUE,
    "content"      text        NOT NULL,
    "slug"         text        NOT NULL UNIQUE,
    "created_at"   timestamptz NOT NULL DEFAULT now(),
    "original_url" text        NOT NULL UNIQUE,
    "source_id"    int         NOT NULL,
    CONSTRAINT "articles_pk" PRIMARY KEY ("id")
) WITH (
    OIDS = FALSE
);

COMMIT;
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
-- Enforce unique article titles through the articles_title_key constraint.
ALTER TABLE articles ADD CONSTRAINT articles_title_key UNIQUE (title);
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
-- Stop enforcing unique article titles. IF EXISTS lets the statement succeed
-- when the constraint has already been removed.
ALTER TABLE articles
    DROP CONSTRAINT IF EXISTS articles_title_key;
|
||||||
25
go.mod
Normal file
25
go.mod
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
module gitlab.com/kbr4/svevijesti

go 1.17

// Packages imported directly by this module's own source files.
require (
	github.com/gocolly/colly v1.2.0
	github.com/gosimple/slug v1.12.0
	github.com/lib/pq v1.10.4
)

// Remaining requirements. NOTE(review): some of these may also be direct
// imports in packages not reviewed here; `go mod tidy` will reclassify them.
require (
	github.com/PuerkitoBio/goquery v1.8.0 // indirect
	github.com/andybalholm/cascadia v1.3.1 // indirect
	github.com/antchfx/htmlquery v1.2.4 // indirect
	github.com/antchfx/xmlquery v1.3.9 // indirect
	github.com/antchfx/xpath v1.2.0 // indirect
	github.com/gobwas/glob v0.2.3 // indirect
	github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
	github.com/golang/protobuf v1.3.1 // indirect
	github.com/gorilla/mux v1.8.0 // indirect
	github.com/gosimple/unidecode v1.0.1 // indirect
	github.com/kennygrant/sanitize v1.2.4 // indirect
	github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
	github.com/temoto/robotstxt v1.1.2 // indirect
	golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd // indirect
	golang.org/x/text v0.3.7 // indirect
	google.golang.org/appengine v1.6.7 // indirect
)
|
||||||
63
go.sum
Normal file
63
go.sum
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
|
||||||
|
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
|
||||||
|
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
|
||||||
|
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
||||||
|
github.com/antchfx/htmlquery v1.2.4 h1:qLteofCMe/KGovBI6SQgmou2QNyedFUW+pE+BpeZ494=
|
||||||
|
github.com/antchfx/htmlquery v1.2.4/go.mod h1:2xO6iu3EVWs7R2JYqBbp8YzG50gj/ofqs5/0VZoDZLc=
|
||||||
|
github.com/antchfx/xmlquery v1.3.9 h1:Y+zyMdiUZ4fasTQTkDb3DflOXP7+obcYEh80SISBmnQ=
|
||||||
|
github.com/antchfx/xmlquery v1.3.9/go.mod h1:wojC/BxjEkjJt6dPiAqUzoXO5nIMWtxHS8PD8TmN4ks=
|
||||||
|
github.com/antchfx/xpath v1.2.0 h1:mbwv7co+x0RwgeGAOHdrKy89GvHaGvxxBtPK0uF9Zr8=
|
||||||
|
github.com/antchfx/xpath v1.2.0/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
|
||||||
|
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
|
||||||
|
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
|
||||||
|
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
|
||||||
|
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
|
||||||
|
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
|
||||||
|
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
|
||||||
|
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
||||||
|
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
|
||||||
|
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||||
|
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
|
||||||
|
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
|
||||||
|
github.com/gosimple/slug v1.12.0 h1:xzuhj7G7cGtd34NXnW/yF0l+AGNfWqwgh/IXgFy7dnc=
|
||||||
|
github.com/gosimple/slug v1.12.0/go.mod h1:UiRaFH+GEilHstLUmcBgWcI42viBN7mAb818JrYOeFQ=
|
||||||
|
github.com/gosimple/unidecode v1.0.1 h1:hZzFTMMqSswvf0LBJZCZgThIZrpDHFXux9KeGmn6T/o=
|
||||||
|
github.com/gosimple/unidecode v1.0.1/go.mod h1:CP0Cr1Y1kogOtx0bJblKzsVWrqYaqfNOnHzpgWw4Awc=
|
||||||
|
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
|
||||||
|
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
|
||||||
|
github.com/lib/pq v1.10.4 h1:SO9z7FRPzA03QhHKJrH5BXA6HU1rS4V2nIVrrNC1iYk=
|
||||||
|
github.com/lib/pq v1.10.4/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
||||||
|
github.com/mattn/go-sqlite3 v1.14.11 h1:gt+cp9c0XGqe9S/wAHTL3n/7MqY+siPWgWJgqdsFrzQ=
|
||||||
|
github.com/mattn/go-sqlite3 v1.14.11/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
|
||||||
|
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
|
||||||
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
|
github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg=
|
||||||
|
github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
|
||||||
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||||
|
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||||
|
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||||
|
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||||
|
golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
||||||
|
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||||
|
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd h1:O7DYs+zxREGLKzKoMQrtrEacpb0ZVXA5rIwylE2Xchk=
|
||||||
|
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
|
||||||
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||||
|
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
|
||||||
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
|
google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
|
||||||
|
google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
||||||
17
init/starenovine.service
Normal file
17
init/starenovine.service
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
[Unit]
Description=Stare Novine Web

# Start only once the network is reported online.
Wants=network-online.target systemd-networkd-wait-online.service
After=network-online.target

# Start rate limiting: at most 5 start attempts per 500-second window.
StartLimitBurst=5
StartLimitIntervalSec=500

[Service]
WorkingDirectory=/opt/starenovine/
ExecStart=/opt/starenovine/server

# Restart after a failure, waiting 5 seconds between attempts.
Restart=on-failure
RestartSec=5s

[Install]
WantedBy=multi-user.target
|
||||||
188
internal/database/articles.go
Normal file
188
internal/database/articles.go
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
package database
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
_ "github.com/lib/pq"
|
||||||
|
"gitlab.com/kbr4/svevijesti/internal/model"
|
||||||
|
"html/template"
|
||||||
|
"math"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func InsertArticle(store *Store, article model.ScrapedArticle) (err error) {
|
||||||
|
query := `
|
||||||
|
INSERT INTO articles
|
||||||
|
(title, content, slug, original_url, source_id)
|
||||||
|
VALUES
|
||||||
|
($1,$2,$3,$4,$5);`
|
||||||
|
|
||||||
|
_, err = store.Exec(query, article.Title, article.Content, article.Slug, article.OriginalUrl, article.SourceId)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func IsSaved(store *Store, url string) bool {
|
||||||
|
|
||||||
|
exists := false
|
||||||
|
query, err := store.Prepare(`
|
||||||
|
select exists(select 1 from articles where original_url = $1);
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer query.Close()
|
||||||
|
|
||||||
|
row := query.QueryRow(url)
|
||||||
|
err = row.Scan(&exists)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return exists
|
||||||
|
}
|
||||||
|
|
||||||
|
func ArticlesForDay(store *Store, day time.Time) (articles []model.DisplayArticle, err error) {
|
||||||
|
|
||||||
|
result := []model.DisplayArticle{}
|
||||||
|
query, err := store.Prepare(`
|
||||||
|
select id,title, content, slug, original_url, source_id, created_at from articles where created_at > $1 and created_at < $2 and LENGTH(content) > 10 order by id desc;
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
defer query.Close()
|
||||||
|
|
||||||
|
tomorrow := day.AddDate(0, 0, 1)
|
||||||
|
todayDate := day.Format("2006-01-02")
|
||||||
|
tomorrowDate := tomorrow.Format("2006-01-02")
|
||||||
|
|
||||||
|
rows, err := query.Query(todayDate, tomorrowDate)
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
for rows.Next() {
|
||||||
|
r := model.DisplayArticle{}
|
||||||
|
err = rows.Scan(&r.ID, &r.Title, &r.Content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt)
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ago := time.Now().Sub(r.CreatedAt)
|
||||||
|
hours := ago.Hours()
|
||||||
|
|
||||||
|
if hours < 1 {
|
||||||
|
r.FormatedCreatedAt = fmt.Sprintf("Prije %d minuta.", int(math.Floor(ago.Minutes())))
|
||||||
|
|
||||||
|
} else if hours > 24 {
|
||||||
|
r.FormatedCreatedAt = r.CreatedAt.Format("02.01.2006. 15:04:05")
|
||||||
|
} else {
|
||||||
|
r.FormatedCreatedAt = fmt.Sprintf("Prije %d sati.", int(math.Floor(hours)))
|
||||||
|
}
|
||||||
|
r.SourceName = model.SourceName(r.SourceId)
|
||||||
|
|
||||||
|
result = append(result, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func ArticleByID(store *Store, ID int, slug string) (article model.DisplayArticle, err error) {
|
||||||
|
|
||||||
|
result := model.DisplayArticle{}
|
||||||
|
query, err := store.Prepare(`
|
||||||
|
select id,title, content, slug, original_url, source_id, created_at from articles where id = $1 and slug = $2;
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
defer query.Close()
|
||||||
|
|
||||||
|
row := query.QueryRow(ID, slug)
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
r := model.DisplayArticle{}
|
||||||
|
content := ""
|
||||||
|
err = row.Scan(&r.ID, &r.Title, &content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt)
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ago := time.Now().Sub(r.CreatedAt)
|
||||||
|
hours := ago.Hours()
|
||||||
|
|
||||||
|
r.Content = template.HTML(strings.Replace(content, "\n", "<br>\n", -1))
|
||||||
|
|
||||||
|
if hours < 1 {
|
||||||
|
r.FormatedCreatedAt = fmt.Sprintf("Prije %d minuta.", int(math.Floor(ago.Minutes())))
|
||||||
|
|
||||||
|
} else if hours > 24 {
|
||||||
|
r.FormatedCreatedAt = r.CreatedAt.Format("02.01.2006. 15:04:05")
|
||||||
|
} else {
|
||||||
|
r.FormatedCreatedAt = fmt.Sprintf("Prije %d sati.", int(math.Floor(hours)))
|
||||||
|
}
|
||||||
|
r.SourceName = model.SourceName(r.SourceId)
|
||||||
|
|
||||||
|
result = r
|
||||||
|
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func PreviousAndNextArticleUrlByID(store *Store, ID int) (nextUrl string, previousUrl string, err error) {
|
||||||
|
|
||||||
|
nextResult, previousResult := "#", "#"
|
||||||
|
query, err := store.Prepare(`
|
||||||
|
select id,title, content, slug, original_url, source_id, created_at from articles where id < $1 and id > $2 order by id desc limit 1;
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Err 1:", err)
|
||||||
|
return nextResult, previousResult, err
|
||||||
|
}
|
||||||
|
defer query.Close()
|
||||||
|
|
||||||
|
row := query.QueryRow(ID, 0)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Err 2:", err)
|
||||||
|
return nextResult, previousResult, err
|
||||||
|
}
|
||||||
|
|
||||||
|
r := model.DisplayArticle{}
|
||||||
|
content := ""
|
||||||
|
err = row.Scan(&r.ID, &r.Title, &content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt)
|
||||||
|
if err != nil {
|
||||||
|
return nextResult, previousResult, err
|
||||||
|
}
|
||||||
|
|
||||||
|
previousResult = fmt.Sprintf("/%d/%s", r.ID, r.Slug)
|
||||||
|
|
||||||
|
query2, err := store.Prepare(`
|
||||||
|
select id,title, content, slug, original_url, source_id, created_at from articles where id < $1 and id > $2 order by id asc limit 1;
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Err 1:", err)
|
||||||
|
return nextResult, previousResult, err
|
||||||
|
}
|
||||||
|
defer query2.Close()
|
||||||
|
|
||||||
|
row = query2.QueryRow(ID+1000, ID)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Err 3:", err)
|
||||||
|
return nextResult, previousResult, err
|
||||||
|
}
|
||||||
|
|
||||||
|
content = ""
|
||||||
|
err = row.Scan(&r.ID, &r.Title, &content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Err 4:", err)
|
||||||
|
return nextResult, previousResult, err
|
||||||
|
}
|
||||||
|
nextResult = fmt.Sprintf("/%d/%s", r.ID, r.Slug)
|
||||||
|
|
||||||
|
return nextResult, previousResult, nil
|
||||||
|
}
|
||||||
25
internal/database/database.go
Normal file
25
internal/database/database.go
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
package database
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"fmt"
|
||||||
|
_ "github.com/lib/pq"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Connection parameters for the PostgreSQL database.
//
// NOTE(review): the credentials are hard-coded and committed to version
// control. They should be rotated and supplied at runtime (environment or a
// config file outside the repository).
const (
	host     = "localhost"
	port     = 5432
	user     = "svevijesti"
	password = "salmonela pljusti 221 hamo"
	dbname   = "svevijestiweb"
)

// Store is the database handle passed to every query function in this
// package. It is an alias for sql.DB.
type Store = sql.DB

// Connect opens the PostgreSQL database described by the package constants
// and verifies that it is reachable.
//
// sql.Open may only validate its arguments without connecting, so a wrong
// password or a stopped server used to surface later, at the first query.
// Connect now pings the database and returns that error directly. On error
// the returned *Store is nil.
func Connect() (*Store, error) {
	psqlInfo := fmt.Sprintf("host=%s port=%d user=%s "+
		"password='%s' dbname=%s sslmode=disable",
		host, port, user, password, dbname)

	db, err := sql.Open("postgres", psqlInfo)
	if err != nil {
		return nil, err
	}

	if err := db.Ping(); err != nil {
		// Release the pool; the handle is useless to the caller.
		db.Close()
		return nil, err
	}
	return db, nil
}
|
||||||
51
internal/model/model.go
Normal file
51
internal/model/model.go
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
package model
|
||||||
|
|
||||||
|
import (
|
||||||
|
"html/template"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ScrapedArticle is an article as produced by a crawler, ready to be inserted
// into the articles table. A value whose Title equals Terminator is used on
// the article channels as an end-of-stream marker rather than a real article.
type ScrapedArticle struct {
	Title, Content, Slug, OriginalUrl string

	// SourceId is one of the *Source constants of this package.
	SourceId int
}
|
||||||
|
|
||||||
|
// DisplayArticle is an article as loaded from the database for rendering.
type DisplayArticle struct {
	ID    int
	Title string

	// Content is typed as template.HTML, so html/template emits it verbatim.
	Content template.HTML

	Slug, OriginalUrl string
	SourceId          int
	CreatedAt         time.Time

	// FormatedCreatedAt and SourceName are display strings derived from
	// CreatedAt and SourceId by the database package.
	FormatedCreatedAt, SourceName string
}
|
||||||
|
|
||||||
|
// Identifiers of the crawled sites, stored in the source_id column. The
// values are spelled out because they are persisted.
const (
	KlixSource       = 1
	SrpskainfoSource = 2
	BljesakSource    = 3
	AvazSource       = 4
)

// sourceNames maps every known source id to its display name.
var sourceNames = map[int]string{
	KlixSource:       "klix",
	SrpskainfoSource: "srpskainfo",
	BljesakSource:    "bljesak",
	AvazSource:       "avaz",
}

// SourceName returns the display name of the given source id, or
// "starenovine" when the id is not one of the known sources.
func SourceName(sourceId int) string {
	if name, known := sourceNames[sourceId]; known {
		return name
	}
	return "starenovine"
}

// Terminator is the sentinel value crawlers send on their channels to signal
// that no further items will follow.
const Terminator = "TERMINATED"
|
||||||
104
internal/scraper/avaz.go
Normal file
104
internal/scraper/avaz.go
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
package scraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/gocolly/colly"
|
||||||
|
"github.com/gosimple/slug"
|
||||||
|
"gitlab.com/kbr4/svevijesti/internal/model"
|
||||||
|
"math/rand"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
var AvazArticles = make(chan model.ScrapedArticle)
|
||||||
|
var AvazCandidates = make(chan string)
|
||||||
|
var AvazApprovedSites = make(chan string, 2)
|
||||||
|
|
||||||
|
func CrawlAvaz() {
|
||||||
|
|
||||||
|
crHomePage := colly.NewCollector(colly.AllowedDomains("avaz.ba"))
|
||||||
|
crArticlePage := colly.NewCollector(colly.AllowedDomains("avaz.ba"))
|
||||||
|
|
||||||
|
setupAvazArticlePageCrawler(crArticlePage)
|
||||||
|
setupAvazHomepageCrawler(crHomePage, crArticlePage)
|
||||||
|
|
||||||
|
go visitAvazApprovedPages(crArticlePage)
|
||||||
|
}
|
||||||
|
|
||||||
|
func visitAvazApprovedPages(crArticlePage *colly.Collector) {
|
||||||
|
fmt.Println("Consuming sites!")
|
||||||
|
for url := range AvazApprovedSites {
|
||||||
|
fmt.Println("Visiting: ", url)
|
||||||
|
crArticlePage.Visit(url)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func setupAvazHomepageCrawler(crHomePage *colly.Collector, crArticlePage *colly.Collector) {
|
||||||
|
|
||||||
|
articleUrlR, _ := regexp.Compile("/\\d\\d+/([a-z0-9-]+)")
|
||||||
|
articleBlacklist, _ := regexp.Compile("(english)")
|
||||||
|
crHomePage.OnHTML("a", func(e *colly.HTMLElement) {
|
||||||
|
url := e.Attr("href")
|
||||||
|
completeUrl := url
|
||||||
|
if articleUrlR.MatchString(url) && !articleBlacklist.MatchString(url) {
|
||||||
|
AvazCandidates <- completeUrl
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
crHomePage.OnScraped(func(_ *colly.Response) {
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
terminating := model.ScrapedArticle{}
|
||||||
|
terminating.Title = model.Terminator
|
||||||
|
AvazArticles <- terminating
|
||||||
|
AvazApprovedSites <- model.Terminator
|
||||||
|
AvazCandidates <- model.Terminator
|
||||||
|
})
|
||||||
|
|
||||||
|
crHomePage.OnError(func(_ *colly.Response, _ error) {
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
terminating := model.ScrapedArticle{}
|
||||||
|
terminating.Title = model.Terminator
|
||||||
|
AvazArticles <- terminating
|
||||||
|
AvazApprovedSites <- model.Terminator
|
||||||
|
AvazCandidates <- model.Terminator
|
||||||
|
})
|
||||||
|
|
||||||
|
go crHomePage.Visit("https://avaz.ba")
|
||||||
|
}
|
||||||
|
|
||||||
|
func setupAvazArticlePageCrawler(crArticlePage *colly.Collector) {
|
||||||
|
crArticlePage.OnHTML("html", func(e *colly.HTMLElement) {
|
||||||
|
|
||||||
|
url := e.Request.URL.String()
|
||||||
|
|
||||||
|
title := ""
|
||||||
|
e.ForEachWithBreak("h1.title, h3.title", func(_ int, el *colly.HTMLElement) bool {
|
||||||
|
title = el.Text
|
||||||
|
return false
|
||||||
|
})
|
||||||
|
|
||||||
|
text := ""
|
||||||
|
|
||||||
|
e.ForEach("p.podtitle, div.artikal-text", func(_ int, el *colly.HTMLElement) {
|
||||||
|
text += extractJustText(el.DOM)
|
||||||
|
})
|
||||||
|
|
||||||
|
article := model.ScrapedArticle{}
|
||||||
|
|
||||||
|
trimmedText := strings.TrimSpace(text)
|
||||||
|
article.OriginalUrl = url
|
||||||
|
article.Title = title
|
||||||
|
article.Content = trimmedText
|
||||||
|
article.SourceId = model.AvazSource
|
||||||
|
slugBase := fmt.Sprintf("%d %d %s", article.SourceId, rand.Intn(1000), title)
|
||||||
|
article.Slug = slug.Make(slugBase)
|
||||||
|
|
||||||
|
AvazArticles <- article
|
||||||
|
})
|
||||||
|
|
||||||
|
crArticlePage.OnError(func(_ *colly.Response, err error) {
|
||||||
|
fmt.Println("Problem crawling!", err)
|
||||||
|
})
|
||||||
|
|
||||||
|
}
|
||||||
104
internal/scraper/blijesak.go
Normal file
104
internal/scraper/blijesak.go
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
package scraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/gocolly/colly"
|
||||||
|
"github.com/gosimple/slug"
|
||||||
|
"gitlab.com/kbr4/svevijesti/internal/model"
|
||||||
|
"math/rand"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
var BljesakArticles = make(chan model.ScrapedArticle)
|
||||||
|
var BljesakCandidates = make(chan string)
|
||||||
|
var BljesakApprovedSites = make(chan string, 2)
|
||||||
|
|
||||||
|
func CrawlBljesak() {
|
||||||
|
|
||||||
|
crHomePage := colly.NewCollector(colly.AllowedDomains("bljesak.info"))
|
||||||
|
crArticlePage := colly.NewCollector(colly.AllowedDomains("bljesak.info"))
|
||||||
|
|
||||||
|
setupBljesakArticlePageCrawler(crArticlePage)
|
||||||
|
setupBljesakHomepageCrawler(crHomePage, crArticlePage)
|
||||||
|
|
||||||
|
go visitBljesakApprovedPages(crArticlePage)
|
||||||
|
}
|
||||||
|
|
||||||
|
func visitBljesakApprovedPages(crArticlePage *colly.Collector) {
|
||||||
|
fmt.Println("Consuming sites!")
|
||||||
|
for url := range BljesakApprovedSites {
|
||||||
|
fmt.Println("Visiting: ", url)
|
||||||
|
crArticlePage.Visit(url)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func setupBljesakHomepageCrawler(crHomePage *colly.Collector, crArticlePage *colly.Collector) {
|
||||||
|
|
||||||
|
articleUrlR, _ := regexp.Compile("\\d\\d+$")
|
||||||
|
articleBlacklist, _ := regexp.Compile("(info-vodic|foto-data)")
|
||||||
|
crHomePage.OnHTML("a", func(e *colly.HTMLElement) {
|
||||||
|
url := e.Attr("href")
|
||||||
|
completeUrl := url
|
||||||
|
if articleUrlR.MatchString(url) && !articleBlacklist.MatchString(url) {
|
||||||
|
BljesakCandidates <- completeUrl
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
crHomePage.OnScraped(func(_ *colly.Response) {
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
terminating := model.ScrapedArticle{}
|
||||||
|
terminating.Title = model.Terminator
|
||||||
|
BljesakArticles <- terminating
|
||||||
|
BljesakApprovedSites <- model.Terminator
|
||||||
|
BljesakCandidates <- model.Terminator
|
||||||
|
})
|
||||||
|
|
||||||
|
crHomePage.OnError(func(_ *colly.Response, _ error) {
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
terminating := model.ScrapedArticle{}
|
||||||
|
terminating.Title = model.Terminator
|
||||||
|
BljesakArticles <- terminating
|
||||||
|
BljesakApprovedSites <- model.Terminator
|
||||||
|
BljesakCandidates <- model.Terminator
|
||||||
|
})
|
||||||
|
|
||||||
|
go crHomePage.Visit("https://bljesak.info")
|
||||||
|
}
|
||||||
|
|
||||||
|
func setupBljesakArticlePageCrawler(crArticlePage *colly.Collector) {
|
||||||
|
crArticlePage.OnHTML("html", func(e *colly.HTMLElement) {
|
||||||
|
|
||||||
|
url := e.Request.URL.String()
|
||||||
|
|
||||||
|
title := ""
|
||||||
|
e.ForEachWithBreak("h1.title, h3.title", func(_ int, el *colly.HTMLElement) bool {
|
||||||
|
title = el.Text
|
||||||
|
return false
|
||||||
|
})
|
||||||
|
|
||||||
|
text := ""
|
||||||
|
|
||||||
|
e.ForEach("div.intro, div#infiniteLoadBreakpoint", func(_ int, el *colly.HTMLElement) {
|
||||||
|
text += extractJustText(el.DOM)
|
||||||
|
})
|
||||||
|
|
||||||
|
article := model.ScrapedArticle{}
|
||||||
|
|
||||||
|
trimmedText := strings.TrimSpace(text)
|
||||||
|
article.OriginalUrl = url
|
||||||
|
article.Title = title
|
||||||
|
article.Content = trimmedText
|
||||||
|
article.SourceId = model.BljesakSource
|
||||||
|
slugBase := fmt.Sprintf("%d %d %s", article.SourceId, rand.Intn(1000), title)
|
||||||
|
article.Slug = slug.Make(slugBase)
|
||||||
|
|
||||||
|
BljesakArticles <- article
|
||||||
|
})
|
||||||
|
|
||||||
|
crArticlePage.OnError(func(_ *colly.Response, err error) {
|
||||||
|
fmt.Println("Problem crawling!", err)
|
||||||
|
})
|
||||||
|
|
||||||
|
}
|
||||||
103
internal/scraper/klix.go
Normal file
103
internal/scraper/klix.go
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
package scraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/gocolly/colly"
|
||||||
|
"github.com/gosimple/slug"
|
||||||
|
"gitlab.com/kbr4/svevijesti/internal/model"
|
||||||
|
"math/rand"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
var KlixArticles = make(chan model.ScrapedArticle)
|
||||||
|
var KlixCandidates = make(chan string)
|
||||||
|
var KlixApprovedSites = make(chan string, 2)
|
||||||
|
|
||||||
|
func CrawlKlix() {
|
||||||
|
|
||||||
|
crHomePage := colly.NewCollector(colly.AllowedDomains("www.klix.ba"))
|
||||||
|
crArticlePage := colly.NewCollector(colly.AllowedDomains("www.klix.ba"))
|
||||||
|
|
||||||
|
setupKlArticlePageCrawler(crArticlePage)
|
||||||
|
setupKlHomepageCrawler(crHomePage, crArticlePage)
|
||||||
|
|
||||||
|
go visitApprovedPages(crArticlePage)
|
||||||
|
}
|
||||||
|
|
||||||
|
func visitApprovedPages(crArticlePage *colly.Collector) {
|
||||||
|
fmt.Println("Consuming sites!")
|
||||||
|
for url := range KlixApprovedSites {
|
||||||
|
fmt.Println("Visiting: ", url)
|
||||||
|
crArticlePage.Visit(url)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func setupKlHomepageCrawler(crHomePage *colly.Collector, crArticlePage *colly.Collector) {
|
||||||
|
|
||||||
|
articleUrlR, _ := regexp.Compile("\\d\\d+$")
|
||||||
|
crHomePage.OnHTML(".container a", func(e *colly.HTMLElement) {
|
||||||
|
url := e.Attr("href")
|
||||||
|
completeUrl := "https://www.klix.ba" + url
|
||||||
|
if articleUrlR.MatchString(url) {
|
||||||
|
KlixCandidates <- completeUrl
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
crHomePage.OnScraped(func(_ *colly.Response) {
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
terminating := model.ScrapedArticle{}
|
||||||
|
terminating.Title = model.Terminator
|
||||||
|
KlixArticles <- terminating
|
||||||
|
KlixApprovedSites <- model.Terminator
|
||||||
|
KlixCandidates <- model.Terminator
|
||||||
|
})
|
||||||
|
|
||||||
|
crHomePage.OnError(func(_ *colly.Response, _ error) {
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
terminating := model.ScrapedArticle{}
|
||||||
|
terminating.Title = model.Terminator
|
||||||
|
KlixArticles <- terminating
|
||||||
|
KlixApprovedSites <- model.Terminator
|
||||||
|
KlixCandidates <- model.Terminator
|
||||||
|
})
|
||||||
|
|
||||||
|
go crHomePage.Visit("https://www.klix.ba")
|
||||||
|
}
|
||||||
|
|
||||||
|
func setupKlArticlePageCrawler(crArticlePage *colly.Collector) {
|
||||||
|
crArticlePage.OnHTML("html", func(e *colly.HTMLElement) {
|
||||||
|
|
||||||
|
url := e.Request.URL.String()
|
||||||
|
|
||||||
|
title := ""
|
||||||
|
e.ForEachWithBreak("title", func(_ int, el *colly.HTMLElement) bool {
|
||||||
|
title = el.Text
|
||||||
|
return false
|
||||||
|
})
|
||||||
|
|
||||||
|
text := ""
|
||||||
|
|
||||||
|
e.ForEach("div#text, p.lead", func(_ int, el *colly.HTMLElement) {
|
||||||
|
text += extractJustText(el.DOM)
|
||||||
|
})
|
||||||
|
|
||||||
|
article := model.ScrapedArticle{}
|
||||||
|
|
||||||
|
trimmedText := strings.TrimSpace(text)
|
||||||
|
article.OriginalUrl = url
|
||||||
|
article.Title = title
|
||||||
|
article.Content = trimmedText
|
||||||
|
article.SourceId = model.KlixSource
|
||||||
|
slugBase := fmt.Sprintf("%d %d %s", article.SourceId, rand.Intn(1000), title)
|
||||||
|
article.Slug = slug.Make(slugBase)
|
||||||
|
|
||||||
|
KlixArticles <- article
|
||||||
|
})
|
||||||
|
|
||||||
|
crArticlePage.OnError(func(_ *colly.Response, _ error) {
|
||||||
|
fmt.Println("Problem crawling!")
|
||||||
|
})
|
||||||
|
|
||||||
|
}
|
||||||
26
internal/scraper/scraper.go
Normal file
26
internal/scraper/scraper.go
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
package scraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
)
|
||||||
|
|
||||||
|
func extractJustText(el *goquery.Selection) string {
|
||||||
|
textPart := ""
|
||||||
|
htmlPart, _ := el.Html()
|
||||||
|
if len(el.Nodes) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
//fmt.Println("Checking: ", htmlPart, "Duzina: ", strconv.Itoa(len(el.Nodes)), " Type je ", el.Nodes[0].Type, " jednakost ", el.Text() == htmlPart)
|
||||||
|
if el.Text() == htmlPart {
|
||||||
|
return el.Text() + "\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
el.Children().Each(func(_ int, el2 *goquery.Selection) {
|
||||||
|
if el2.Is("div, p, span, a") {
|
||||||
|
textPart += extractJustText(el2)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
return textPart
|
||||||
|
}
|
||||||
103
internal/scraper/srpskainfo.go
Normal file
103
internal/scraper/srpskainfo.go
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
package scraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/gocolly/colly"
|
||||||
|
"github.com/gosimple/slug"
|
||||||
|
"gitlab.com/kbr4/svevijesti/internal/model"
|
||||||
|
"math/rand"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
var SrpskainfoArticles = make(chan model.ScrapedArticle)
|
||||||
|
var SrpskainfoCandidates = make(chan string)
|
||||||
|
var SrpskainfoApprovedSites = make(chan string, 2)
|
||||||
|
|
||||||
|
func CrawlSrpskainfo() {
|
||||||
|
|
||||||
|
crHomePage := colly.NewCollector(colly.AllowedDomains("srpskainfo.com"))
|
||||||
|
crArticlePage := colly.NewCollector(colly.AllowedDomains("srpskainfo.com"))
|
||||||
|
|
||||||
|
setupSiArticlePageCrawler(crArticlePage)
|
||||||
|
setupSiHomepageCrawler(crHomePage, crArticlePage)
|
||||||
|
|
||||||
|
go visitSiApprovedPages(crArticlePage)
|
||||||
|
}
|
||||||
|
|
||||||
|
func visitSiApprovedPages(crArticlePage *colly.Collector) {
|
||||||
|
fmt.Println("Consuming sites!")
|
||||||
|
for url := range SrpskainfoApprovedSites {
|
||||||
|
fmt.Println("Visiting: ", url)
|
||||||
|
crArticlePage.Visit(url)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func setupSiHomepageCrawler(crHomePage *colly.Collector, crArticlePage *colly.Collector) {
|
||||||
|
|
||||||
|
crHomePage.OnHTML("a", func(e *colly.HTMLElement) {
|
||||||
|
articleUrlR, _ := regexp.Compile("([A-Za-z0-9]+-){3,}([A-Za-z0-9]+)/$")
|
||||||
|
url := e.Attr("href")
|
||||||
|
completeUrl := url
|
||||||
|
if articleUrlR.MatchString(url) {
|
||||||
|
SrpskainfoCandidates <- completeUrl
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
crHomePage.OnScraped(func(_ *colly.Response) {
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
terminating := model.ScrapedArticle{}
|
||||||
|
terminating.Title = model.Terminator
|
||||||
|
SrpskainfoArticles <- terminating
|
||||||
|
SrpskainfoApprovedSites <- model.Terminator
|
||||||
|
SrpskainfoCandidates <- model.Terminator
|
||||||
|
})
|
||||||
|
|
||||||
|
crHomePage.OnError(func(_ *colly.Response, _ error) {
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
terminating := model.ScrapedArticle{}
|
||||||
|
terminating.Title = model.Terminator
|
||||||
|
SrpskainfoArticles <- terminating
|
||||||
|
SrpskainfoApprovedSites <- model.Terminator
|
||||||
|
SrpskainfoCandidates <- model.Terminator
|
||||||
|
})
|
||||||
|
|
||||||
|
go crHomePage.Visit("https://srpskainfo.com")
|
||||||
|
}
|
||||||
|
|
||||||
|
func setupSiArticlePageCrawler(crArticlePage *colly.Collector) {
|
||||||
|
crArticlePage.OnHTML("html", func(e *colly.HTMLElement) {
|
||||||
|
|
||||||
|
url := e.Request.URL.String()
|
||||||
|
|
||||||
|
title := ""
|
||||||
|
e.ForEachWithBreak("h1", func(_ int, el *colly.HTMLElement) bool {
|
||||||
|
title = el.Text
|
||||||
|
return false
|
||||||
|
})
|
||||||
|
|
||||||
|
text := ""
|
||||||
|
|
||||||
|
e.ForEach("div.article__top-content, div.article__content", func(_ int, el *colly.HTMLElement) {
|
||||||
|
text += extractJustText(el.DOM)
|
||||||
|
})
|
||||||
|
|
||||||
|
article := model.ScrapedArticle{}
|
||||||
|
|
||||||
|
trimmedText := strings.TrimSpace(text)
|
||||||
|
article.OriginalUrl = url
|
||||||
|
article.Title = title
|
||||||
|
article.Content = trimmedText
|
||||||
|
article.SourceId = model.SrpskainfoSource
|
||||||
|
slugBase := fmt.Sprintf("%d %d %s", article.SourceId, rand.Intn(1000), title)
|
||||||
|
article.Slug = slug.Make(slugBase)
|
||||||
|
|
||||||
|
SrpskainfoArticles <- article
|
||||||
|
})
|
||||||
|
|
||||||
|
crArticlePage.OnError(func(_ *colly.Response, _ error) {
|
||||||
|
fmt.Println("Problem crawling!")
|
||||||
|
})
|
||||||
|
|
||||||
|
}
|
||||||
110
internal/server/articles.go
Normal file
110
internal/server/articles.go
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
"gitlab.com/kbr4/svevijesti/internal/database"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func rootHandler(wr http.ResponseWriter, req *http.Request) {
|
||||||
|
title := "Pocetna"
|
||||||
|
store, err := database.Connect()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
defer store.Close()
|
||||||
|
|
||||||
|
articles, err := database.ArticlesForDay(store, time.Now())
|
||||||
|
if err != nil {
|
||||||
|
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
|
||||||
|
dayBefore := "/dan/" + time.Now().Add(-24*time.Hour).Format("2006-01-02")
|
||||||
|
|
||||||
|
data := map[string]interface{}{
|
||||||
|
"title": title,
|
||||||
|
"articles": articles,
|
||||||
|
"previous": dayBefore,
|
||||||
|
"next": "/",
|
||||||
|
}
|
||||||
|
|
||||||
|
err = templates.ExecuteTemplate(wr, "homeHTML", data)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func dailyArticlesHandler(wr http.ResponseWriter, req *http.Request) {
|
||||||
|
vars := mux.Vars(req)
|
||||||
|
day, err := time.Parse("2006-01-02", vars["date"])
|
||||||
|
if err != nil {
|
||||||
|
http.Error(wr, err.Error(), http.StatusNotFound)
|
||||||
|
}
|
||||||
|
dayBefore := "/dan/" + day.Add(-24*time.Hour).Format("2006-01-02")
|
||||||
|
dayAfter := "/dan/" + day.Add(24*time.Hour).Format("2006-01-02")
|
||||||
|
|
||||||
|
if day.Add(24*time.Hour).Format("2006-01-02") == time.Now().Format("2006-01-02") {
|
||||||
|
dayAfter = "/"
|
||||||
|
}
|
||||||
|
|
||||||
|
title := fmt.Sprintf("Stare novine na dan %s", day.Format("2006-01-02"))
|
||||||
|
store, err := database.Connect()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
defer store.Close()
|
||||||
|
|
||||||
|
articles, err := database.ArticlesForDay(store, day)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
|
||||||
|
data := map[string]interface{}{
|
||||||
|
"title": title,
|
||||||
|
"articles": articles,
|
||||||
|
"previous": dayBefore,
|
||||||
|
"next": dayAfter,
|
||||||
|
}
|
||||||
|
|
||||||
|
err = templates.ExecuteTemplate(wr, "homeHTML", data)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func articleHandler(wr http.ResponseWriter, req *http.Request) {
|
||||||
|
store, err := database.Connect()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
defer store.Close()
|
||||||
|
|
||||||
|
vars := mux.Vars(req)
|
||||||
|
articleID, err := strconv.Atoi(vars["id"])
|
||||||
|
if err != nil {
|
||||||
|
articleID = -1
|
||||||
|
}
|
||||||
|
articleSlug := vars["slug"]
|
||||||
|
article, err := database.ArticleByID(store, articleID, articleSlug)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(wr, err.Error(), http.StatusNotFound)
|
||||||
|
}
|
||||||
|
|
||||||
|
next, previous, _ := database.PreviousAndNextArticleUrlByID(store, articleID)
|
||||||
|
|
||||||
|
title := article.Title
|
||||||
|
data := map[string]interface{}{
|
||||||
|
"title": title,
|
||||||
|
"article": article,
|
||||||
|
"previous": previous,
|
||||||
|
"next": next,
|
||||||
|
}
|
||||||
|
|
||||||
|
err = templates.ExecuteTemplate(wr, "articleHTML", data)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
}
|
||||||
46
internal/server/server.go
Normal file
46
internal/server/server.go
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
"html/template"
|
||||||
|
"io/ioutil"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Template locations, relative to the working directory of the process.
var tPath = "./web/tpl/"
var dPath = "./web/data/"

// templateDirs lists the directories getTemplates scans for *.html files.
var templateDirs = []string{"./web/tpl", "./web/data"}

// templates is the parsed template set used by the HTTP handlers; init
// assigns it from getTemplates.
var templates *template.Template

// getTemplates parses every *.html file found directly inside templateDirs
// into a single template set.
//
// It returns an error when a directory cannot be read, when no template file
// is found at all, or when parsing fails. The returned set is nil whenever
// the error is non-nil.
func getTemplates() (*template.Template, error) {
	var allFiles []string
	for _, dir := range templateDirs {
		entries, err := ioutil.ReadDir(dir)
		if err != nil {
			// This used to be ignored, which surfaced later as an unrelated
			// "no files named" parse error or as missing templates.
			return nil, fmt.Errorf("reading template directory %q: %w", dir, err)
		}
		for _, entry := range entries {
			name := entry.Name()
			if !strings.HasSuffix(name, ".html") {
				continue
			}
			filePath := filepath.Join(dir, name)
			fmt.Println("Template found: ", filePath)
			allFiles = append(allFiles, filePath)
		}
	}

	if len(allFiles) == 0 {
		return nil, fmt.Errorf("no .html templates found in %v", templateDirs)
	}

	return template.New("").ParseFiles(allFiles...)
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
templates, _ = getTemplates()
|
||||||
|
}
|
||||||
|
|
||||||
|
func CreateRoutes() *mux.Router {
|
||||||
|
r := mux.NewRouter()
|
||||||
|
r.HandleFunc("/dan/{date}", dailyArticlesHandler)
|
||||||
|
r.HandleFunc("/{id:[0-9]+}/{slug}", articleHandler)
|
||||||
|
r.HandleFunc("/", rootHandler)
|
||||||
|
return r
|
||||||
|
}
|
||||||
27
pyth/avaz.py
Normal file
27
pyth/avaz.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def getNews(url):
    """Fetch ``url`` and print the title, teaser and category of every
    ``<article class="news__item">`` found on the page.

    A field is printed as an empty string when its tag is missing from an
    article. A non-200 response prints the status code instead.
    """
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=10)

    if response.status_code != 200:
        print(f"Error. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    articles = soup.find_all('article', class_='news__item')

    def text_of(parent, tag):
        # find() returns None when the tag is absent; the old code then
        # crashed with AttributeError on the first incomplete article.
        node = parent.find(tag)
        return node.text.strip() if node is not None else ''

    for index, article in enumerate(articles, start=1):
        title = text_of(article, 'h2')
        content = text_of(article, 'p')
        category = text_of(article, 'span')

        print(f"{index}. Title: {title}")
        print(f" Content: {content}")
        print(f" Category: {category}")
        print('****************************')
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # NOTE(review): this file is named avaz.py, yet the listing it scrapes is
    # on srpskainfo.com — confirm this is the intended target.
    getNews('https://srpskainfo.com/sve-vijesti/')
|
||||||
74
pyth/checkforsimilar.py
Normal file
74
pyth/checkforsimilar.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import psycopg2
|
||||||
|
from openai import OpenAI
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
|
|
||||||
|
import os
import re

import spacy

# SECURITY: the OpenAI API key and the database password used to be
# hard-coded here. Both are now read from the environment, and the previously
# committed values must be treated as leaked and rotated.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

nlp = spacy.load("en_core_web_sm")

twenty_minutes_ago_utc = datetime.now(timezone.utc) - timedelta(minutes=20)

db_params = {
    'host': 'localhost',
    'port': '5432',
    'database': 'svevijestiweb',
    'user': 'svevijesti',
    'password': os.environ["SVEVIJESTI_DB_PASSWORD"],
}

conn = psycopg2.connect(**db_params)
cursor = conn.cursor()


def convert_text_to_vector(text):
    """Return the spaCy document vector for ``text``."""
    return nlp(text).vector


def check_similarity_with_gpt3(text1, text2):
    """Ask the chat model how similar two texts are.

    Returns a float in [0, 1]. A reply that contains no number yields 0.0,
    and a reply above 1 is read as a percentage.
    """
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a text similarity assistant. Reply with a single number between 0 and 1 and nothing else."},
            {"role": "user", "content": f"Compare the similarity between the following two texts:\n\nText 1: {text1}\nText 2: {text2}\n\nSimilarity:"}
        ]
    )
    reply = completion.choices[0].message.content or ""
    print("Analiza")

    # The reply is free text; it used to be returned as-is and compared with a
    # float, which raises TypeError. Extract the first number instead.
    match = re.search(r"\d+(?:\.\d+)?", reply)
    if match is None:
        return 0.0
    score = float(match.group())
    if score > 1.0:
        score /= 100.0
    return min(max(score, 0.0), 1.0)


similarity_threshold = 0.8
deleted_titles = set()

try:
    # NOTE(review): this selects articles OLDER than twenty minutes; if the
    # intent was to de-duplicate only fresh articles the comparison should be
    # ">" — confirm. Every pair costs one API call.
    cursor.execute("SELECT title FROM articles WHERE articles.created_at < %s", (twenty_minutes_ago_utc,))
    data_from_database = cursor.fetchall()

    for i in range(len(data_from_database)):
        text1 = data_from_database[i][0]
        if text1 in deleted_titles:
            continue
        for j in range(i + 1, len(data_from_database)):
            text2 = data_from_database[j][0]
            if text2 in deleted_titles:
                continue
            if text1 == text2:
                # Deleting by title would remove BOTH copies of an exact
                # duplicate. NOTE(review): select and delete by primary key
                # to handle this case once the schema is confirmed.
                continue

            # Send the titles themselves; the old code passed spaCy vectors
            # (arrays of floats) into the prompt.
            similarity_score = check_similarity_with_gpt3(text1, text2)
            print(similarity_score)
            print("T1", text1)
            print("T2", text2)

            if similarity_score > similarity_threshold:
                try:
                    # text2 is a title, so filter on the title column; the
                    # old filter on "content" could never match.
                    cursor.execute("DELETE FROM articles WHERE title = %s", (text2,))
                    conn.commit()
                    deleted_titles.add(text2)
                    print(f"Deleted rows where title is {text2}")
                except Exception as e:
                    conn.rollback()  # Roll back changes if an error occurs
                    print(f"Error deleting rows: {e}")
finally:
    # Release the connection even when a query or an API call fails.
    cursor.close()
    conn.close()
|
||||||
34
pyth/srpskainfo.py
Normal file
34
pyth/srpskainfo.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def getNews(url):
    """Fetch ``url`` and print the title, teaser, category and link of every
    ``<article class="news__item">`` found on the page.

    A field is printed as an empty string when its tag (or the link's
    ``href``) is missing from an article. A non-200 response prints the
    status code instead.
    """
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=10)

    if response.status_code != 200:
        print(f"Error. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    articles = soup.find_all('article', class_='news__item')

    def text_of(parent, tag):
        # find() returns None when the tag is absent; the old code then
        # crashed with AttributeError on the first incomplete article.
        node = parent.find(tag)
        return node.text.strip() if node is not None else ''

    for index, article in enumerate(articles, start=1):
        title = text_of(article, 'h2')
        content = text_of(article, 'p')
        category = text_of(article, 'span')

        anchor = article.find('a')
        slink = anchor.get('href', '') if anchor is not None else ''

        print(f"{index}. Title: {title}")
        print(f" Content: {content}")
        print(f" Category: {category}")
        print(f"Link: {slink}")
        print('****************************')
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Scrape the srpskainfo.com "sve-vijesti" listing and print what is found.
    getNews('https://srpskainfo.com/sve-vijesti/')
|
||||||
8
scripts/install_server.sh
Normal file
8
scripts/install_server.sh
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/bash
# Installs freshly built binaries and web assets into /opt/starenovine and
# restarts the starenovine service. Run from the directory that contains
# ./server, ./spider and ./web.

# Abort on the first failed copy so the service is never started on top of a
# half-updated installation.
set -euo pipefail

# The service may not be installed or running yet (first deployment).
sudo systemctl stop starenovine || true

sudo cp ./server /opt/starenovine/server
sudo cp -R ./web /opt/starenovine/

# killall exits non-zero when no spider process is running; that is fine.
sudo killall spider || true
sudo cp ./spider /opt/starenovine/spider

sudo systemctl start starenovine
|
||||||
16
web/data/articles.html
Normal file
16
web/data/articles.html
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
{{define "articlesHTML"}}
{{/* Lists .articles as a reversed ordered list, or shows a notice when there
     are none. Only <li> may be a child of <ol>, so the spacing breaks live
     inside each item and the notice is rendered outside the list. */}}
{{if .articles}}
<ol reversed>
    {{range .articles}}
    <li>
        <div class="article_content">
            <a href="/{{.ID}}/{{.Slug}}">
            {{.Title}}</a></div>
        <div class="timestamp">{{.SourceName}} - {{ .FormatedCreatedAt }}</div>
        <br><br>
    </li>
    {{end}}
</ol>
{{else}}
<p>Nema članaka za izabrani datum.</p>
{{end}}
{{end}}
|
||||||
13
web/data/footer.html
Normal file
13
web/data/footer.html
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{{define "footerHTML"}}
{{/* Page footer with the previous / home / next navigation. The arrow labels
     use &lt; and &gt; because a bare "<" in text is an HTML parse error. */}}

<footer>
    SN
    <div>
        <nav>
            <a href="{{.previous}}">&lt;----</a> |
            <a href="/">Početna</a> |
            <a href="{{.next}}">----&gt;</a>
        </nav>
    </div>
</footer>
{{end}}
|
||||||
56
web/data/head.html
Normal file
56
web/data/head.html
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
{{define "headHTML"}}
{{/* Opens the document and renders <head>. Expects .title in the data.
     The <html> element opened here is closed by the including page template.
     NOTE(review): og:url and the canonical link always point at the site
     root, also on article and daily pages — confirm this is intended. */}}
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=Edge">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta property="og:site_name" content="starenovine">
<meta name="twitter:card" content="summary">
<meta property="og:title" content="{{.title}}">
<meta name="description" content="stare novine omogucavaju citanje svih vijesti iz bosne i hercegovine, hrvatske, srbije, crne gore, kosova na bosanskom, crnogorskom, hrvatskom, srpskom jeziku na svim uredjajima koliko god stari bili">
<meta property="og:url" content="https://www.starenovine.com">
<title>{{.title}} - stare novine</title>
<link rel="canonical" href="https://www.starenovine.com/">
<style type="text/css">
body {
    font-family: monospace;
    font-size: 1.5em;
    width: 90%;
    max-width: 98vw;
}

h1#title {
    margin-block-end: 0;
    font-size: 1.7em;
}

.timestamp {
    font-size: 0.8em;
    color: gray;
}

.single_timestamp {
    font-size: 0.77em;
    margin-bottom: 0.7em;
    color: gray;
}

#logo {
    font-size: 2vw;
    background: white;
}

pre.article_content {
    background: white;
}

html {
    margin: 0 auto;
    max-width: 98vw;
    overflow-x: hidden;
}

</style>
</head>
{{end}}
|
||||||
20
web/data/header.html
Normal file
20
web/data/header.html
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
{{define "headerHTML"}}
{{/* Page header: the ASCII-art logo followed by the previous / home / next
     navigation. The arrow labels use &lt; and &gt; because a bare "<" in text
     is an HTML parse error. The logo lines are reproduced exactly as found. */}}
<header>
<pre id="logo">
_____ ______ ____ ____ ___ ____ ___ __ __ ____ ____ ___
/ ___/| | / || \ / _]| \ / \ | | || || \ / _]
( \_ | || o || D ) / [_ | _ || || | | | | | _ | / [_
\__ ||_| |_|| || / | _]| | || O || | | | | | | || _]
/ \ | | | | _ || \ | [_ | | || || : | | | | | || [_
\ | | | | | || . \| || | || | \ / | | | | || |
\___| |__| |__|__||__|\_||_____||__|__| \___/ \_/ |____||__|__||_____|

</pre>
<br>
<nav>
    <a href="{{.previous}}">&lt;----</a> |
    <a href="/">Početna</a> |
    <a href="{{.next}}">----&gt;</a>
</nav>
</header>
{{end}}
|
||||||
11
web/data/single_article.html
Normal file
11
web/data/single_article.html
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
{{define "singleArticleHTML"}}
{{/* Renders .article: title, source and timestamp line, then the body. */}}
{{with .article }}
<div class="article_content">
    <h1 id="title">{{.Title}}</h1>
    <div class="single_timestamp">{{.SourceName}} - {{ .FormatedCreatedAt }}</div>
    <div class="article_body">
        {{.Content}}
    </div>
    <br><br>
</div>{{/* closes article_content, which used to be left open */}}
{{end}}
{{end}}
|
||||||
12
web/tpl/article.html
Normal file
12
web/tpl/article.html
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{{define "articleHTML"}}
{{/* Full page for a single article: head, header, article body, footer.
     The <html> element is opened inside "headHTML" and closed here. */}}

{{template "headHTML" .}}
<body>
    {{template "headerHTML" .}}

    {{template "singleArticleHTML" .}}

    {{template "footerHTML" .}}
</body>
</html>
{{end}}
|
||||||
12
web/tpl/dailyArticles.html
Normal file
12
web/tpl/dailyArticles.html
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{{define "homeHTML"}}
{{/* Full page listing the articles of one day: head, header, list, footer.
     NOTE(review): web/tpl/home.html defines a template with this same name
     and the same body, so one of the two definitions is redundant. */}}

{{template "headHTML" .}}
<body>
    {{template "headerHTML" .}}

    {{template "articlesHTML" .}}

    {{template "footerHTML" .}}
</body>
</html>
{{end}}
|
||||||
12
web/tpl/home.html
Normal file
12
web/tpl/home.html
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{{define "homeHTML"}}
{{/* Full page listing articles: head, header, article list, footer. Both the
     root handler and the daily handler execute this template. The <html>
     element is opened inside "headHTML" and closed here. */}}

{{template "headHTML" .}}
<body>
    {{template "headerHTML" .}}

    {{template "articlesHTML" .}}

    {{template "footerHTML" .}}
</body>
</html>
{{end}}
|
||||||
Reference in New Issue
Block a user