Merge branch 'svevijesti-upokusaju' into 'master'
Svevijesti upokusaju See merge request kbr4/svevijesti!6
This commit was merged in pull request #6.
This commit is contained in:
@@ -2,6 +2,8 @@ OPENAI_API_KEY = "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"
|
||||
|
||||
DB_HOST =localhost
|
||||
DB_PORT =5432
|
||||
DB_USER =postgres
|
||||
DB_USER =svevijesti
|
||||
DB_PASSWORD =salmonela pljusti 221 hamo
|
||||
DB_NAME =svevijestiweb
|
||||
|
||||
API_KEY=abb35e21bdcbad6d1b00141a2b25cf5a
|
||||
BIN
cmd/web/svevijesti
Normal file
BIN
cmd/web/svevijesti
Normal file
Binary file not shown.
1
go.mod
1
go.mod
@@ -15,6 +15,7 @@ require (
|
||||
github.com/gorilla/mux v1.8.0 // indirect
|
||||
github.com/gosimple/slug v1.12.0 // indirect
|
||||
github.com/gosimple/unidecode v1.0.1 // indirect
|
||||
github.com/joho/godotenv v1.5.1 // indirect
|
||||
github.com/kennygrant/sanitize v1.2.4 // indirect
|
||||
github.com/lib/pq v1.10.4 // indirect
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
|
||||
|
||||
2
go.sum
2
go.sum
@@ -25,6 +25,8 @@ github.com/gosimple/slug v1.12.0 h1:xzuhj7G7cGtd34NXnW/yF0l+AGNfWqwgh/IXgFy7dnc=
|
||||
github.com/gosimple/slug v1.12.0/go.mod h1:UiRaFH+GEilHstLUmcBgWcI42viBN7mAb818JrYOeFQ=
|
||||
github.com/gosimple/unidecode v1.0.1 h1:hZzFTMMqSswvf0LBJZCZgThIZrpDHFXux9KeGmn6T/o=
|
||||
github.com/gosimple/unidecode v1.0.1/go.mod h1:CP0Cr1Y1kogOtx0bJblKzsVWrqYaqfNOnHzpgWw4Awc=
|
||||
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
|
||||
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
|
||||
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
|
||||
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
|
||||
github.com/lib/pq v1.10.4 h1:SO9z7FRPzA03QhHKJrH5BXA6HU1rS4V2nIVrrNC1iYk=
|
||||
|
||||
@@ -2,12 +2,13 @@ package database
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
_ "github.com/lib/pq"
|
||||
"gitlab.com/kbr4/svevijesti/internal/model"
|
||||
"html/template"
|
||||
"math"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
_ "github.com/lib/pq"
|
||||
"gitlab.com/kbr4/svevijesti/internal/model"
|
||||
)
|
||||
|
||||
func InsertArticle(store *Store, article model.ScrapedArticle) (err error) {
|
||||
@@ -48,7 +49,7 @@ func ArticlesForDay(store *Store, day time.Time) (articles []model.DisplayArticl
|
||||
|
||||
result := []model.DisplayArticle{}
|
||||
query, err := store.Prepare(`
|
||||
select id,title, content, slug, original_url, source_id, created_at from articles where created_at > $1 and created_at < $2 and LENGTH(content) > 10 order by id desc;
|
||||
select id,title, content, slug, original_url, source_id, created_at, category from articles where created_at > $1 and created_at < $2 and LENGTH(content) > 10 order by id desc;
|
||||
`)
|
||||
if err != nil {
|
||||
return result, err
|
||||
@@ -67,7 +68,7 @@ func ArticlesForDay(store *Store, day time.Time) (articles []model.DisplayArticl
|
||||
|
||||
for rows.Next() {
|
||||
r := model.DisplayArticle{}
|
||||
err = rows.Scan(&r.ID, &r.Title, &r.Content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt)
|
||||
err = rows.Scan(&r.ID, &r.Title, &r.Content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt, &r.Category)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
@@ -95,7 +96,7 @@ func ArticleByID(store *Store, ID int, slug string) (article model.DisplayArticl
|
||||
|
||||
result := model.DisplayArticle{}
|
||||
query, err := store.Prepare(`
|
||||
select id,title, content, slug, original_url, source_id, created_at from articles where id = $1 and slug = $2;
|
||||
select id,title, content, slug, original_url, source_id, created_at, category from articles where id = $1 and slug = $2;
|
||||
`)
|
||||
if err != nil {
|
||||
return result, err
|
||||
@@ -109,7 +110,7 @@ func ArticleByID(store *Store, ID int, slug string) (article model.DisplayArticl
|
||||
|
||||
r := model.DisplayArticle{}
|
||||
content := ""
|
||||
err = row.Scan(&r.ID, &r.Title, &content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt)
|
||||
err = row.Scan(&r.ID, &r.Title, &content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt, &r.Category)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
@@ -138,7 +139,7 @@ func PreviousAndNextArticleUrlByID(store *Store, ID int) (nextUrl string, previo
|
||||
|
||||
nextResult, previousResult := "#", "#"
|
||||
query, err := store.Prepare(`
|
||||
select id,title, content, slug, original_url, source_id, created_at from articles where id < $1 and id > $2 order by id desc limit 1;
|
||||
select id,title, content, slug, original_url, source_id, created_at, category from articles where id < $1 and id > $2 order by id desc limit 1;
|
||||
`)
|
||||
if err != nil {
|
||||
fmt.Println("Err 1:", err)
|
||||
@@ -154,7 +155,7 @@ func PreviousAndNextArticleUrlByID(store *Store, ID int) (nextUrl string, previo
|
||||
|
||||
r := model.DisplayArticle{}
|
||||
content := ""
|
||||
err = row.Scan(&r.ID, &r.Title, &content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt)
|
||||
err = row.Scan(&r.ID, &r.Title, &content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt, &r.Category)
|
||||
if err != nil {
|
||||
return nextResult, previousResult, err
|
||||
}
|
||||
@@ -162,7 +163,7 @@ func PreviousAndNextArticleUrlByID(store *Store, ID int) (nextUrl string, previo
|
||||
previousResult = fmt.Sprintf("/%d/%s", r.ID, r.Slug)
|
||||
|
||||
query2, err := store.Prepare(`
|
||||
select id,title, content, slug, original_url, source_id, created_at from articles where id < $1 and id > $2 order by id asc limit 1;
|
||||
select id,title, content, slug, original_url, source_id, created_at, category from articles where id < $1 and id > $2 order by id asc limit 1;
|
||||
`)
|
||||
if err != nil {
|
||||
fmt.Println("Err 1:", err)
|
||||
@@ -177,7 +178,7 @@ func PreviousAndNextArticleUrlByID(store *Store, ID int) (nextUrl string, previo
|
||||
}
|
||||
|
||||
content = ""
|
||||
err = row.Scan(&r.ID, &r.Title, &content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt)
|
||||
err = row.Scan(&r.ID, &r.Title, &content, &r.Slug, &r.OriginalUrl, &r.SourceId, &r.CreatedAt, &r.Category)
|
||||
if err != nil {
|
||||
fmt.Println("Err 4:", err)
|
||||
return nextResult, previousResult, err
|
||||
|
||||
@@ -23,6 +23,7 @@ type DisplayArticle struct {
|
||||
CreatedAt time.Time
|
||||
FormatedCreatedAt string
|
||||
SourceName string
|
||||
Category string
|
||||
}
|
||||
|
||||
const (
|
||||
|
||||
@@ -2,11 +2,12 @@ package server
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/gorilla/mux"
|
||||
"gitlab.com/kbr4/svevijesti/internal/database"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
"gitlab.com/kbr4/svevijesti/internal/database"
|
||||
)
|
||||
|
||||
func rootHandler(wr http.ResponseWriter, req *http.Request) {
|
||||
@@ -24,11 +25,25 @@ func rootHandler(wr http.ResponseWriter, req *http.Request) {
|
||||
|
||||
dayBefore := "/dan/" + time.Now().Add(-24*time.Hour).Format("2006-01-02")
|
||||
|
||||
cities := []string{"Sarajevo", "Banja Luka", "Zenica", "Tuzla", "Mostar"}
|
||||
|
||||
var weatherInfo []WeatherData
|
||||
for _, city := range cities {
|
||||
data, err := getWeather(city)
|
||||
if err != nil {
|
||||
fmt.Printf("Error fetching weather for %s: %v\n", city, err)
|
||||
continue
|
||||
}
|
||||
weatherInfo = append(weatherInfo, data)
|
||||
}
|
||||
|
||||
data := map[string]interface{}{
|
||||
"title": title,
|
||||
"articles": articles,
|
||||
"previous": dayBefore,
|
||||
"next": "/",
|
||||
"title": title,
|
||||
"articles": articles,
|
||||
"previous": dayBefore,
|
||||
"next": "/",
|
||||
"weatherInfo": weatherInfo,
|
||||
"categories": CategoryMenu,
|
||||
}
|
||||
|
||||
err = templates.ExecuteTemplate(wr, "homeHTML", data)
|
||||
@@ -62,11 +77,25 @@ func dailyArticlesHandler(wr http.ResponseWriter, req *http.Request) {
|
||||
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||
}
|
||||
|
||||
cities := []string{"Sarajevo", "Banja Luka", "Zenica", "Tuzla", "Mostar"}
|
||||
|
||||
var weatherInfo []WeatherData
|
||||
for _, city := range cities {
|
||||
data, err := getWeather(city)
|
||||
if err != nil {
|
||||
fmt.Printf("Error fetching weather for %s: %v\n", city, err)
|
||||
continue
|
||||
}
|
||||
weatherInfo = append(weatherInfo, data)
|
||||
}
|
||||
|
||||
data := map[string]interface{}{
|
||||
"title": title,
|
||||
"articles": articles,
|
||||
"previous": dayBefore,
|
||||
"next": dayAfter,
|
||||
"title": title,
|
||||
"articles": articles,
|
||||
"previous": dayBefore,
|
||||
"next": dayAfter,
|
||||
"weatherInfo": weatherInfo,
|
||||
"categories": CategoryMenu,
|
||||
}
|
||||
|
||||
err = templates.ExecuteTemplate(wr, "homeHTML", data)
|
||||
@@ -97,10 +126,11 @@ func articleHandler(wr http.ResponseWriter, req *http.Request) {
|
||||
|
||||
title := article.Title
|
||||
data := map[string]interface{}{
|
||||
"title": title,
|
||||
"article": article,
|
||||
"previous": previous,
|
||||
"next": next,
|
||||
"title": title,
|
||||
"article": article,
|
||||
"previous": previous,
|
||||
"next": next,
|
||||
"categories": CategoryMenu,
|
||||
}
|
||||
|
||||
err = templates.ExecuteTemplate(wr, "articleHTML", data)
|
||||
|
||||
68
internal/server/category.go
Normal file
68
internal/server/category.go
Normal file
@@ -0,0 +1,68 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
"gitlab.com/kbr4/svevijesti/internal/database"
|
||||
"gitlab.com/kbr4/svevijesti/internal/model"
|
||||
)
|
||||
|
||||
// CategoryMenu lists the category names that the handlers in this package pass
// to their templates under the "categories" key.
var CategoryMenu = []string{
	"Politika",
	"Biznis",
	"Sport",
	"Magazin",
	"Scitech",
	"Ostalo",
}
|
||||
|
||||
func handleCategory(wr http.ResponseWriter, r *http.Request) {
|
||||
vars := mux.Vars(r)
|
||||
category := vars["category"]
|
||||
|
||||
store, err := database.Connect()
|
||||
if err != nil {
|
||||
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
currentDate, err := time.Parse("2006-01-02", category)
|
||||
if err != nil {
|
||||
currentDate = time.Now()
|
||||
}
|
||||
|
||||
articles, err := database.ArticlesForDay(store, currentDate)
|
||||
if err != nil {
|
||||
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
articlesByCategory := make(map[string][]model.DisplayArticle)
|
||||
for _, article := range articles {
|
||||
articlesByCategory[article.Category] = append(articlesByCategory[article.Category], article)
|
||||
}
|
||||
|
||||
var categories []string
|
||||
for cat := range articlesByCategory {
|
||||
categories = append(categories, cat)
|
||||
}
|
||||
|
||||
prevDay := currentDate.AddDate(0, 0, -1)
|
||||
|
||||
data := map[string]interface{}{
|
||||
"title": category,
|
||||
"currentCategory": category,
|
||||
"articles": articlesByCategory[category],
|
||||
"categories": CategoryMenu,
|
||||
"previous": prevDay.Format("2006-01-02"),
|
||||
"next": "/",
|
||||
}
|
||||
err = templates.ExecuteTemplate(wr, "categoryHTML", data)
|
||||
if err != nil {
|
||||
http.Error(wr, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -2,11 +2,12 @@ package server
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/gorilla/mux"
|
||||
"html/template"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
)
|
||||
|
||||
var tPath = "./web/tpl/"
|
||||
@@ -42,5 +43,7 @@ func CreateRoutes() *mux.Router {
|
||||
r.HandleFunc("/dan/{date}", dailyArticlesHandler)
|
||||
r.HandleFunc("/{id:[0-9]+}/{slug}", articleHandler)
|
||||
r.HandleFunc("/", rootHandler)
|
||||
r.HandleFunc("/weather", WeatherHandler)
|
||||
r.HandleFunc("/{category}", handleCategory)
|
||||
return r
|
||||
}
|
||||
|
||||
104
internal/server/weather.go
Normal file
104
internal/server/weather.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"os"
|
||||
|
||||
"github.com/joho/godotenv"
|
||||
)
|
||||
|
||||
var apiKey string
|
||||
|
||||
func init() {
|
||||
err := godotenv.Load()
|
||||
if err != nil {
|
||||
fmt.Println("Error loading .env file:", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
apiKey = os.Getenv("API_KEY")
|
||||
|
||||
if apiKey == "" {
|
||||
fmt.Println("API_KEY environment variable not set.")
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// WeatherData holds the fields decoded from the weather API's JSON response
// for a single city.
//
// The Go field names FellsLike and Preassure are kept as they are so existing
// template references continue to work; only their JSON tags are spelled the
// way the API sends them ("feels_like", "pressure").
type WeatherData struct {
	// Coord is the geographic position of the city.
	Coord struct {
		Lat float64 `json:"lat"`
		Lon float64 `json:"lon"`
	} `json:"coord"`
	// Weather carries the textual description and icon code entries.
	Weather []struct {
		Description string `json:"description"`
		Icon        string `json:"icon"`
	} `json:"weather"`
	// Main carries temperature, pressure and humidity readings.
	Main struct {
		Temp      float64 `json:"temp"`
		FellsLike float64 `json:"feels_like"`
		Preassure int     `json:"pressure"`
		Humidity  int     `json:"humidity"`
		TempMin   float64 `json:"temp_min"`
		TempMax   float64 `json:"temp_max"`
	} `json:"main"`
	// Wind carries wind speed and direction in degrees.
	Wind struct {
		Speed float64 `json:"speed"`
		Deg   float64 `json:"deg"`
	} `json:"wind"`
	// Clouds carries the "all" cloudiness value.
	Clouds struct {
		All int `json:"all"`
	} `json:"clouds"`
	// Name is the city name reported by the API.
	Name string `json:"name"`
}
|
||||
|
||||
func getWeather(city string) (WeatherData, error) {
|
||||
url := fmt.Sprintf("http://api.openweathermap.org/data/2.5/weather?q=%s&appid=%s&units=metric&lang=hr", city, apiKey)
|
||||
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return WeatherData{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return WeatherData{}, err
|
||||
}
|
||||
var weatherData WeatherData
|
||||
err = json.Unmarshal(body, &weatherData)
|
||||
if err != nil {
|
||||
return WeatherData{}, err
|
||||
}
|
||||
return weatherData, nil
|
||||
}
|
||||
|
||||
func WeatherHandler(w http.ResponseWriter, r *http.Request) {
|
||||
cities := []string{"Sarajevo", "Banja Luka", "Zenica", "Tuzla", "Mostar"}
|
||||
|
||||
var weatherInfo []WeatherData
|
||||
for _, city := range cities {
|
||||
data, err := getWeather(city)
|
||||
if err != nil {
|
||||
fmt.Printf("Error fetching weather for %s: %v\n", city, err)
|
||||
continue
|
||||
}
|
||||
weatherInfo = append(weatherInfo, data)
|
||||
}
|
||||
|
||||
title := "Vremenska Prognoza"
|
||||
data := map[string]interface{}{
|
||||
"title": title,
|
||||
"weatherInfo": weatherInfo,
|
||||
"categories": CategoryMenu,
|
||||
}
|
||||
|
||||
err := templates.ExecuteTemplate(w, "weatherHTML", data)
|
||||
if err != nil {
|
||||
fmt.Println("Error executing template:", err)
|
||||
http.Error(w, "Internal Server Error", http.StatusInternalServerError)
|
||||
}
|
||||
|
||||
}
|
||||
BIN
pyth/__pycache__/db_management.cpython-310.pyc
Normal file
BIN
pyth/__pycache__/db_management.cpython-310.pyc
Normal file
Binary file not shown.
BIN
pyth/__pycache__/get_articles.cpython-310.pyc
Normal file
BIN
pyth/__pycache__/get_articles.cpython-310.pyc
Normal file
Binary file not shown.
BIN
pyth/__pycache__/publishing_finals.cpython-310.pyc
Normal file
BIN
pyth/__pycache__/publishing_finals.cpython-310.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
pyth/__pycache__/tttt.cpython-310.pyc
Normal file
BIN
pyth/__pycache__/tttt.cpython-310.pyc
Normal file
Binary file not shown.
Binary file not shown.
241
pyth/articles.py
241
pyth/articles.py
@@ -1,241 +0,0 @@
|
||||
import psycopg2
|
||||
import numpy as np
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
from openai import OpenAI
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from vectData import get_specific_data, modify_similar_data, insert_data, preparing_articles, calculate_cosine_similarity,get_titles_links_embeddings
|
||||
from scrapingsingle import num_tokens_from_string, slice_text_at_2k_tokens
|
||||
import json
|
||||
from json_repair import repair_json
|
||||
|
||||
load_dotenv()
|
||||
|
||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||||
client = OpenAI()
|
||||
embeddings = OpenAIEmbeddings()
|
||||
|
||||
print(f"Checking for similar!")
|
||||
|
||||
def find_and_group_similar_articles(eps=0.5, min_samples=2, threshold=0.95):
    """Group stored articles whose embeddings are more similar than `threshold`.

    Every article that has not already been placed in a group starts a new
    group; each other unplaced article whose cosine similarity to it exceeds
    `threshold` joins that group.

    Args:
        eps: Not used by this function.
        min_samples: Not used by this function.
        threshold: Similarity a pair must exceed to be grouped together.

    Returns:
        A list of groups, each a list of (title, link) tuples. An empty list
        is returned when a psycopg2 error is raised.
    """
    try:
        titles, links, vectors = get_titles_links_embeddings()
        records = list(zip(titles, links, vectors))

        seen = set()
        groups = []
        for idx, (title, link, vector) in enumerate(records):
            key = (title, link)
            if key in seen:
                continue
            seen.add(key)
            members = [key]

            for other_idx, (other_title, other_link, other_vector) in enumerate(records):
                other_key = (other_title, other_link)
                if other_idx == idx or other_key in seen:
                    continue
                if calculate_cosine_similarity(vector, other_vector) > threshold:
                    seen.add(other_key)
                    members.append(other_key)

            groups.append(members)

        return groups

    except psycopg2.Error as e:
        print(f"Error: {e}")
        return []
|
||||
|
||||
def processing_similar():
    """Combine every group of similar articles into one generated article.

    For each group from find_and_group_similar_articles() that holds at least
    two articles:
      * the source rows are tagged with a shared "C: <titles>" marker and
        flagged as not ready,
      * their texts are sent to the chat model (truncated with
        slice_text_at_2k_tokens when they total more than 2000 tokens),
      * the 'content' field of the reply is stored with insert_data() under
        the first article's title.

    Groups of any size go through the same code path, and the stored link
    lists each distinct source link once, in first-seen order. A failure while
    generating or storing one combined article is printed and the next group
    is processed.
    """
    groups = find_and_group_similar_articles()
    if not groups:
        print("No similar articles found.")
        return

    for group in groups:
        titles = [entry[0] for entry in group if len(entry) >= 2]
        if len(titles) < 2:
            continue

        # First row per title; indices 1 and 2 are read as text and link.
        rows = [get_specific_data(title)[0] for title in titles]
        texts = [row[1] for row in rows]
        # dict.fromkeys drops repeated links while keeping their order.
        link = ", ".join(dict.fromkeys(row[2] for row in rows))
        similar_d = "C: " + ", ".join(titles)

        for title in titles:
            modify_similar_data(similar_d, title)
            preparing_articles(False, title)

        count = len(texts)
        total_tokens = sum(num_tokens_from_string(text) for text in texts)
        if total_tokens > 2000:
            combined_text = slice_text_at_2k_tokens(" ".join(texts))
            user_message = (
                f"Here is text {combined_text}, combined from {count} sources, filter text, "
                f"and make news content, return as JSON only with a single 'content' field"
            )
        else:
            listed = " ".join(texts[:-1]) + " and " + texts[-1]
            user_message = (
                f"Here are {count} texts {listed}, combine the following texts into a cohesive news, "
                f"remove any non-news related to all texts, and provide the cleaned data as a JSON only "
                f"with a single 'content' field."
            )

        try:
            completion = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "Data analytic, Journalist and News reporter"},
                    {"role": "user", "content": user_message},
                ])
            # The model may return slightly malformed JSON; repair before parsing.
            generated_text = repair_json(completion.choices[0].message.content)
            response_data = json.loads(generated_text)

            title = titles[0]
            text = response_data["content"]
            vector = embeddings.embed_query(generated_text)

            insert_data(title, text, link, vector, similar_d)
            print(f"Inserting combined: {title}")

        except Exception as e:
            print(f"Error: {e}")
            print(titles[0])

    print("Done!.")


if __name__ == "__main__":
    processing_similar()
|
||||
122
pyth/checking_similar.py
Normal file
122
pyth/checking_similar.py
Normal file
@@ -0,0 +1,122 @@
|
||||
import psycopg2
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
from openai import OpenAI
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from db_management import get_specific_data, modify_similar_data, insert_data, preparing_articles, calculate_cosine_similarity, get_titles_links_embeddings
|
||||
from get_articles import slice_text_at_2k_tokens
|
||||
import json
|
||||
from json_repair import repair_json
|
||||
from publishing_finals import publish_articles
|
||||
|
||||
load_dotenv()
|
||||
|
||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||||
client = OpenAI()
|
||||
embeddings = OpenAIEmbeddings()
|
||||
|
||||
print("Checking for similar!")
|
||||
|
||||
|
||||
def find_and_group_similar_articles(eps=0.5, min_samples=2, threshold=0.95):
    """Group stored articles from different links whose embeddings are similar.

    Every article that has not already been placed in a group starts a new
    group as a (title, link) tuple. Each other unplaced article whose cosine
    similarity to it exceeds `threshold` and whose link differs is added as a
    (title, link, embedding) tuple.

    Args:
        eps: Not used by this function.
        min_samples: Not used by this function.
        threshold: Similarity a pair must exceed to be grouped together.

    Returns:
        A list of groups (lists of tuples). An empty list is returned when a
        psycopg2 error is raised.
    """
    try:
        titles, links, vectors = get_titles_links_embeddings()
        records = list(zip(titles, links, vectors))

        seen = set()
        groups = []
        for idx, (title, link, vector) in enumerate(records):
            key = (title, link)
            if key in seen:
                continue
            seen.add(key)
            members = [key]

            for other_idx, (other_title, other_link, other_vector) in enumerate(records):
                if other_idx == idx or (other_title, other_link) in seen:
                    continue
                score = calculate_cosine_similarity(vector, other_vector)
                if score > threshold and link != other_link:
                    seen.add((other_title, other_link))
                    members.append((other_title, other_link, other_vector))

            groups.append(members)
        return groups

    except psycopg2.Error as e:
        print(f"Error: {e}")
        return []
|
||||
|
||||
|
||||
def processing_articles(articles):
    """Mark a group of articles as combined and build their merged text and link.

    For every article the row is tagged with a shared "C: <titles>" marker via
    modify_similar_data() and flagged not ready via preparing_articles(). Each
    article's text is fetched once with get_specific_data().

    Args:
        articles: Sequence of tuples whose first two items are (title, link).

    Returns:
        (combined_text, link): the texts joined with spaces and passed through
        slice_text_at_2k_tokens(), and the distinct links joined with ", " in
        first-seen order (a single link is returned as is).
    """
    similar_marker = f"C: {', '.join(art[0] for art in articles)}"

    texts = []
    unique_links = {}  # dict preserves insertion order, so the joined link is deterministic
    for article in articles:
        a_title, a_link = article[:2]
        # First matching row; index 1 is read as the article text.
        texts.append(get_specific_data(a_title)[0][1])

        modify_similar_data(similar_marker, a_title)
        preparing_articles(False, a_title)

        unique_links.setdefault(a_link, None)

    combined_text = slice_text_at_2k_tokens(' '.join(texts))
    link = ', '.join(unique_links)
    return combined_text, link
|
||||
|
||||
|
||||
def processing_similar():
    """Combine every group of similar articles into one generated article.

    For each group from find_and_group_similar_articles() with more than one
    article, processing_articles() supplies the merged text and link, the chat
    model is asked for a single combined news text, and the 'content' field of
    its reply is stored with insert_data() under the first article's title and
    category. A failure while generating or storing one combined article is
    printed and the next group is processed.
    """
    groups = find_and_group_similar_articles()
    if not groups:
        print("No similar articles found.")
        return

    for articles in groups:
        if len(articles) <= 1:
            continue

        combined_text, link = processing_articles(articles)
        similar_marker = f"C: {', '.join(art[0] for art in articles)}"
        user_message = (
            f"Here are {len(articles)} texts {combined_text}, combine the following texts into a cohesive news, "
            f"remove any non-news related to all texts, and provide the cleaned data in the Bosnian language "
            f"and return as JSON only with a single 'content' field."
        )

        try:
            completion = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "Data analytic, Journalist and News reporter"},
                    {"role": "user", "content": user_message},
                ])
            # The model may return slightly malformed JSON; repair before parsing.
            generated_text = repair_json(completion.choices[0].message.content)
            response_data = json.loads(generated_text)

            title = articles[0][0]
            text = response_data["content"]
            # NOTE(review): this embeds the repaired JSON string, not `text` — confirm that is intended.
            vector = embeddings.embed_query(generated_text)
            # Index 5 of the row is read as the category.
            category = get_specific_data(title)[0][5]

            insert_data(title, text, link, vector, similar_marker, category)
            print(f"Inserting combined: {title} and Category: {category}")

        except Exception as e:
            print(f"Error: {e}")
            print(articles[0][0])

    print("Done!.")


if __name__ == "__main__":
    processing_similar()
    publish_articles()
|
||||
@@ -68,7 +68,7 @@ def is_similar_data(title, text, link, embedding, threshold=0.98):
|
||||
|
||||
def get_similar():
|
||||
cursor = conn.cursor()
|
||||
query = '''SELECT title,similar_d FROM vectorsvevijesti WHERE similar_d NOT IN ('NO', 'SOURCE')'''
|
||||
query = '''SELECT title, link, similar_d FROM vectorsvevijesti WHERE similar_d NOT IN ('NO', 'SOURCE')'''
|
||||
cursor.execute(query)
|
||||
similar_data = cursor.fetchall()
|
||||
cursor.close()
|
||||
@@ -87,18 +87,23 @@ def get_titles_links_embeddings():
|
||||
return titles, links, embeddings
|
||||
|
||||
|
||||
def insert_data(title, text, link, embedding, similar_d):
|
||||
def insert_data(title, text, link, embedding, similar_d,category):
|
||||
c_time = datetime.now()
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('''
|
||||
INSERT INTO vectorsvevijesti (title, text, link, embedding, similar_d, time, ready)
|
||||
VALUES (%s, %s, %s, %s, %s ,%s ,%s);
|
||||
''', (title, text, link, embedding , similar_d, c_time, True))
|
||||
INSERT INTO vectorsvevijesti (title, text, link, embedding, similar_d, time, ready, category)
|
||||
VALUES (%s, %s, %s, %s, %s ,%s ,%s ,%s);
|
||||
''', (title, text, link, embedding , similar_d, c_time, True , category))
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
|
||||
def get_data():
|
||||
def insert_final(title, text, slug, link, source_id, category):
    """Insert one row into the `articles` table and commit.

    A row whose original_url already exists is skipped by the ON CONFLICT
    clause. Any other failure rolls the transaction back before the exception
    is re-raised, so the shared connection stays usable for later statements.

    Args:
        title: Value for the `title` column.
        text: Value for the `content` column.
        slug: Value for the `slug` column.
        link: Value for the `original_url` column.
        source_id: Value for the `source_id` column.
        category: Value for the `category` column.
    """
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                '''INSERT INTO articles (title, content, slug, original_url, source_id, category)
                   VALUES (%s, %s, %s, %s, %s, %s) ON CONFLICT (original_url) DO NOTHING;''',
                (title, text, slug, link, source_id, category),
            )
        conn.commit()
    except Exception:
        # Without a rollback the connection stays in an aborted transaction
        # and every following statement on it fails.
        conn.rollback()
        raise
|
||||
|
||||
def get_data():
|
||||
cursor = conn.cursor()
|
||||
query = '''SELECT title,text,link FROM vectorsvevijesti;'''
|
||||
cursor.execute(query)
|
||||
@@ -108,7 +113,7 @@ def get_data():
|
||||
|
||||
def get_ready_data():
|
||||
cursor = conn.cursor()
|
||||
query = '''SELECT title, text, link, ready FROM vectorsvevijesti WHERE ready = %s;'''
|
||||
query = '''SELECT title, text, link, time, similar_d, category FROM vectorsvevijesti WHERE ready = %s;'''
|
||||
cursor.execute(query, ('True',))
|
||||
data = cursor.fetchall()
|
||||
cursor.close()
|
||||
@@ -122,14 +127,12 @@ def get_source_data():
|
||||
cursor.close()
|
||||
return data
|
||||
|
||||
|
||||
def modify_similar_data(new_value, title):
    """Set `similar_d` to `new_value` on every vectorsvevijesti row with `title`, then commit.

    Args:
        new_value: New content of the `similar_d` column.
        title: Title used to select the rows to update.
    """
    query = '''UPDATE vectorsvevijesti SET similar_d = %s WHERE title = %s '''
    # The context manager closes the cursor even when execute() raises.
    with conn.cursor() as cursor:
        cursor.execute(query, (new_value, title))
    conn.commit()
|
||||
|
||||
|
||||
def preparing_articles(new_value ,title):
|
||||
cursor = conn.cursor()
|
||||
query = '''UPDATE vectorsvevijesti SET ready = %s WHERE title = %s '''
|
||||
@@ -138,13 +141,12 @@ def preparing_articles(new_value ,title):
|
||||
|
||||
def get_specific_data(title):
|
||||
cursor = conn.cursor()
|
||||
query = '''SELECT title, text, link, similar_d, embedding, ready FROM vectorsvevijesti WHERE title = %s'''
|
||||
query = '''SELECT title, text, link, similar_d, embedding, category, ready FROM vectorsvevijesti WHERE title = %s'''
|
||||
cursor.execute(query, (title,))
|
||||
specific_post = cursor.fetchall()
|
||||
cursor.close()
|
||||
return specific_post
|
||||
|
||||
|
||||
def get_all_links():
|
||||
cursor = conn.cursor()
|
||||
query = '''SELECT link FROM vectorsvevijesti'''
|
||||
@@ -153,6 +155,14 @@ def get_all_links():
|
||||
cursor.close()
|
||||
return db_links
|
||||
|
||||
def get_existing_titles():
    """Return the set of titles currently stored in the ``articles`` table.

    Returns:
        A set containing the ``title`` value of every row.
    """
    # Only the title was ever read from each row, so the unused
    # ``original_url`` column is no longer fetched.
    # NOTE(review): the result holds titles only; a caller that wants to check
    # URLs against it will never find a match - confirm against callers.
    query = '''SELECT title FROM articles'''
    # Context manager closes the cursor even if execute()/fetchall() raises.
    with conn.cursor() as cursor:
        cursor.execute(query)
        return {row[0] for row in cursor.fetchall()}
|
||||
|
||||
def delete_specific(title):
|
||||
cursor = conn.cursor()
|
||||
query = '''DELETE FROM vectorsvevijesti WHERE title = %s'''
|
||||
@@ -192,4 +202,48 @@ def create_db():
|
||||
''')
|
||||
conn.commit()
|
||||
cursor.close()
|
||||
|
||||
def create_db():
    """Create the ``vector`` extension and the ``vectorsvevijesti`` table if missing.

    Executes ``CREATE EXTENSION IF NOT EXISTS vector``, calls
    ``register_vector(conn)``, creates the table and commits.
    """
    # The cursor is now released even when one of the statements raises;
    # before, close() was reached only on the success path.
    with conn.cursor() as cursor:
        cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
        register_vector(conn)
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS vectorsvevijesti (
                id bigserial PRIMARY KEY,
                title VARCHAR,
                text VARCHAR,
                link VARCHAR,
                embedding vector(1536),
                similar_d VARCHAR,
                time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                ready BOOLEAN,
                category VARCHAR
            );
        ''')
    conn.commit()
|
||||
|
||||
def create_ar_table():
    """Create the ``articles`` table if it does not exist, then commit.

    ``title``, ``slug`` and ``original_url`` are each declared UNIQUE, so an
    insert that repeats any of them will conflict.
    """
    # The cursor is now released even when execute() raises; before, close()
    # was reached only on the success path.
    with conn.cursor() as cursor:
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS "articles" (
                "id" bigserial PRIMARY KEY,
                "title" text NOT NULL UNIQUE,
                "content" text NOT NULL,
                "slug" text NOT NULL UNIQUE,
                "created_at" timestamptz DEFAULT NOW() NOT NULL,
                "original_url" text NOT NULL UNIQUE,
                "source_id" int NOT NULL,
                "category" VARCHAR
            );
        ''')
    conn.commit()
|
||||
|
||||
import psycopg2
|
||||
from psycopg2 import sql
|
||||
|
||||
|
||||
create_db()
|
||||
create_ar_table()
|
||||
2
pyth/delete_db.py
Normal file
2
pyth/delete_db.py
Normal file
@@ -0,0 +1,2 @@
|
||||
from db_management import delete_tables
|
||||
delete_tables()
|
||||
171
pyth/get_articles.py
Normal file
171
pyth/get_articles.py
Normal file
@@ -0,0 +1,171 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from urllib.parse import urljoin
|
||||
from openai import OpenAI
|
||||
import os
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from db_management import (insert_data ,is_similar_data ,get_all_links,cleansing )
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
import tiktoken
|
||||
from json_repair import repair_json
|
||||
|
||||
load_dotenv()
|
||||
cleansing()
|
||||
|
||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
client = OpenAI()
|
||||
embeddings = OpenAIEmbeddings()
|
||||
|
||||
dlinks = ['https://klix.ba', 'https://srpskainfo.com', 'https://bljesak.info','https://www.index.hr', 'https://avaz.ba', 'https://www.telegraf.rs', 'https://www.blic.rs', 'https://www.vijesti.me','https://dnevnik.hr','https://24sata.hr']
|
||||
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1.1; SM-G928X Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83 Mobile Safari/537.36'}
|
||||
|
||||
def num_tokens_from_string(string: str, model="gpt-3.5-turbo") -> int:
    """Count the tokens ``string`` encodes to under the tiktoken encoding for ``model``."""
    token_ids = tiktoken.encoding_for_model(model).encode(string)
    return len(token_ids)
|
||||
|
||||
def slice_text_at_2k_tokens(text):
    """Truncate ``text`` to at most 1950 tokens of the gpt-3.5-turbo encoding.

    Args:
        text: Article text to limit.

    Returns:
        ``text`` unchanged when it already fits, otherwise the decoded first
        1950 tokens. The result is always a ``str``.
    """
    max_tokens = 1950
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    tokens = encoding.encode(text)
    if len(tokens) <= max_tokens:
        # Return the string itself. The short-input path used to return
        # ``[text]``; callers that stringify the result then got the list repr
        # (brackets, quotes, backslash escapes) mixed into the article text.
        return text
    return encoding.decode(tokens[:max_tokens])
|
||||
|
||||
def slice_title_if_needed(text):
    """Truncate ``text`` to at most 100 tokens of the gpt-3.5-turbo encoding.

    Args:
        text: Title text to limit.

    Returns:
        ``text`` unchanged when it already fits, otherwise the decoded first
        100 tokens. The result is always a ``str``.
    """
    max_tokens = 100
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    tokens = encoding.encode(text)
    if len(tokens) <= max_tokens:
        # Return the string itself. The short-input path used to return
        # ``[text]``, whose list repr then leaked into the prompt wherever the
        # title was interpolated.
        return text
    return encoding.decode(tokens[:max_tokens])
|
||||
|
||||
def replace_with_spaces(text):
    """Replace every character outside the allowed alphabet with a space.

    Allowed characters are ASCII letters, digits, the space, and the
    Bosnian/Croatian/Serbian Latin letters with diacritics.

    Args:
        text: Input string.

    Returns:
        A string of the same length with disallowed characters blanked.
    """
    # The original literal contained the mojibake character U+29E6E where
    # "đŠšŽ" belonged (their ISO-8859-2 bytes read as UTF-8), so those four
    # letters were wrongly blanked. The digraphs Dž/dž need no entry of their
    # own: their component letters are already allowed.
    allowed_chars = frozenset(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        "ČčĆćĐđŠšŽž0123456789 "
    )
    return ''.join(char if char in allowed_chars else ' ' for char in text)
|
||||
|
||||
def fix_links(links_set):
    """Return a new set of links with a leading ``www.`` removed from the host.

    Args:
        links_set: Iterable of URL strings.

    Returns:
        A set with one entry per distinct normalised link.
    """
    from urllib.parse import urlsplit, urlunsplit

    modified_links = set()
    for link in links_set:
        parts = urlsplit(link)
        host = parts.netloc
        if host.lower().startswith("www."):
            # Strip the prefix from the host only. The previous
            # str.replace("www.", "") also rewrote any "www." that appeared
            # in the path or query string.
            link = urlunsplit(parts._replace(netloc=host[4:]))
        elif not host and link.lower().startswith("www."):
            # Scheme-less link such as "www.example.com/a".
            link = link[4:]
        modified_links.add(link)
    return modified_links
|
||||
|
||||
total_links = set()
|
||||
collected_news = set()
|
||||
|
||||
def get_article_links(url, already_checked):
    """Collect not-yet-seen links found inside ``<article>`` elements of a page.

    Args:
        url: Page to download; also the base for resolving relative hrefs.
        already_checked: Set of links seen so far. It is mutated: every link
            returned by this call is added to it.

    Returns:
        A list of absolute links in document order, or ``None`` when the
        response status is not 200.
    """
    # ``headers`` must be passed by keyword: the second positional parameter
    # of requests.get is ``params``, so the User-Agent dict used to be sent as
    # a query string instead of as request headers.
    # NOTE(review): no timeout is set, so a stalled server blocks indefinitely.
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    link_store = []
    for article in soup.find_all('article'):
        for anchor in article.find_all('a', href=True):
            link_value = urljoin(url, anchor['href'])
            if link_value not in already_checked:
                link_store.append(link_value)
                already_checked.add(link_value)
    return link_store
|
||||
|
||||
# Crawl every front page once; ``already_checked`` is shared across sites so a
# link that shows up on several pages is collected a single time.
already_checked = set()

for dlink in dlinks:
    temp_links = get_article_links(dlink, already_checked)
    if temp_links:
        total_links.update(temp_links)

# Drop empty entries, remove links already stored in the database, then
# normalise the hosts of what is left.
final_links = {item for item in total_links if item}
db_links = set(get_all_links())
new_links = final_links - db_links
final_links = fix_links(set(new_links))
|
||||
|
||||
if __name__ == '__main__':
    # Loop-invariant lookup tables, built once instead of once per article.
    category_options = ['politics','business','sport','magazine','scitech']
    category_translation = {
        'politics': 'Politika',
        'business': 'Biznis',
        'sport': 'Sport',
        'magazine': 'Magazin',
        'scitech': 'Nauka i tehnologija',
        'other': 'Ostalo',
    }

    for link in final_links:
        # Host normalisation can turn a link into one that is already stored.
        if link in db_links:
            continue
        print(f"Processing link: {link}")
        db_links.add(link)

        try:
            # ``headers`` must be passed by keyword; positionally it is
            # interpreted as ``params`` and ends up in the query string.
            response = requests.get(link, headers=headers)
        except requests.RequestException as e:
            # One unreachable article must not abort the whole crawl.
            print(f"Error fetching {link}: {e}")
            continue
        soup = BeautifulSoup(response.text, 'html.parser')

        titles = soup.find_all(['h2', 'h1','h3'])
        title_text = ' '.join([title.get_text(strip=True) for title in titles])

        texts = soup.find_all(['p'])
        text_text = ' '.join([text.get_text(strip=True) for text in texts])

        title_text = replace_with_spaces(title_text)

        text_text = slice_text_at_2k_tokens(text_text)
        text_text = replace_with_spaces(str(text_text))

        ttk = num_tokens_from_string(text_text)

        # Only when the body is close to the token budget is the title
        # trimmed as well.
        if ttk > 1900:
            title_text = slice_title_if_needed(title_text)

        try:
            completion = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "Data analytic, Journalist and News reporter"},
                    {"role": "user", "content": rf"Extract relevant information from the following input: Title: {title_text}, Text: {text_text}. Remove any non-news element related to the current text and title and remove 'FOTO' and 'VIDEO' from title and text, from {category_options} select category in wich that news belong, and provide the cleaned data make sure that its on Bosnian language and valid JSON object with 'title' field, 'category' and 'content' field."}
                ])
            generated_text = completion.choices[0].message.content

            # The model output is not guaranteed to be valid JSON; repair it
            # before parsing.
            generated_text = repair_json(generated_text)

            response_data = json.loads(generated_text)
            title = response_data["title"]
            predicted_category = response_data["category"]
            text = response_data["content"]

            # Anything outside the known options is filed under 'other'.
            if predicted_category.lower() in category_options:
                category = predicted_category.lower()
            else:
                category = 'other'

            category = category_translation.get(category, category.capitalize())

            vector = embeddings.embed_query(generated_text)

            print(f"Category: {category}")

            if not is_similar_data(title, text, link, vector, threshold=0.98):
                similar_d = "NO"
                insert_data(title, text, link, vector,similar_d,category)

        except Exception as e:
            # Best effort: report the failure and move on to the next link.
            print(f"Error in completion: {e}")
|
||||
|
||||
69
pyth/publishing_finals.py
Normal file
69
pyth/publishing_finals.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from slugify import slugify
|
||||
import random
|
||||
from db_management import get_ready_data,insert_final,get_existing_titles
|
||||
|
||||
def create_slug(title):
    """Build a slug from ``title`` prefixed with a random integer in 1..1000."""
    prefix = random.randint(1, 1000)
    return slugify("{} {}".format(prefix, title))
|
||||
|
||||
def get_source_id(link, similar):
    """Map an article link to the numeric id of the site it came from.

    Args:
        link: Article URL.
        similar: Similarity marker; any value other than ``"NO"`` yields 5.

    Returns:
        5 when ``similar`` is not ``"NO"``; otherwise the id of the first
        marker found in the link's host, or 0 when no marker matches.
    """
    from urllib.parse import urlsplit

    if similar != "NO":
        return 5

    # Order is significant: the first matching marker wins.
    host_markers = (
        ("srpskainfo", 1),
        ("klix", 2),
        ("bljesak", 3),
        ("blic", 4),
        ("index.hr", 6),
        ("avaz", 7),
        ("telegraf", 8),
        ("vijesti.me", 9),
        ("dnevnik.hr", 10),
        ("24sata.hr", 11),
    )
    # Match against the host only. Searching the whole URL misattributed
    # articles whose path merely contained a marker (e.g. "republic" contains
    # "blic"). Scheme-less links have no parsed host, so fall back to the link.
    host = urlsplit(link).netloc.lower() or link.lower()
    for marker, source_id in host_markers:
        if marker in host:
            return source_id
    return 0
|
||||
|
||||
data = get_ready_data()
|
||||
|
||||
def remove_braces_and_quotes(text):
    """Strip every ``{"`` and then every ``"}`` sequence from ``text``."""
    for token in ('{"', '"}'):
        text = text.replace(token, '')
    return text
|
||||
|
||||
|
||||
def publish_articles():
    """Insert every ready article from ``data`` that is not already published.

    A row is skipped when its title or link is contained in one of the
    existing entries returned by ``get_existing_titles()``. Otherwise the
    title and text are cleaned and the article is stored via ``insert_final``.
    """
    # Query the existing entries once instead of once per article, and keep
    # the in-memory copy current as articles are published.
    check = set(get_existing_titles())

    for d in data:
        title = d[0]
        text = d[1]
        link = d[2]
        similar_d = d[4]
        category = d[5]

        # NOTE(review): these are substring tests, not equality tests, so a
        # short title is treated as a duplicate of any longer title that
        # contains it - confirm this is intended.
        title_check = any(title in t for t in check)
        link_check = any(link in l for l in check)
        if title_check or link_check:
            continue

        # Slug and source id are only needed for rows that get published.
        slug = create_slug(title)
        source_id = get_source_id(link,similar_d)

        text = remove_braces_and_quotes(text)
        title = remove_braces_and_quotes(title)
        print(f"Source: {source_id}")
        print(f"Link: {link}")
        insert_final(title, text, slug, link, source_id, category)
        check.add(title)
        print(f"Publishing: {title}")
|
||||
@@ -1,146 +0,0 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from urllib.parse import urljoin
|
||||
from openai import OpenAI
|
||||
import os
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from vectData import (insert_data ,is_similar_data ,get_all_links,cleansing )
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
import tiktoken
|
||||
from json_repair import repair_json
|
||||
|
||||
load_dotenv()
|
||||
cleansing()
|
||||
|
||||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||||
|
||||
client = OpenAI()
|
||||
embeddings = OpenAIEmbeddings()
|
||||
|
||||
dlinks = ['https://klix.ba', 'https://srpskainfo.com', 'https://bljesak.info']
|
||||
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1.1; SM-G928X Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83 Mobile Safari/537.36'}
|
||||
|
||||
def num_tokens_from_string(string: str, model="gpt-3.5-turbo") -> int:
|
||||
encoding = tiktoken.encoding_for_model(model)
|
||||
return len(encoding.encode(string))
|
||||
|
||||
def slice_text_at_2k_tokens(text):
|
||||
encoding_name = "gpt-3.5-turbo"
|
||||
max_tokens = 1950
|
||||
encoding = tiktoken.encoding_for_model(encoding_name)
|
||||
tokens = encoding.encode(text)
|
||||
if len(tokens) <= max_tokens:
|
||||
return [text]
|
||||
sliced_tokens = tokens[:max_tokens]
|
||||
sliced_text = encoding.decode(sliced_tokens)
|
||||
return sliced_text
|
||||
|
||||
def slice_title_if_needed(text):
|
||||
encoding_name = "gpt-3.5-turbo"
|
||||
max_tokens = 100
|
||||
encoding = tiktoken.encoding_for_model(encoding_name)
|
||||
tokens = encoding.encode(text)
|
||||
if len(tokens) <= max_tokens:
|
||||
return [text]
|
||||
sliced_tokens = tokens[:max_tokens]
|
||||
sliced_text = encoding.decode(sliced_tokens)
|
||||
return sliced_text
|
||||
|
||||
def replace_with_spaces(text):
|
||||
allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzČčĆćDždžĐ𩹮ž0123456789 "
|
||||
cleaned_text = ''.join(char if char in allowed_chars else ' ' for char in text)
|
||||
return cleaned_text
|
||||
|
||||
def fix_links(links_set):
|
||||
modified_links = set()
|
||||
for link in links_set:
|
||||
if "www" in link:
|
||||
modified_link = link.replace("www.", "")
|
||||
modified_links.add(modified_link)
|
||||
else:
|
||||
modified_links.add(link)
|
||||
return modified_links
|
||||
|
||||
total_links = set()
|
||||
collected_news = set()
|
||||
|
||||
def get_article_links(url, already_checked):
|
||||
response = requests.get(url,headers)
|
||||
if response.status_code == 200:
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
articles = soup.find_all('article')
|
||||
link_store = []
|
||||
|
||||
for article in articles:
|
||||
links = article.find_all('a', href=True)
|
||||
for link in links:
|
||||
link_value = urljoin(url, link['href'])
|
||||
if link_value not in already_checked:
|
||||
link_store.append(link_value)
|
||||
already_checked.add(link_value)
|
||||
return link_store
|
||||
|
||||
already_checked = set()
|
||||
|
||||
for dlink in dlinks:
|
||||
temp_links = get_article_links(dlink, already_checked)
|
||||
if temp_links:
|
||||
total_links.update(temp_links)
|
||||
final_links = {item for item in total_links if item}
|
||||
|
||||
db_links = set(get_all_links())
|
||||
new_links = final_links - db_links
|
||||
final_links = new_links
|
||||
final_links = set(final_links)
|
||||
final_links = fix_links(final_links)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
for link in final_links:
|
||||
response = requests.get(link,headers)
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
titles = soup.find_all(['h2', 'h1','h3'])
|
||||
title_text = ' '.join([title.get_text(strip=True) for title in titles])
|
||||
|
||||
texts = soup.find_all(['p'])
|
||||
text_text = ' '.join([text.get_text(strip=True) for text in texts])
|
||||
|
||||
text_text = text_text
|
||||
title_text = title_text
|
||||
|
||||
title_text = replace_with_spaces(title_text)
|
||||
|
||||
text_text = slice_text_at_2k_tokens(text_text)
|
||||
text_text = replace_with_spaces(str(text_text))
|
||||
|
||||
ttk = num_tokens_from_string(text_text)
|
||||
|
||||
if ttk > 1900:
|
||||
title_text = slice_title_if_needed(title_text)
|
||||
try:
|
||||
completion = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[
|
||||
{"role": "system", "content": "Data analytic, Journalist and News reporter"},
|
||||
{"role": "user", "content": rf"Extract relevant information from the following input: Title: {title_text}, Text: {text_text}. Remove any non-news element related to the current text and title, and provide the cleaned data make sure that its valid JSON object with 'title' field and 'content' field."}
|
||||
]
|
||||
)
|
||||
generated_text = completion.choices[0].message.content
|
||||
|
||||
generated_text = repair_json(generated_text)
|
||||
|
||||
response_data = json.loads(generated_text)
|
||||
title = response_data["title"]
|
||||
text = response_data["content"]
|
||||
vector = embeddings.embed_query(generated_text)
|
||||
|
||||
if not is_similar_data(title, text, link, vector, threshold=0.98):
|
||||
similar_d = "NO"
|
||||
insert_data(title, text, link, vector,similar_d)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error in completion: {e}")
|
||||
continue
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Test Pyth</title>
|
||||
</head>
|
||||
<body>
|
||||
<div>
|
||||
<article>
|
||||
<h2>Test Title 1</h2>
|
||||
<p>Test Text 1</p>
|
||||
<a href="/article/one"> First</a>
|
||||
</article>
|
||||
<article>
|
||||
<h2>Test Title 2</h2>
|
||||
<p>Test Text 2</p>
|
||||
<a href="/article/two">Second</a>
|
||||
</article>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,12 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Article</title>
|
||||
</head>
|
||||
<body>
|
||||
<h2>Test Title</h2>
|
||||
<p>Test Text</p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,12 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Article</title>
|
||||
</head>
|
||||
<body>
|
||||
<h2>Test Title</h2>
|
||||
<p>Test Text</p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -7,7 +7,7 @@ from langchain.vectorstores.pgvector import PGVector
|
||||
from openai import OpenAI
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapingsingle import get_article_links, insert_data, is_similar_data
|
||||
from pyth.get_articles import get_article_links, insert_data, is_similar_data
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@@ -2,7 +2,7 @@ import unittest
|
||||
import numpy as np
|
||||
import psycopg2
|
||||
import os
|
||||
from vectData import calculate_cosine_similarity, is_similar_data, insert_data, get_data, create_db
|
||||
from pyth.db_management import calculate_cosine_similarity, is_similar_data, insert_data, get_data, create_db
|
||||
|
||||
class TestIntegration(unittest.TestCase):
|
||||
host = os.getenv("DB_HOST")
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
from flask import Flask , render_template , jsonify
|
||||
from vectData import get_ready_data
|
||||
from flask_cors import CORS
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
CORS(app)
|
||||
|
||||
@app.route('/')
|
||||
def index() :
|
||||
return render_template("index.html")
|
||||
|
||||
|
||||
@app.route('/article/one')
|
||||
def articleone():
|
||||
return render_template("one.html")
|
||||
|
||||
|
||||
@app.route('/article/two')
|
||||
def articletwo():
|
||||
return render_template("two.html")
|
||||
|
||||
@app.route('/data/get/news', methods=['GET'])
|
||||
def takenews():
|
||||
data = get_ready_data()
|
||||
return jsonify(data)
|
||||
|
||||
app.run(debug=True)
|
||||
4
run.sh
Normal file
4
run.sh
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
# Restart the development web server: stop whatever is listening on port 8080,
# then start the Go web application in its place.

# `exec` must not be used for the kill step: it replaces this shell with the
# `kill` process, so the server start below would never be reached.
pids=$(lsof -t -i:8080)
if [ -n "$pids" ]; then
    # Unquoted on purpose: lsof may print several PIDs, one per line.
    kill -9 $pids
fi

exec go run cmd/web/web.go
|
||||
@@ -1,16 +1,39 @@
|
||||
{{define "articlesHTML"}}
|
||||
<ol reversed>
|
||||
{{range .articles}}
|
||||
<li>
|
||||
<div class="article_content">
|
||||
<a href="/{{.ID}}/{{.Slug}}">
|
||||
{{.Title}}</a></div>
|
||||
<div class="timestamp">{{.SourceName}} - {{ .FormatedCreatedAt }}</div>
|
||||
</li>
|
||||
<br><br>
|
||||
{{else}}
|
||||
Nema članaka za izabrani datum.
|
||||
{{end}}
|
||||
|
||||
</ol>
|
||||
{{range .articles}}
|
||||
<article class="news-article">
|
||||
<div class="article_content">
|
||||
<div class="ar-title">
|
||||
<a href="/{{.ID}}/{{.Slug}}">
|
||||
{{.Title}}
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
<a href="/{{.ID}}/{{.Slug}}">
|
||||
<div class="prewi" data-content="{{.Content}}"></div>
|
||||
</a>
|
||||
<div class="timestamp"> starenovine - {{ .FormatedCreatedAt }} - {{.Category}}</div>
|
||||
</article>
|
||||
{{else}}
|
||||
<div class="prewi">
|
||||
Nema članaka za izabrani datum.
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
function createPewiev(content) {
|
||||
let slicedContent = content.slice(0,200);
|
||||
if (content.length > 200){
|
||||
slicedContent += '...'
|
||||
}
|
||||
return slicedContent
|
||||
}
|
||||
|
||||
let previewDivs = document.querySelectorAll('.prewi')
|
||||
previewDivs.forEach(function(previewDiv){
|
||||
let content = previewDiv.getAttribute('data-content')
|
||||
previewDiv.textContent = createPewiev(content)
|
||||
})
|
||||
</script>
|
||||
{{end}}
|
||||
|
||||
39
web/data/categoryarticles.html
Normal file
39
web/data/categoryarticles.html
Normal file
@@ -0,0 +1,39 @@
|
||||
{{define "articlecategoryHTML"}}
|
||||
<h3 class="category">{{.title}}</h3>
|
||||
{{range .articles}}
|
||||
<article class="news-article">
|
||||
<div class="article_content">
|
||||
<div class="ar-title">
|
||||
<a href="/{{.ID}}/{{.Slug}}">
|
||||
{{.Title}}
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
<a href="/{{.ID}}/{{.Slug}}">
|
||||
<div class="prewi" data-content="{{.Content}}" data-title="{{.Title}}" data-link="/{{.ID}}/{{.Slug}}"></div>
|
||||
</a>
|
||||
<div class="timestamp"> starenovine - {{ .FormatedCreatedAt }} - {{.Category}}</div>
|
||||
</article>
|
||||
{{else}}
|
||||
<div class="prewi">
|
||||
Nema članaka za izabrani datum.
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
<script>
|
||||
function createPewiev(content) {
|
||||
let slicedContent = content.slice(0,200);
|
||||
if (content.length > 200){
|
||||
slicedContent += '...'
|
||||
}
|
||||
return slicedContent
|
||||
}
|
||||
|
||||
let previewDivs = document.querySelectorAll('.prewi')
|
||||
previewDivs.forEach(function(previewDiv){
|
||||
let content = previewDiv.getAttribute('data-content')
|
||||
previewDiv.textContent = createPewiev(content)
|
||||
})
|
||||
</script>
|
||||
|
||||
{{end}}
|
||||
52
web/data/categorymenu.html
Normal file
52
web/data/categorymenu.html
Normal file
@@ -0,0 +1,52 @@
|
||||
{{define "categorymenuHTML"}}
|
||||
<nav class="hed">
|
||||
<div id="small-menu" onclick="handleSmallMenu();myFunction(this)">
|
||||
<div class="menu-icon">
|
||||
<div class="bar1"></div>
|
||||
<div class="bar2"></div>
|
||||
<div class="bar3"></div>
|
||||
</div>
|
||||
<p>Menu</p>
|
||||
</div>
|
||||
<div class="menu">
|
||||
<a href="/">
|
||||
<div class="home-icon" title="Pocetna">
|
||||
<div class="home-text">Pocetna</div>
|
||||
<i class="fa fa-home" style="font-size:48px;color:white"></i>
|
||||
</div>
|
||||
</a>
|
||||
{{range .categories}}
|
||||
<a href="/{{ . }}">
|
||||
<div class="home-icon" title="{{ . }}">
|
||||
{{ . }}
|
||||
</div>
|
||||
</a>
|
||||
{{end}}
|
||||
</div>
|
||||
</nav>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
|
||||
function handleSmallMenu (){
|
||||
let menu = document.querySelector('.menu')
|
||||
menu.classList.toggle('show-menu')
|
||||
}
|
||||
function myFunction(x) {
|
||||
x.classList.toggle("change");
|
||||
}
|
||||
|
||||
const handleScroll = function(event) {
|
||||
const top = window.scrollY;
|
||||
const header = document.querySelector('.hed');
|
||||
const headerBottom = header.offsetTop + header.offsetHeight;
|
||||
|
||||
if (top >= headerBottom) {
|
||||
header.classList.add('fixed');
|
||||
} else {
|
||||
header.classList.remove('fixed');
|
||||
}
|
||||
}
|
||||
window.addEventListener('scroll', handleScroll);
|
||||
</script>
|
||||
{{end}}
|
||||
@@ -1,13 +1,11 @@
|
||||
{{define "footerHTML"}}
|
||||
|
||||
<footer>
|
||||
SN
|
||||
<div>
|
||||
<nav>
|
||||
<a href="{{.previous}}"><----</a> |
|
||||
<a href="/">Početna</a> |
|
||||
<a href="{{.next}}">----></a>
|
||||
</nav>
|
||||
</div>
|
||||
<center>
|
||||
<div class="fot">
|
||||
<p>starenovine 2023-2024</p>
|
||||
</div>
|
||||
</center>
|
||||
</footer>
|
||||
|
||||
{{end}}
|
||||
|
||||
23
web/data/fullweather.html
Normal file
23
web/data/fullweather.html
Normal file
@@ -0,0 +1,23 @@
|
||||
{{define "fullweatherHTML"}}
|
||||
|
||||
<h2 class="w-title">{{.title}}</h2>
|
||||
<div class="weather-container">
|
||||
{{range .weatherInfo}}
|
||||
<div class="weather-w">
|
||||
<h3>{{.Name}}</h3>
|
||||
{{with index .Weather 0}}
|
||||
<div class="weather-info">Opis: {{.Description}}</div>
|
||||
{{end}}
|
||||
<div class="weather-info">Temperatura: {{.Main.Temp}}°C</div>
|
||||
<div class="weather-info">Osecaj: {{.Main.FellsLike}}°C</div>
|
||||
<div class="weather-info">Pritisak:{{.Main.Preassure}} hPa</div>
|
||||
<div class="weather-info">Vlaznost: {{.Main.Humidity}}%</div>
|
||||
<div class="weather-info">Min Temp: {{.Main.TempMin}}°C</div>
|
||||
<div class="weather-info">Max Temp: {{.Main.TempMax}}°C</div>
|
||||
<div class="weather-info">Vetar: {{.Wind.Speed}} m/s</div>
|
||||
<div class="weather-info">Oblaci: {{.Clouds.All}}%</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
|
||||
{{end}}
|
||||
@@ -4,7 +4,7 @@
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=Edge">
|
||||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||||
<meta name="viewport" content="width=device-width,initial-scale=1.0">
|
||||
<meta property="og:site_name" content="starenovine">
|
||||
<meta name="twitter:card" content="preview">
|
||||
<meta property="og:title" content="{{.title}}">
|
||||
@@ -12,43 +12,468 @@
|
||||
<meta property="og:url" content="https://www.starenovine.com">
|
||||
<title>{{.title}} - stare novine</title>
|
||||
<link rel="canonical" href="https://www.starenovine.com/">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
|
||||
<style type="text/css">
|
||||
body {
|
||||
font-family: monospace;
|
||||
font-size: 1.5em;
|
||||
width: 90%;
|
||||
max-width: 98vw;
|
||||
}
|
||||
:root {
|
||||
--ease: cubic-bezier(.2, 1, .3, 1);
|
||||
--icon-size: 60px;
|
||||
--border-radius: 0.5;
|
||||
--background: #2d2c3e;
|
||||
--background-b: #2d2c3e;
|
||||
--green: #16A085;
|
||||
--white: #FFFFFF;
|
||||
}
|
||||
|
||||
h1#title {
|
||||
margin-block-end: 0;
|
||||
font-size: 1.7em;
|
||||
}
|
||||
body {
|
||||
font-size: 1.2em;
|
||||
margin: 0 auto;
|
||||
overflow-x: hidden;
|
||||
background-color: black;
|
||||
}
|
||||
.category{
|
||||
color: white;
|
||||
width: 100%;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.timestamp {
|
||||
font-size: 0.8em;
|
||||
color: gray;
|
||||
}
|
||||
h1#title {
|
||||
background-color: #0B173B;
|
||||
color: white;
|
||||
margin-block-end: 0;
|
||||
font-size: 1.7em;
|
||||
padding-left: 5px;
|
||||
}
|
||||
|
||||
.single_timestamp {
|
||||
font-size: 0.77em;
|
||||
margin-bottom: 0.7em;
|
||||
color: gray;
|
||||
}
|
||||
.logo {
|
||||
text-decoration: none;
|
||||
color: black;
|
||||
}
|
||||
|
||||
#logo {
|
||||
font-size: 2vw;
|
||||
background: white;
|
||||
}
|
||||
.timestamp {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
font-size: 0.8em;
|
||||
color: gray;
|
||||
}
|
||||
|
||||
pre.article_content {
|
||||
background: white;
|
||||
.prewi{
|
||||
color: white;
|
||||
padding-top: 5px;
|
||||
padding-bottom: 2px;
|
||||
margin-left: 5px;
|
||||
}
|
||||
|
||||
.empty {
|
||||
width: 89vw;
|
||||
color: black;
|
||||
box-shadow: 0 4px 8px #0000004d;
|
||||
border: 1px solid black;
|
||||
background: linear-gradient(90deg, rgba(231, 214, 197, 1) 0%, rgba(241, 234, 227, 1) 100%);
|
||||
padding-top: 10px;
|
||||
padding-bottom: 10px;
|
||||
}
|
||||
|
||||
.ar-title {
|
||||
background-color: #0B173B;
|
||||
width: 100%;
|
||||
color: black;
|
||||
padding-top: 10px;
|
||||
padding-bottom: 10px;
|
||||
border-bottom: 3px solid black;
|
||||
}
|
||||
.w-title{
|
||||
color:white;
|
||||
}
|
||||
|
||||
.ar-title > a {
|
||||
font-size: 1.2em;
|
||||
font-weight: 500;
|
||||
text-decoration: none;
|
||||
color: white;
|
||||
padding-left: 5px;
|
||||
}
|
||||
|
||||
.article_content {
|
||||
background-color: black;
|
||||
gap: 4px;
|
||||
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
|
||||
}
|
||||
|
||||
.single_timestamp {
|
||||
font-size: 0.77em;
|
||||
margin-bottom: 0.7em;
|
||||
color: gray;
|
||||
padding-top: 10px;
|
||||
padding-bottom: 10px;
|
||||
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
|
||||
}
|
||||
|
||||
#logo {
|
||||
width: 100%;
|
||||
font-size: 1.85vw;
|
||||
background: linear-gradient(90deg, rgba(1, 1, 1, 1) 0%, rgba(11, 23, 59, 1) 50%, rgba(0, 0, 0, 1) 100%);
|
||||
box-shadow: 2px 0 20px 6px rgba(11, 23, 59, 0.2);
|
||||
color: white;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.list {
|
||||
display: grid;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
list-style: none;
|
||||
margin-top: 15px;
|
||||
gap: 15px;
|
||||
}
|
||||
|
||||
.news-article > a {
|
||||
background: linear-gradient(90deg, rgba(231, 214, 197, 1) 0%, rgba(232, 232, 232, 1) 51%, rgba(231, 214, 197, 1) 100%);
|
||||
color: black;
|
||||
}
|
||||
.news-article:hover .ar-title{
|
||||
background-color: #FF0000;
|
||||
}
|
||||
|
||||
.article_body {
|
||||
color: white;
|
||||
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
|
||||
padding-left: 5px;
|
||||
}
|
||||
|
||||
.fot {
|
||||
display: flex;
|
||||
background-color: #0B173B;
|
||||
text-decoration: none;
|
||||
color: white;
|
||||
border-radius: 5px;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
padding-top: 5px;
|
||||
padding-bottom: 5px;
|
||||
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
|
||||
gap: 5%;
|
||||
}
|
||||
|
||||
.fot > a {
|
||||
color: white;
|
||||
gap: 10px;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.hed {
|
||||
background-color: #0B173B;
|
||||
color: white;
|
||||
border-radius: 5px;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
|
||||
gap: 5%;
|
||||
transition: background-color 0.5s var(--ease);
|
||||
}
|
||||
|
||||
.hed.fixed {
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
width: 100%;
|
||||
z-index: 1000;
|
||||
box-shadow: 0 4px 12px -4px rgba(255, 65, 54, 0.5);
|
||||
}
|
||||
|
||||
.hed > a {
|
||||
color: white;
|
||||
gap: 10px;
|
||||
text-decoration: none;
|
||||
}
|
||||
.article_content > a {
|
||||
color: white;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.news-article {
|
||||
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
|
||||
border: 1px solid;
|
||||
border-image: linear-gradient(90deg, rgba(8, 8, 8, 1) 0%, rgba(179, 150, 121, 1) 21%, rgba(0, 0, 0, 1) 37%, rgba(98, 87, 75, 1) 63%, rgba(0, 0, 0, 1) 100%);
|
||||
border-image-slice: 1;
|
||||
}
|
||||
|
||||
pre.article_content {
|
||||
background: white;
|
||||
}
|
||||
|
||||
.news-article > a {
|
||||
text-decoration: none;
|
||||
color: black;
|
||||
}
|
||||
|
||||
#weather {
|
||||
display: flex;
|
||||
width: 100%;
|
||||
height: fit-content;
|
||||
justify-content: space-between;
|
||||
}
|
||||
.weather-container {
|
||||
display: flex;
|
||||
justify-content: space-around;
|
||||
margin-top: 50px;
|
||||
color: white;
|
||||
}
|
||||
.weather-container > div {
|
||||
text-size-adjust: auto;
|
||||
}
|
||||
.weather-w {
|
||||
border: 1px solid #ccc;
|
||||
width: 20%;
|
||||
text-align: center;
|
||||
color: white;
|
||||
background-color: #0B173B;
|
||||
}
|
||||
.weather-w h3 {
|
||||
margin-top: 0;
|
||||
color: white;
|
||||
}
|
||||
.weather-info {
|
||||
margin-top: 10px;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.w-link{
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.weather-widget {
|
||||
width: 19%;
|
||||
height: 50px;
|
||||
display: grid;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
color: white;
|
||||
background-color: #0B173B;
|
||||
border: 1px solid;
|
||||
border-image: linear-gradient(90deg, rgba(8, 8, 8, 1) 0%, rgba(179, 150, 121, 1) 21%, rgba(0, 0, 0, 1) 37%, rgba(98, 87, 75, 1) 63%, rgba(0, 0, 0, 1) 100%);
|
||||
border-image-slice: 1;
|
||||
box-shadow: 0 4px 12px -4px rgba(255, 65, 54, 0.5);
|
||||
}
|
||||
|
||||
.weather-widget > div {
|
||||
font-size: 10;
|
||||
}
|
||||
|
||||
.weather-widget > div > span {
|
||||
font-size: 8;
|
||||
}
|
||||
|
||||
#small-menu{
|
||||
display: none;
|
||||
}
|
||||
|
||||
.menu{
|
||||
width: 100%;
|
||||
text-decoration: none;
|
||||
color: white;
|
||||
display: flex;
|
||||
justify-content: space-evenly;
|
||||
align-items: center;
|
||||
padding-top: 5px;
|
||||
padding-bottom: 5px;
|
||||
}
|
||||
.menu > a {
|
||||
color: white;
|
||||
gap: 10px;
|
||||
text-decoration: none;
|
||||
}
|
||||
.home-text{
|
||||
display: none;
|
||||
}
|
||||
|
||||
|
||||
@media only screen and (max-width: 600px) {
|
||||
#small-menu{
|
||||
display: flex;
|
||||
margin-left: 20px;
|
||||
align-items: center;
|
||||
gap: 5px;
|
||||
}
|
||||
.fa-home{
|
||||
display: none;
|
||||
}
|
||||
.home-text{
|
||||
display: block;
|
||||
}
|
||||
|
||||
.menu{
|
||||
display: none;
|
||||
}
|
||||
.menu.show-menu{
|
||||
width: 100%;
|
||||
text-decoration: none;
|
||||
color: white;
|
||||
display: grid;
|
||||
transition: background-color 0.5s var(--ease);
|
||||
margin-left: 50px;
|
||||
justify-content: left;
|
||||
margin-top: 5px;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
.hed{
|
||||
height: fit-content;
|
||||
display: grid;
|
||||
gap: 0 ;
|
||||
justify-content: left;
|
||||
}
|
||||
.menu.show-menu > a{
|
||||
text-decoration: none;
|
||||
border-image: linear-gradient(90deg, rgba(8, 8, 8, 1) 0%, rgba(179, 150, 121, 1) 21%, rgba(0, 0, 0, 1) 37%, rgba(98, 87, 75, 1) 63%, rgba(0, 0, 0, 1) 100%);
|
||||
}
|
||||
.home-icon {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
width: 10px;
|
||||
height: 10px;
|
||||
background-color: #0B173B;
|
||||
border-radius: 50%;
|
||||
cursor: pointer;
|
||||
transition: background-color 0.5s var(--ease);
|
||||
}
|
||||
|
||||
#weather {
|
||||
display: grid;
|
||||
margin-bottom: 4%;
|
||||
}
|
||||
.weather-widget {
|
||||
width: 90vw;
|
||||
height: 20px;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
}
|
||||
.weather-container {
|
||||
display: grid;
|
||||
color: white;
|
||||
gap: 2%;
|
||||
}
|
||||
.weather-w{
|
||||
width: 100vw;
|
||||
}
|
||||
}
|
||||
|
||||
html {
|
||||
margin: 0 auto;
|
||||
max-width: 98vw;
|
||||
overflow-x: hidden;
|
||||
margin: 0 auto;
|
||||
width: 90%;
|
||||
overflow-x: hidden;
|
||||
}
|
||||
|
||||
.arr-pr-nx {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.arr-pr-nx svg {
|
||||
width: 40px;
|
||||
height: 40px;
|
||||
margin: 0 1rem;
|
||||
cursor: pointer;
|
||||
overflow: visible;
|
||||
fill: white;
|
||||
}
|
||||
|
||||
.arr-pr-nx svg polygon,
|
||||
.arr-pr-nx path {
|
||||
transition: all 0.5s var(--ease);
|
||||
}
|
||||
|
||||
.arr-pr-nx svg:hover polygon,
|
||||
.arr-pr-nx svg:hover path {
|
||||
transition: all 1s var(--ease);
|
||||
fill: #FF0000;
|
||||
}
|
||||
|
||||
.arr-pr-nx svg:hover .arrow {
|
||||
animation: arrow-anim 2.5s var(--ease) infinite;
|
||||
}
|
||||
|
||||
.arr-pr-nx svg:hover .arrow-fixed {
|
||||
animation: arrow-fixed-anim 2.5s var(--ease) infinite;
|
||||
}
|
||||
|
||||
@keyframes arrow-anim {
|
||||
0% {
|
||||
opacity: 1;
|
||||
transform: translateX(0);
|
||||
}
|
||||
5% {
|
||||
transform: translateX(-0.1rem);
|
||||
}
|
||||
100% {
|
||||
transform: translateX(1rem);
|
||||
opacity: 0;
|
||||
}
|
||||
}
|
||||
|
||||
@keyframes arrow-fixed-anim {
|
||||
5% {
|
||||
opacity: 0;
|
||||
}
|
||||
20% {
|
||||
opacity: 0.4;
|
||||
}
|
||||
100% {
|
||||
opacity: 1;
|
||||
}
|
||||
}
|
||||
.home-icon {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
width: 40px;
|
||||
height: 40px;
|
||||
background-color: #0B173B;
|
||||
border-radius: 50%;
|
||||
cursor: pointer;
|
||||
transition: background-color 0.5s var(--ease);
|
||||
}
|
||||
|
||||
.home-icon:hover {
|
||||
background-color: #FF0000;
|
||||
}
|
||||
|
||||
.home-icon i {
|
||||
font-size: 2rem;
|
||||
color: white;
|
||||
transition: color 0.5s var(--ease);
|
||||
}
|
||||
|
||||
.home-icon:hover i {
|
||||
color: #FF0000;
|
||||
}
|
||||
.menu-icon {
|
||||
display: inline-block;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.bar1, .bar2, .bar3 {
|
||||
width: 35px;
|
||||
height: 5px;
|
||||
background-color: white;
|
||||
margin: 6px 0;
|
||||
transition: 0.4s;
|
||||
}
|
||||
|
||||
.change .bar1 {
|
||||
transform: translate(0, 11px) rotate(-45deg);
|
||||
}
|
||||
|
||||
.change .bar2 {opacity: 0;}
|
||||
|
||||
.change .bar3 {
|
||||
transform: translate(0, -11px) rotate(45deg);
|
||||
}
|
||||
|
||||
</style>
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
{{define "headerHTML"}}
|
||||
<header>
|
||||
<center>
|
||||
<a href="/" class="logo">
|
||||
<pre id="logo">
|
||||
_____ ______ ____ ____ ___ ____ ___ __ __ ____ ____ ___
|
||||
/ ___/| | / || \ / _]| \ / \ | | || || \ / _]
|
||||
@@ -10,11 +12,10 @@
|
||||
\___| |__| |__|__||__|\_||_____||__|__| \___/ \_/ |____||__|__||_____|
|
||||
|
||||
</pre>
|
||||
<br>
|
||||
<nav>
|
||||
<a href="{{.previous}}"><----</a> |
|
||||
<a href="/">Početna</a> |
|
||||
<a href="{{.next}}">----></a>
|
||||
</nav>
|
||||
</a>
|
||||
</center>
|
||||
|
||||
{{template "categorymenuHTML" .}}
|
||||
|
||||
</header>
|
||||
{{end}}
|
||||
|
||||
27
web/data/prevnext.html
Normal file
27
web/data/prevnext.html
Normal file
@@ -0,0 +1,27 @@
|
||||
{{define "prevnextHTML"}}
|
||||
{{/*
prevnextHTML renders the previous/next pager as a <nav class="fot">.
It reads two values from the data passed in: .previous and .next, each used
as the href of one link. Both links wrap the same arrow SVG inside a
div.arr-pr-nx; the first one is mirrored horizontally by the scale(-1, 1)
transform on its <g id="prev"> group so that it points the other way.
*/}}
<nav class="fot">
|
||||
<a href="{{.previous}}">
|
||||
<div class="arr-pr-nx" title="Prethodna">
|
||||
<svg width="18px" height="17px" viewBox="0 0 18 17" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<g id="prev" transform="translate(8.500000, 8.500000) scale(-1, 1) translate(-8.500000, -8.500000)">
|
||||
<polygon class="arrow" points="16.3746667 8.33860465 7.76133333 15.3067621 6.904 14.3175671 14.2906667 8.34246869 6.908 2.42790698 7.76 1.43613596"></polygon>
|
||||
<polygon class="arrow-fixed" points="16.3746667 8.33860465 7.76133333 15.3067621 6.904 14.3175671 14.2906667 8.34246869 6.908 2.42790698 7.76 1.43613596"></polygon>
|
||||
<path d="M-1.48029737e-15,0.56157424 L-1.48029737e-15,16.1929159 L9.708,8.33860465 L-2.66453526e-15,0.56157424 L-1.48029737e-15,0.56157424 Z M1.33333333,3.30246869 L7.62533333,8.34246869 L1.33333333,13.4327013 L1.33333333,3.30246869 L1.33333333,3.30246869 Z"></path>
|
||||
</g>
|
||||
</svg>
|
||||
</div>
|
||||
</a>
|
||||
<a href="{{.next}}">
|
||||
<div class="arr-pr-nx" title="Sledeca">
|
||||
<svg width="18px" height="17px" viewBox="-1 0 18 17" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<g>
|
||||
<polygon class="arrow" points="16.3746667 8.33860465 7.76133333 15.3067621 6.904 14.3175671 14.2906667 8.34246869 6.908 2.42790698 7.76 1.43613596"></polygon>
|
||||
<polygon class="arrow-fixed" points="16.3746667 8.33860465 7.76133333 15.3067621 6.904 14.3175671 14.2906667 8.34246869 6.908 2.42790698 7.76 1.43613596"></polygon>
|
||||
<path d="M-4.58892184e-16,0.56157424 L-4.58892184e-16,16.1929159 L9.708,8.33860465 L-1.64313008e-15,0.56157424 L-4.58892184e-16,0.56157424 Z M1.33333333,3.30246869 L7.62533333,8.34246869 L1.33333333,13.4327013 L1.33333333,3.30246869 L1.33333333,3.30246869 Z"></path>
|
||||
</g>
|
||||
</svg>
|
||||
</div>
|
||||
</a>
|
||||
</nav>
|
||||
|
||||
{{end}}
|
||||
@@ -2,7 +2,7 @@
|
||||
{{with .article }}
|
||||
<div class="article_content">
|
||||
<h1 id="title">{{.Title}}</h1>
|
||||
<div class="single_timestamp">{{.SourceName}} - {{ .FormatedCreatedAt }}</div>
|
||||
<div class="single_timestamp"> starenovine - {{ .FormatedCreatedAt }}</div>
|
||||
<div class="article_body">
|
||||
{{.Content}}
|
||||
</div>
|
||||
|
||||
14
web/data/weatherwidget.html
Normal file
14
web/data/weatherwidget.html
Normal file
@@ -0,0 +1,14 @@
|
||||
{{define "weatherwidgetHTML"}}
|
||||
{{/*
weatherwidgetHTML renders a compact weather strip that links to /weather.
It ranges over .weatherInfo from the data passed in and, for every entry,
emits one div.weather-widget showing the entry's .Name and .Main.Temp
followed by " °C". If the template is invoked without data, the range has
nothing to iterate and the strip is empty.
NOTE(review): id="city" and id="temperature" are emitted once per entry, so
the ids repeat whenever .weatherInfo has more than one element; confirm no
script relies on them before switching them to classes.
*/}}
<br>
|
||||
<a class="w-link" href="/weather">
|
||||
<div id="weather">
|
||||
{{range .weatherInfo}}
|
||||
<div class="weather-widget">
|
||||
<div><span id="city">{{.Name}}</span></div>
|
||||
<div id="temperature">{{.Main.Temp}} °C</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
</a>
|
||||
<br>
|
||||
{{end}}
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
{{template "singleArticleHTML" .}}
|
||||
|
||||
<br>
|
||||
{{template "prevnextHTML" .}}
|
||||
{{template "footerHTML" .}}
|
||||
</body>
|
||||
</html>
|
||||
|
||||
12
web/tpl/category.html
Normal file
12
web/tpl/category.html
Normal file
@@ -0,0 +1,12 @@
|
||||
{{define "categoryHTML"}}
|
||||
{{/*
categoryHTML is the page layout for a category listing. It composes, in
order, the headHTML, headerHTML, articlecategoryHTML and footerHTML
templates, forwarding the current data (.) to each of them.
NOTE(review): this template closes </html> without opening it; presumably
headHTML emits the opening tag - confirm.
*/}}
|
||||
{{template "headHTML" .}}
|
||||
<body>
|
||||
{{template "headerHTML" .}}
|
||||
{{template "articlecategoryHTML" .}}
|
||||
|
||||
<br>
|
||||
{{template "footerHTML" .}}
|
||||
</body>
|
||||
</html>
|
||||
{{end}}
|
||||
@@ -4,8 +4,11 @@
|
||||
<body>
|
||||
{{template "headerHTML" .}}
|
||||
|
||||
{{template "articlesHTML" .}}
|
||||
{{/* Pass the page data (.) so the widget can range over .weatherInfo; without it the widget receives nil data. */}}{{template "weatherwidgetHTML" .}}
|
||||
|
||||
{{template "articlesHTML" .}}
|
||||
<br>
|
||||
{{template "prevnextHTML" .}}
|
||||
{{template "footerHTML" .}}
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
{{define "homeHTML"}}
|
||||
|
||||
{{template "headHTML" .}}
|
||||
<body>
|
||||
{{template "headerHTML" .}}
|
||||
|
||||
{{template "articlesHTML" .}}
|
||||
{{template "weatherwidgetHTML" .}}
|
||||
|
||||
{{template "articlesHTML" .}}
|
||||
<br>
|
||||
{{template "prevnextHTML" .}}
|
||||
{{template "footerHTML" .}}
|
||||
</body>
|
||||
</html>
|
||||
|
||||
12
web/tpl/weather.html
Normal file
12
web/tpl/weather.html
Normal file
@@ -0,0 +1,12 @@
|
||||
{{define "weatherHTML"}}
|
||||
{{/*
weatherHTML is the page layout for the full weather view. It composes, in
order, the headHTML, headerHTML, fullweatherHTML and footerHTML templates,
forwarding the current data (.) to each of them.
NOTE(review): this template closes </html> without opening it; presumably
headHTML emits the opening tag - confirm.
*/}}
|
||||
{{template "headHTML" .}}
|
||||
<body>
|
||||
{{template "headerHTML" .}}
|
||||
|
||||
{{template "fullweatherHTML" .}}
|
||||
|
||||
{{template "footerHTML" .}}
|
||||
</body>
|
||||
</html>
|
||||
{{end}}
|
||||
Reference in New Issue
Block a user