diff --git a/pyth/avaz.py b/pyth/avaz.py deleted file mode 100644 index 898b5ea..0000000 --- a/pyth/avaz.py +++ /dev/null @@ -1,27 +0,0 @@ -import requests -from bs4 import BeautifulSoup - -def getNews(url): - response = requests.get(url) - - if response.status_code == 200: - soup = BeautifulSoup(response.text, 'html.parser') - - articles = soup.find_all('article', class_='news__item') - - for index, article in enumerate(articles, start=1): - title = article.find('h2').text.strip() - content = article.find('p').text.strip() - category = article.find('span').text.strip() - - print(f"{index}. Title: {title}") - print(f" Content: {content}") - print(f" Category: {category}") - print('****************************') - else: - print(f"Error. Status code: {response.status_code}") - -if __name__ == "__main__": - pUrl = 'https://srpskainfo.com/sve-vijesti/' - - getNews(pUrl) diff --git a/pyth/checkforsimilar.py b/pyth/checkforsimilar.py deleted file mode 100644 index 7c3875d..0000000 --- a/pyth/checkforsimilar.py +++ /dev/null @@ -1,74 +0,0 @@ -import psycopg2 -from openai import OpenAI -from datetime import datetime, timedelta, timezone - - -client = OpenAI(api_key='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') -import spacy - - -nlp = spacy.load("en_core_web_sm") -twenty_minutes_ago_utc = datetime.now(timezone.utc) - timedelta(minutes=20) - - - -db_params = { - 'host': 'localhost', - 'port': '5432', - 'database': 'svevijestiweb', - 'user': 'svevijesti', - 'password': 'salmonela pljusti 221 hamo' -} - - -conn = psycopg2.connect(**db_params) -cursor = conn.cursor() - -def convert_text_to_vector(text): - return nlp(text).vector - -def check_similarity_with_gpt3(text1, text2): - completion = client.chat.completions.create( - model="gpt-3.5-turbo", - messages=[ - {"role": "system", "content": "You are a text similarity assistant."}, - {"role": "user", "content": f"Compare the similarity between the following two texts:\n\nText 1: {text1}\nText 2: {text2}\n\nSimilarity:"} - ] - ) - similarity_score = completion.choices[0].message.content - print("Analiza") - return similarity_score - -cursor.execute("SELECT title FROM articles WHERE articles.created_at < %s", (twenty_minutes_ago_utc,)) -data_from_database = cursor.fetchall() - - -for i in range(len(data_from_database)): - for j in range(i + 1, len(data_from_database)): - text1 = data_from_database[i][0] - text2 = data_from_database[j][0] - - vector1 = convert_text_to_vector(text1) - vector2 = convert_text_to_vector(text2) - - similarity_score = check_similarity_with_gpt3(vector1, vector2 ) - print(similarity_score) - print("T1",text1) - print("T2", text2) - - - similarity_threshold = 0.8 - - if similarity_score > similarity_threshold: - try: - cursor.execute("DELETE FROM articles WHERE content = %s", (text2,)) - conn.commit() - print(f"Deleted rows where title is {text2}") - except Exception as e: - conn.rollback() # Roll back changes if an error occurs - print(f"Error deleting rows: {e}") - - - -cursor.close() -conn.close() diff --git a/pyth/srpskainfo.py b/pyth/srpskainfo.py deleted file mode 100644 index 88c73d9..0000000 --- a/pyth/srpskainfo.py +++ /dev/null @@ -1,34 +0,0 @@ -import requests -from bs4 import BeautifulSoup - -def getNews(url): - response = requests.get(url) - - if response.status_code == 200: - soup = BeautifulSoup(response.text, 'html.parser') - - articles = soup.find_all('article', class_='news__item') - - for index, article in enumerate(articles, start=1): - title = article.find('h2').text.strip() - content = article.find('p').text.strip() - category = article.find('span').text.strip() - slink = article.find('a') - if slink: - slink = slink.get('href', '') - else: - slink = '' - - - print(f"{index}. Title: {title}") - print(f" Content: {content}") - print(f" Category: {category}") - print(f"Link: {slink}") - print('****************************') - else: - print(f"Error. Status code: {response.status_code}") - -if __name__ == "__main__": - pUrl = 'https://srpskainfo.com/sve-vijesti/' - - getNews(pUrl)