Files
old-svevijesti/pyth/checkforsimilar.py
2023-12-18 16:51:47 +01:00

75 lines
2.0 KiB
Python

import os
from datetime import datetime, timedelta, timezone

import psycopg2
from openai import OpenAI
# OpenAI client shared by check_similarity_with_gpt3().
# The API key is read from the OPENAI_API_KEY environment variable instead of
# being embedded in the source: a key committed to version control is visible
# to everyone with repository access and must be treated as compromised.
# A missing variable fails fast with a KeyError naming it.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
import spacy
# spaCy pipeline used by convert_text_to_vector().
nlp = spacy.load("en_core_web_sm")

# Timezone-aware (UTC) cut-off used to select which articles are scanned.
twenty_minutes_ago_utc = datetime.now(timezone.utc) - timedelta(minutes=20)

# Connection settings for the local PostgreSQL database.
# The password comes from the PGPASSWORD environment variable rather than the
# source file, so the credential is never committed; a missing variable fails
# fast with a KeyError naming it.
db_params = {
    'host': 'localhost',
    'port': '5432',
    'database': 'svevijestiweb',
    'user': 'svevijesti',
    'password': os.environ['PGPASSWORD'],
}

# Module-level connection and cursor shared by the rest of the script.
conn = psycopg2.connect(**db_params)
cursor = conn.cursor()
def convert_text_to_vector(text):
    """Run *text* through the module-level spaCy pipeline and return its vector.

    Args:
        text: The text to process.

    Returns:
        The ``vector`` attribute of the spaCy document built from *text*.
    """
    document = nlp(text)
    return document.vector
def check_similarity_with_gpt3(text1, text2):
    """Ask gpt-3.5-turbo how similar two texts are and return a numeric score.

    The model is instructed to answer with a single number so that the result
    can be compared against a numeric threshold; a free-form reply cannot be
    compared with ``>`` against a float.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        The similarity score reported by the model, as a float. The prompt
        asks for a value between 0 and 1, but the value is not clamped.

    Raises:
        ValueError: If the model's reply is empty or is not a number.
    """
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        # Temperature 0 keeps the scoring as repeatable as the API allows.
        temperature=0,
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a text similarity assistant. "
                    "Reply with only a single number between 0 and 1, "
                    "where 0 means unrelated and 1 means identical in meaning."
                ),
            },
            {
                "role": "user",
                "content": f"Compare the similarity between the following two texts:\n\nText 1: {text1}\nText 2: {text2}\n\nSimilarity:",
            },
        ],
    )
    reply = completion.choices[0].message.content
    print("Analiza")
    try:
        # The message content may be None, hence the `or ""` guard.
        return float((reply or "").strip())
    except ValueError as err:
        raise ValueError(
            f"Model did not return a numeric similarity score: {reply!r}"
        ) from err
# Compare every pair of article titles and delete the later one of each pair
# whose similarity score exceeds the threshold.
similarity_threshold = 0.8  # loop-invariant, so defined once up front
deleted_titles = set()  # titles already removed; skipped in later comparisons

try:
    # NOTE(review): "<" selects every article OLDER than the cut-off (the
    # whole archive), not the most recent ones -- confirm this is intended.
    cursor.execute("SELECT title FROM articles WHERE articles.created_at < %s", (twenty_minutes_ago_utc,))
    data_from_database = cursor.fetchall()

    for i, (text1,) in enumerate(data_from_database):
        if text1 in deleted_titles:
            continue
        for (text2,) in data_from_database[i + 1:]:
            if text2 in deleted_titles:
                continue

            # The prompt built by check_similarity_with_gpt3() expects the
            # texts themselves, so the titles are passed rather than vectors.
            try:
                similarity_score = float(check_similarity_with_gpt3(text1, text2))
            except (TypeError, ValueError) as e:
                # The model's reply is not guaranteed to be numeric; without a
                # usable score the pair is left alone rather than deleted.
                print(f"Skipping pair, no numeric similarity score: {e}")
                continue

            print(similarity_score)
            print("T1", text1)
            print("T2", text2)

            if similarity_score > similarity_threshold:
                try:
                    # Only titles are fetched, so the filter is on the title
                    # column (matching the log message below).
                    # NOTE(review): this removes EVERY row with that title,
                    # including the kept article when both titles are
                    # identical; prefer deleting by primary key once the
                    # table's key column is confirmed.
                    cursor.execute("DELETE FROM articles WHERE title = %s", (text2,))
                    conn.commit()
                    deleted_titles.add(text2)
                    print(f"Deleted rows where title is {text2}")
                except psycopg2.Error as e:
                    conn.rollback()  # Roll back changes if an error occurs
                    print(f"Error deleting rows: {e}")
finally:
    # Release the database resources even if the query or an API call fails.
    cursor.close()
    conn.close()