75 lines
2.0 KiB
Python
75 lines
2.0 KiB
Python
import os
from datetime import datetime, timedelta, timezone

import psycopg2
from openai import OpenAI
|
|
|
|
|
|
# Module-level setup: OpenAI client, spaCy pipeline, time cut-off and the
# PostgreSQL connection used by the rest of the script.
#
# Secrets are read from the environment instead of being hard-coded, so they
# never end up in version control. A missing variable fails fast with a
# KeyError naming it.
# NOTE(review): the API key and database password that were previously
# committed in this file must be treated as compromised and rotated.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

import spacy

# spaCy pipeline used by convert_text_to_vector().
nlp = spacy.load("en_core_web_sm")

# Timezone-aware UTC timestamp 20 minutes before the script started; used as
# the cut-off in the article query.
twenty_minutes_ago_utc = datetime.now(timezone.utc) - timedelta(minutes=20)

# Connection parameters passed as keyword arguments to psycopg2.connect().
db_params = {
    'host': 'localhost',
    'port': '5432',
    'database': 'svevijestiweb',
    'user': 'svevijesti',
    'password': os.environ["PGPASSWORD"],
}

conn = psycopg2.connect(**db_params)
cursor = conn.cursor()
|
|
|
|
def convert_text_to_vector(text):
    """Return the spaCy document vector for *text*.

    The text is run through the module-level ``nlp`` pipeline and the
    resulting document's ``vector`` attribute is returned.
    """
    doc = nlp(text)
    return doc.vector
|
|
|
|
def check_similarity_with_gpt3(text1, text2):
    """Ask the chat model how similar two texts are and return a numeric score.

    The model is instructed to answer with a single number between 0 and 1 so
    that the result can be compared against a numeric threshold (the previous
    free-form text reply could not be compared with a float).

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        A float in the range [0.0, 1.0]. If the model's reply is empty, is not
        a number, or lies outside that range, 0.0 is returned so that callers
        never treat an unusable answer as a match.

    Raises:
        Whatever ``client.chat.completions.create`` raises on API failure.
    """
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        # Deterministic output: the same pair should always get the same score.
        temperature=0,
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a text similarity assistant. "
                    "Reply with a single number between 0 and 1 and nothing else."
                ),
            },
            {
                "role": "user",
                "content": f"Compare the similarity between the following two texts:\n\nText 1: {text1}\nText 2: {text2}\n\nSimilarity:",
            },
        ],
    )
    reply = completion.choices[0].message.content or ""
    print("Analiza")

    try:
        similarity_score = float(reply.strip())
    except ValueError:
        return 0.0

    # Also rejects NaN, for which every comparison is false.
    if not 0.0 <= similarity_score <= 1.0:
        return 0.0
    return similarity_score
|
|
|
|
# Main script: fetch article titles, compare every pair with the chat model
# and delete the later article of each pair judged to be a duplicate.

# Pairs scoring strictly above this value are treated as duplicates.
similarity_threshold = 0.8

try:
    # NOTE(review): this selects articles created MORE than 20 minutes ago;
    # if only recent articles were intended the comparison should be `>` --
    # confirm against the scheduler that runs this script.
    cursor.execute(
        "SELECT title FROM articles WHERE articles.created_at < %s",
        (twenty_minutes_ago_utc,),
    )
    data_from_database = cursor.fetchall()

    # Unique titles in first-seen order. Deletion is keyed on the title, so
    # comparing a title with an identical copy of itself would also delete
    # the article that is meant to be kept.
    # NOTE(review): exact-duplicate rows are therefore left untouched;
    # removing them safely requires selecting a primary key -- schema not
    # visible from here.
    titles = list(dict.fromkeys(row[0] for row in data_from_database))

    # Titles removed during this run; they are skipped in later comparisons
    # to avoid pointless API calls and repeated DELETEs.
    deleted_titles = set()

    for i, text1 in enumerate(titles):
        if text1 in deleted_titles:
            continue

        for text2 in titles[i + 1:]:
            if text2 in deleted_titles:
                continue

            # Pass the raw titles: the prompt is textual, so embedding
            # numeric vectors in it gives the model nothing meaningful.
            raw_score = check_similarity_with_gpt3(text1, text2)
            print(raw_score)
            print("T1", text1)
            print("T2", text2)

            # The helper may hand back text; a non-numeric answer must never
            # trigger a delete, so such pairs are skipped.
            try:
                similarity_score = float(raw_score)
            except (TypeError, ValueError):
                print(f"Skipping pair, similarity score is not numeric: {raw_score!r}")
                continue

            if similarity_score > similarity_threshold:
                try:
                    # Filter on the same column the titles were read from.
                    cursor.execute("DELETE FROM articles WHERE title = %s", (text2,))
                    conn.commit()
                except Exception as e:
                    conn.rollback()  # Roll back changes if an error occurs
                    print(f"Error deleting rows: {e}")
                else:
                    deleted_titles.add(text2)
                    print(f"Deleted rows where title is {text2}")
finally:
    # Release database resources even if the query or an API call fails.
    cursor.close()
    conn.close()
|