"""Remove near-duplicate articles from the ``articles`` table.

The script loads article titles older than twenty minutes, asks an OpenAI
chat model to score every pair of titles for similarity and deletes the
second article of each pair whose score exceeds ``SIMILARITY_THRESHOLD``.

Credentials are read from the environment instead of being hard-coded:

* ``OPENAI_API_KEY`` - OpenAI API key (required).
* ``DB_PASSWORD``    - PostgreSQL password (required).
* ``DB_HOST``, ``DB_PORT``, ``DB_NAME``, ``DB_USER`` - optional overrides;
  they default to the values this script has always used.
"""
import os
from datetime import datetime, timedelta, timezone
from itertools import combinations

import psycopg2
import spacy
from openai import OpenAI

# Pairs scoring strictly above this value are treated as duplicates.
SIMILARITY_THRESHOLD = 0.8

# SECURITY: an API key and a database password used to be committed in this
# file. Treat both as compromised: revoke/rotate them and supply the new
# values through the environment variables below.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

nlp = spacy.load("en_core_web_sm")

twenty_minutes_ago_utc = datetime.now(timezone.utc) - timedelta(minutes=20)

db_params = {
    'host': os.environ.get('DB_HOST', 'localhost'),
    'port': os.environ.get('DB_PORT', '5432'),
    'database': os.environ.get('DB_NAME', 'svevijestiweb'),
    'user': os.environ.get('DB_USER', 'svevijesti'),
    'password': os.environ['DB_PASSWORD'],
}

conn = psycopg2.connect(**db_params)
cursor = conn.cursor()


def convert_text_to_vector(text):
    """Return the spaCy document vector for *text*.

    Kept for callers that rely on it; the de-duplication loop below sends
    the raw titles to the chat model, not these vectors.
    """
    return nlp(text).vector


def check_similarity_with_gpt3(text1, text2):
    """Ask the chat model how similar *text1* and *text2* are.

    The model is instructed to answer with a single number between 0 and 1.

    Returns:
        The model's raw reply (normally a string such as ``"0.85"``). Use
        ``_parse_similarity_score`` to turn it into a float.
    """
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,  # keep the scoring as repeatable as possible
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a text similarity assistant. Reply with only "
                    "a single decimal number between 0 and 1, where 0 "
                    "means unrelated and 1 means identical in meaning. "
                    "Do not add any other text."
                ),
            },
            {
                "role": "user",
                "content": f"Compare the similarity between the following two texts:\n\nText 1: {text1}\nText 2: {text2}\n\nSimilarity:",
            },
        ],
    )
    similarity_score = completion.choices[0].message.content
    print("Analiza")
    return similarity_score


def _parse_similarity_score(reply):
    """Convert the model's reply to a float in [0, 1], or ``None``.

    Anything that is not a bare number in range is rejected, so an
    unexpected reply can never trigger a deletion.
    """
    if reply is None:
        return None
    try:
        score = float(reply.strip())
    except ValueError:
        return None
    # The range test also rejects NaN, which compares False to everything.
    if 0.0 <= score <= 1.0:
        return score
    return None


try:
    # NOTE(review): "<" selects articles OLDER than twenty minutes, i.e.
    # nearly the whole table. If only recent articles were meant, this
    # should be ">" - confirm the intent before changing it.
    cursor.execute(
        "SELECT title FROM articles WHERE articles.created_at < %s",
        (twenty_minutes_ago_utc,),
    )
    data_from_database = cursor.fetchall()
    titles = [row[0] for row in data_from_database]

    # Indices of rows already removed; they are skipped so we neither pay
    # for further API calls nor delete articles merely because they
    # resemble an article that no longer exists.
    deleted = set()

    for (i, text1), (j, text2) in combinations(enumerate(titles), 2):
        if i in deleted or j in deleted:
            continue

        raw_score = check_similarity_with_gpt3(text1, text2)
        similarity_score = _parse_similarity_score(raw_score)
        print(similarity_score)
        print("T1", text1)
        print("T2", text2)

        if similarity_score is None:
            print(f"Could not parse similarity score: {raw_score!r}")
            continue
        if similarity_score <= SIMILARITY_THRESHOLD:
            continue

        try:
            # The value we hold is a title, so match on the title column
            # (the original filtered on "content" and never matched).
            # NOTE(review): this removes EVERY row with this title; if the
            # table has a primary key, select and delete by it instead.
            cursor.execute("DELETE FROM articles WHERE title = %s", (text2,))
            conn.commit()
        except psycopg2.Error as e:
            conn.rollback()  # Roll back changes if an error occurs
            print(f"Error deleting rows: {e}")
        else:
            print(f"Deleted rows where title is {text2}")
            # Every fetched row sharing this title is gone now.
            deleted.update(
                k for k, title in enumerate(titles) if title == text2
            )
finally:
    cursor.close()
    conn.close()