"""Store news items with vector embeddings in PostgreSQL (pgvector).

Provides helpers to create the ``vectorsvevijesti`` table, insert rows,
read them back, and check whether a new embedding is a near-duplicate of
one that is already stored.
"""
from contextlib import closing

import numpy as np
import psycopg2
from pgvector.psycopg2 import register_vector
from psycopg2 import sql
from sklearn.metrics.pairwise import cosine_similarity

# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store instead.
host = 'localhost'
port = '5432'
user = 'postgres'
password = 'salmonela pljusti 221 hamo'
dbname = 'vector_svw'


def _connect(register=True):
    """Open a connection using the module-level settings.

    Args:
        register: When true, register the pgvector type on the connection so
            that ``vector`` columns are exchanged as NumPy arrays. Pass
            ``False`` when the extension may not exist yet or when no vector
            column is touched.

    Returns:
        An open psycopg2 connection. The caller is responsible for closing it.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname,
    )
    if register:
        try:
            register_vector(conn)
        except Exception:
            # Do not leak the connection if registration fails.
            conn.close()
            raise
    return conn


def calculate_cosine_similarity(v1, v2):
    """Return the cosine similarity of two vectors.

    Inputs are flattened to one dimension before comparison. If either
    vector is empty or all zeros the similarity is undefined; ``0.0`` is
    returned in that case instead of propagating NaN.
    """
    a = np.asarray(v1, dtype=float).ravel()
    b = np.asarray(v2, dtype=float).ravel()
    if not a.any() or not b.any():
        return 0.0
    # cosine_similarity normalises its inputs itself, so no manual
    # normalisation is needed beforehand.
    return cosine_similarity([a], [b])[0][0]


def is_similar_data(title, text, link, embedding, threshold=0.9):
    """Check whether a stored row has an embedding similar to *embedding*.

    Args:
        title: Title of the candidate item; used only in printed messages.
        text: Unused; kept for interface compatibility.
        link: Unused; kept for interface compatibility.
        embedding: Embedding vector of the candidate item.
        threshold: A stored row counts as similar when its cosine similarity
            to *embedding* is strictly greater than this value.

    Returns:
        ``True`` if any stored embedding exceeds the threshold, else ``False``.
    """
    with closing(_connect()) as conn, closing(conn.cursor()) as cursor:
        cursor.execute('SELECT title ,embedding FROM vectorsvevijesti;')
        rows = cursor.fetchall()

    for existing_title, existing_embedding in rows:
        if existing_embedding is None:
            # Rows stored without an embedding cannot be compared.
            continue
        similarity = calculate_cosine_similarity(existing_embedding, embedding)
        if similarity > threshold:
            print(f"Similar data found: \n #{title} \n #{existing_title}")
            return True

    print(f"Inserting: #{title}")
    return False


def insert_data(title, text, link, embedding):
    """Insert one row into ``vectorsvevijesti`` and commit it."""
    with closing(_connect()) as conn, closing(conn.cursor()) as cursor:
        cursor.execute(
            '''
            INSERT INTO vectorsvevijesti (title, text, link, embedding)
            VALUES (%s, %s, %s, %s);
            ''',
            (title, text, link, embedding),
        )
        conn.commit()


def get_data():
    """Return all ``(title, text, link)`` rows from ``vectorsvevijesti``."""
    query = '''SELECT title,text,link FROM vectorsvevijesti;'''
    # No vector column is selected, so the pgvector type is not registered.
    with closing(_connect(register=False)) as conn, \
            closing(conn.cursor()) as cursor:
        cursor.execute(query)
        return cursor.fetchall()


def create_db():
    """Create the vector extension and (re)create ``vectorsvevijesti``.

    WARNING: drops the table if it already exists, discarding its rows.
    """
    # The type can only be registered once the extension exists, so connect
    # without registering and register after CREATE EXTENSION.
    with closing(_connect(register=False)) as conn, \
            closing(conn.cursor()) as cursor:
        cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
        register_vector(conn)
        cursor.execute("DROP TABLE IF EXISTS vectorsvevijesti;")
        cursor.execute('''
            CREATE TABLE vectorsvevijesti (
                id bigserial PRIMARY KEY,
                title VARCHAR,
                text VARCHAR,
                link VARCHAR,
                embedding vector(1536)
            );
        ''')
        conn.commit()


# NOTE(review): this runs on every import of the module and therefore drops
# and recreates the table each time. Kept to preserve existing behaviour;
# consider moving it behind an explicit setup step.
create_db()