Adding VDB
This commit is contained in:
115
pyth/vectData.py
Normal file
115
pyth/vectData.py
Normal file
@@ -0,0 +1,115 @@
|
||||
import os

import numpy as np
import psycopg2
from pgvector.psycopg2 import register_vector
from psycopg2 import sql
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
# Connection settings for the PostgreSQL/pgvector database.  Each value can be
# overridden through the matching libpq environment variable; the literal
# defaults keep the previous hard-coded behaviour when nothing is set.
# NOTE(review): a real password should not live in source control -- supply
# PGPASSWORD from the environment, rotate this credential, and then remove
# the literal default below.
host = os.environ.get('PGHOST', 'localhost')
port = os.environ.get('PGPORT', '5432')
user = os.environ.get('PGUSER', 'postgres')
password = os.environ.get('PGPASSWORD', 'salmonela pljusti 221 hamo')
dbname = os.environ.get('PGDATABASE', 'vector_svw')
|
||||
|
||||
def calculate_cosine_similarity(v1, v2):
    """Return the cosine similarity of two vectors.

    Args:
        v1: First vector; any array-like of numbers (flattened to 1-D).
        v2: Second vector, with the same number of elements as ``v1``.

    Returns:
        The cosine of the angle between the two vectors as a float, or
        ``0.0`` when either vector has zero length (the angle is undefined
        in that case).

    Raises:
        ValueError: If the vectors do not have the same number of elements.
    """
    a = np.asarray(v1, dtype=float).ravel()
    b = np.asarray(v2, dtype=float).ravel()

    # Dividing by a zero norm would yield NaN; treat a zero vector as being
    # similar to nothing instead of propagating NaN or raising.
    scale = np.linalg.norm(a) * np.linalg.norm(b)
    if scale == 0:
        return 0.0

    # dot(a, b) / (|a| * |b|) is the cosine directly, so no separate
    # normalisation pass is needed.
    return float(np.dot(a, b) / scale)
|
||||
|
||||
def is_similar_data(title, text, link, embedding, threshold=0.9):
    """Report whether a stored row has an embedding close to ``embedding``.

    Every row of ``vectorsvevijesti`` is compared against ``embedding`` with
    ``calculate_cosine_similarity``; the scan stops at the first row whose
    similarity is strictly greater than ``threshold``.

    Args:
        title: Title of the candidate item; only used in the printed message.
        text: Unused here; kept so the signature matches ``insert_data``.
        link: Unused here; kept so the signature matches ``insert_data``.
        embedding: Embedding vector of the candidate item.
        threshold: Similarity above which the candidate counts as a duplicate.

    Returns:
        True if a sufficiently similar row exists, otherwise False.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname,
    )
    try:
        # Per the pgvector-python docs the vector type must be registered on
        # the connection that reads it; otherwise the column is not decoded
        # into a numeric array.
        register_vector(conn)

        # Convert the candidate once instead of on every row.
        candidate = np.asarray(embedding, dtype=float).ravel()

        with conn.cursor() as cursor:
            cursor.execute('SELECT title ,embedding FROM vectorsvevijesti;')
            for existing_title, stored in cursor:
                if stored is None:
                    # A row without an embedding cannot be compared.
                    continue
                existing_embedding = np.asarray(stored, dtype=float).ravel()
                similarity = calculate_cosine_similarity(existing_embedding, candidate)
                if similarity > threshold:
                    print(f"Similar data found: \n #{title} \n #{existing_title}")
                    return True
    finally:
        # Always release the connection, including when a query or the
        # similarity computation raises.
        conn.close()

    print(f"Inserting: #{title}")
    return False
|
||||
|
||||
def insert_data(title, text, link, embedding):
    """Insert one row into the ``vectorsvevijesti`` table and commit it.

    Args:
        title: Value for the ``title`` column.
        text: Value for the ``text`` column.
        link: Value for the ``link`` column.
        embedding: Value for the ``embedding`` vector column.

    Raises:
        psycopg2.Error: If connecting or executing the INSERT fails; nothing
            is committed in that case.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname,
    )
    try:
        # Register the pgvector type on this connection, as create_db() does,
        # so this function does not depend on a registration made elsewhere.
        register_vector(conn)

        with conn.cursor() as cursor:
            cursor.execute('''
                INSERT INTO vectorsvevijesti (title, text, link, embedding)
                VALUES (%s, %s, %s, %s);
            ''', (title, text, link, embedding))

        conn.commit()
    finally:
        # Closing without a commit discards the open transaction, so a
        # failed insert leaves no partial state and no leaked connection.
        conn.close()
|
||||
|
||||
def get_data():
    """Fetch the title, text and link of every stored row.

    Returns:
        A list of ``(title, text, link)`` tuples from ``vectorsvevijesti``,
        as returned by ``cursor.fetchall()``.

    Raises:
        psycopg2.Error: If connecting or running the query fails.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname,
    )
    try:
        with conn.cursor() as cursor:
            cursor.execute('''SELECT title,text,link FROM vectorsvevijesti;''')
            return cursor.fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()
|
||||
|
||||
def create_db(drop_existing=True):
    """Create the pgvector extension and the ``vectorsvevijesti`` table.

    Args:
        drop_existing: When True (the default, matching the previous
            behaviour) any existing ``vectorsvevijesti`` table is dropped
            first, which DELETES ALL STORED ROWS. Pass False to keep an
            existing table and only create it when it is missing.

    Raises:
        psycopg2.Error: If connecting or any statement fails; nothing is
            committed in that case.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname,
    )
    try:
        with conn.cursor() as cursor:
            # The extension has to exist before the vector type can be
            # registered on the connection or used in the table definition.
            cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
            register_vector(conn)

            if drop_existing:
                cursor.execute("DROP TABLE IF EXISTS vectorsvevijesti;")

            # IF NOT EXISTS is a no-op difference right after a DROP and
            # makes the drop_existing=False path safe to call repeatedly.
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS vectorsvevijesti (
                    id bigserial PRIMARY KEY,
                    title VARCHAR,
                    text VARCHAR,
                    link VARCHAR,
                    embedding vector(1536)
                );
            ''')

        conn.commit()
    finally:
        # Release the connection even when a statement raises.
        conn.close()
|
||||
# NOTE(review): this call sits at module level, so it runs whenever the module
# is imported or executed. With create_db()'s default behaviour that drops and
# recreates the vectorsvevijesti table each time -- confirm this is intended.
create_db()
|
||||
Reference in New Issue
Block a user