Files
old-svevijesti/pyth/__pycache__/db_management.cpython-310.pyc

84 lines
8.0 KiB
Plaintext
Raw Normal View History

2024-01-29 14:55:20 +01:00
o
<00><><EFBFBD>ea<00>@srddlZddlmZddlmZddlmZddlZddlZddl m
Z
ddl m Z m Z e
<EFBFBD>e<08> d<07>Ze<08> d<08>Ze<08> d <09>Ze<08> d
<EFBFBD>Ze<08> d <0B>Zejeeeeed <0C>Zd d<0E>Zdd<10>Zd7dd<13>Zdd<15>Zdd<17>Zdd<19>Zdd<1B>Zdd<1D>Zdd<1F>Zd d!<21>Zd"d#<23>Zd$d%<25>Z d&d'<27>Z!d(d)<29>Z"d*d+<2B>Z#d,d-<2D>Z$d.d/<2F>Z%d0d1<64>Z&d2d3<64>Z'd4d3<64>Z'd5d6<64>Z(ddlZddlmZe'<27>e(<28>dS)8<>N)<01>sql)<01>register_vector)<01>cosine_similarity)<01> load_dotenv)<02>datetime<6D> timedelta<74>DB_HOST<53>DB_PORT<52>DB_USER<45> DB_PASSWORD<52>DB_NAME)<05>host<73>port<72>user<65>password<72>dbnamecCs:|tj<01>|<00>}|tj<01>|<01>}t|g|g<01>dd}|S)Nr)<04>np<6E>linalg<6C>normr)<05>v1<76>v2<76> v1_normalized<65> v2_normalized<65>
similarity<EFBFBD>r<00>3/home/amir/Desktop/svevijesti/pyth/db_management.py<70>calculate_cosine_similaritysrcCsHt|t<01>rdd<02>|dd<04><00>d<05>D<00>}t<03>|<01>St|tj<05>r |Std<06><01>)NcSsg|]}t|<01><01>qSr)<01>float)<02>.0<EFBFBD>numrrr<00>
<listcomp>"<00>z*parse_embedding_string.<locals>.<listcomp><3E><00><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,zAInvalid type for embedding_str. Must be either str or np.ndarray.)<07>
isinstance<EFBFBD>str<74>splitr<00>array<61>ndarray<61>
ValueError)<02> embedding_str<74>numbersrrr<00>parse_embedding_string s

 r-<00>\<5C><><EFBFBD>(\<5C>?c Cs<>t<00><01>}|<05>d<01>|<05><03>}|D]O}|d}t<04>|d<00><01><06>} |d}
t| |<03>} | |kr^||
krS|} t||||| <0C>t d|<00>d|<08><00><04>t d|<00><00><02>d} |<05>
<EFBFBD>d St d
<EFBFBD>|<05>
<EFBFBD>d Sqt d|<00><00><02>|<05>
<EFBFBD>d S) Nz2SELECT title,embedding,link FROM vectorsvevijesti;rr"<00>zSimilar data found:
#z
#z Inserting: #<23>NOTzSame article of same source!F) <0B>conn<6E>cursor<6F>execute<74>fetchallrr(<00>flattenr<00> insert_data<74>print<6E>close) <0A>title<6C>text<78>link<6E> embedding<6E> thresholdr2<00>existing_embeddings<67>existing_embedding_tuple<6C>existing_title<6C>existing_embedding<6E> existing_linkr<00> similar_drrr<00>is_similar_data)s0

<02>rDcC<00>*t<00><01>}d}|<00>|<01>|<00><03>}|<00><04>|S)Nz[SELECT title, link, similar_d FROM vectorsvevijesti WHERE similar_d NOT IN ('NO', 'SOURCE')<29>r1r2r3r4r8)r2<00>query<72> similar_datarrr<00> get_similarE<00> 
rIcCsVt<00><01>}|<00>d<01>|<00><03>}|<00><04>dd<03>|D<00>}dd<03>|D<00>}dd<03>|D<00>}|||fS)NzGSELECT title, link, embedding FROM vectorsvevijesti WHERE ready = True;cS<00>g|]}|d<00>qS<00>rr<00>r<00>rowrrrr Sr!z/get_titles_links_embeddings.<locals>.<listcomp>cSrK)r"rrMrrrr Tr!cSsg|]}t|d<00><01>qS)r/)r-rMrrrr UsrF)r2<00>data<74>titles<65>links<6B>
embeddingsrrr<00>get_titles_links_embeddingsMs

rSc Cs@t<00><01>}t<02><03>}|<07>d||||||d|f<08>t<02><05>|<07><06>dS)Nz<4E>
INSERT INTO vectorsvevijesti (title, text, link, embedding, similar_d, time, ready, category)
VALUES (%s, %s, %s, %s, %s ,%s ,%s ,%s);
T)r<00>nowr1r2r3<00>commitr8)r9r:r;r<rC<00>category<72>c_timer2rrrr6Zs<04> r6c
CsLt<00><01><00>}|<06>d||||||f<06>Wd<00>n1swYt<00><03>dS)Nz<4E>INSERT INTO articles (title, content, slug, original_url, source_id, category)
VALUES (%s, %s, %s, %s, %s, %s)ON CONFLICT (original_url) DO NOTHING;<3B>r1r2r3rU)r9r:<00>slugr;<00> source_idrVr2rrr<00> insert_finalds
<06><1C> r[cCrE)Nz-SELECT title,text,link FROM vectorsvevijesti;rF<00>r2rGrOrrr<00>get_datajrJr]cC<00>,t<00><01>}d}|<00>|d<02>|<00><03>}|<00><04>|S)Nz[SELECT title, text, link, time, similar_d, category FROM vectorsvevijesti WHERE ready = %s;)<01>TruerFr\rrr<00>get_ready_datar<00>  r`cCr^)NzGSELECT title, text, link, ready FROM vectorsvevijesti WHERE ready = %s;)<01>FalserFr\rrr<00>get_source_datazrarccC<00>(t<00><01>}d}|<02>|||f<02>t<00><03>dS)Nz<UPDATE vectorsvevijesti SET similar_d = %s WHERE title = %s rX<00><04> new_valuer9r2rGrrr<00>modify_similar_data<74><00> rgcCrd)Nz8UPDATE vectorsvevijesti SET ready = %s WHERE title = %s rXrerrr<00>preparing_articles<65>rhricCs.t<00><01>}d}|<01>||f<01>|<01><03>}|<01><04>|S)NzfSELECT title, text, link, similar_d, embedding, category, ready FROM vectorsvevijesti WHERE title = %srF)r9r2rG<00> specific_postrrr<00>get_specific_data<74>s rkcC<00>4t<00><01>}d}|<00>|<01>dd<03>|<00><03>D<00>}|<00><04>|S)Nz!SELECT link FROM vectorsvevijesticS<00>h|]}|d<00>qSrLr<00>rr;rrr<00> <setcomp><3E>r!z get_all_links.<locals>.<setcomp>rF<00>r2rG<00>db_linksrrr<00> get_all_links<6B><00> 
rrcCrl)Nz(SELECT title, original_url FROM articlescSrmrLrrnrrrro<00>r!z&get_existing_titles.<locals>.<setcomp>rFrprrr<00>get_existing_titles<65>rsrtcCs&t<00><01>}d}|<01>||f<01>|<01><03>dS)Nz-DELETE FROM vectorsvevijesti WHERE title = %s)r1r2r3r8)r9r2rGrrr<00>delete_specific<69>s rucCs@t<00><01>tdd<02>}t<03><04>}d}|<01>||f<01>t<03><06>|<01><07>dS)Nr")<01>daysz,DELETE FROM vectorsvevijesti WHERE time < %s)rrTrr1r2r3rUr8)<03>day_longr2rGrrr<00> cleansing<6E>s  rxcCs*t<00><01>}d}|<00>|<01>t<00><03>|<00><04>dS)Nz&DROP TABLE IF EXISTS vectorsvevijesti;<3B>r1r2r3rUr8)r2rGrrr<00>
drop_table<EFBFBD>s

 rzcC<00>8t<00><01>}|<00>d<01>tt<00>|<00>d<02>t<00><04>|<00><05>dS)N<>%CREATE EXTENSION IF NOT EXISTS vectoraN
CREATE TABLE IF NOT EXISTS vectorsvevijesti (
id bigserial PRIMARY KEY,
title VARCHAR,
text VARCHAR,
link VARCHAR,
embedding vector(1536),
similar_d VARCHAR,
time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
ready BOOLEAN
);
<20>r1r2r3rrUr8<00>r2rrr<00> create_db<64>s 

 rcCr{)Nr|al
CREATE TABLE IF NOT EXISTS vectorsvevijesti (
id bigserial PRIMARY KEY,
title VARCHAR,
text VARCHAR,
link VARCHAR,
embedding vector(1536),
similar_d VARCHAR,
time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
ready BOOLEAN,
category VARCHAR
);
r}r~rrrr<00>s 

 cCs&t<00><01>}|<00>d<01>t<00><03>|<00><04>dS)Nal
CREATE TABLE IF NOT EXISTS "articles" (
"id" bigserial PRIMARY KEY,
"title" text NOT NULL UNIQUE,
"content" text NOT NULL,
"slug" text NOT NULL UNIQUE,
"created_at" timestamptz DEFAULT NOW() NOT NULL,
"original_url" text NOT NULL UNIQUE,
"source_id" int NOT NULL,
"category" VARCHAR
);
ryr~rrr<00>create_ar_table<6C>s
 r<>)r.))<29>psycopg2r<00>pgvector.psycopg2r<00>sklearn.metrics.pairwiser<00>numpyr<00>os<6F>dotenvrrr<00>getenvr rrrr<00>connectr1rr-rDrIrSr6r[r]r`rcrgrirkrrrtrurxrzrr<>rrrr<00><module>s\    




<06>