Files
old-svevijesti/pyth/__pycache__/vectData.cpython-310.pyc

56 lines
6.3 KiB
Plaintext
Raw Normal View History

2024-01-02 15:00:07 +01:00
o
2024-01-08 00:28:20 +01:00
<00><0E>e<EFBFBD><00>@s8ddlZddlmZddlmZddlmZddlZddlZddl m
2024-01-02 15:00:07 +01:00
Z
ddl m Z m Z e
<EFBFBD>e<08> d<07>Ze<08> d<08>Ze<08> d <09>Ze<08> d
2024-01-07 03:41:32 +01:00
<EFBFBD>Ze<08> d <0B>Zejeeeeed <0C>Zd d<0E>Zdd<10>Zd0dd<13>Zdd<15>Zdd<17>Zdd<19>Zdd<1B>Zdd<1D>Zdd<1F>Zd d!<21>Zd"d#<23>Zd$d%<25>Z d&d'<27>Z!d(d)<29>Z"d*d+<2B>Z#d,d-<2D>Z$d.d/<2F>Z%e%<25>dS)1<>N)<01>sql)<01>register_vector)<01>cosine_similarity)<01> load_dotenv)<02>datetime<6D> timedelta<74>DB_HOST<53>DB_PORT<52>DB_USER<45> DB_PASSWORD<52>DB_NAME)<05>host<73>port<72>user<65>password<72>dbnamecCs:|tj<01>|<00>}|tj<01>|<01>}t|g|g<01>dd}|S)Nr)<04>np<6E>linalg<6C>normr)<05>v1<76>v2<76> v1_normalized<65> v2_normalized<65>
similarity<EFBFBD>r<00>8/home/asabani/Desktop/svevijesti-master/pyth/vectData.py<70>calculate_cosine_similaritysrcCsHt|t<01>rdd<02>|dd<04><00>d<05>D<00>}t<03>|<01>St|tj<05>r |Std<06><01>)NcSsg|]}t|<01><01>qSr)<01>float)<02>.0<EFBFBD>numrrr<00>
<listcomp>"<00>z*parse_embedding_string.<locals>.<listcomp><3E><00><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,zAInvalid type for embedding_str. Must be either str or np.ndarray.)<07>
isinstance<EFBFBD>str<74>splitr<00>array<61>ndarray<61>
ValueError)<02> embedding_str<74>numbersrrr<00>parse_embedding_string s

 r-<00>\<5C><><EFBFBD>(\<5C>?c Cs<>t<00><01>}|<05>d<01>|<05><03>}|D]O}|d}t<04>|d<00><01><06>} |d}
t| |<03>} | |kr^||
krS|} t||||| <0C>t d|<00>d|<08><00><04>t d|<00><00><02>d} |<05>
<EFBFBD>d St d
<EFBFBD>|<05>
<EFBFBD>d Sqt d|<00><00><02>|<05>
<EFBFBD>d S) Nz2SELECT title,embedding,link FROM vectorsvevijesti;rr"<00>zSimilar data found:
2024-01-02 15:00:07 +01:00
#z
2024-01-07 03:41:32 +01:00
#z Inserting: #<23>NOTzSame article of same source!F) <0B>conn<6E>cursor<6F>execute<74>fetchallrr(<00>flattenr<00> insert_data<74>print<6E>close) <0A>title<6C>text<78>link<6E> embedding<6E> thresholdr2<00>existing_embeddings<67>existing_embedding_tuple<6C>existing_title<6C>existing_embedding<6E> existing_linkr<00> similar_drrr<00>is_similar_data)s0

<02>rDcC<00>*t<00><01>}d}|<00>|<01>|<00><03>}|<00><04>|S)NzTSELECT title,similar_d FROM vectorsvevijesti WHERE similar_d NOT IN ('NO', 'SOURCE')<29>r1r2r3r4r8)r2<00>query<72> similar_datarrr<00> get_similarEs 
rIcCsVt<00><01>}|<00>d<01>|<00><03>}|<00><04>dd<03>|D<00>}dd<03>|D<00>}dd<03>|D<00>}|||fS)NzGSELECT title, link, embedding FROM vectorsvevijesti WHERE ready = True;cS<00>g|]}|d<00>qS<00>rr<00>r<00>rowrrrr Sr!z/get_titles_links_embeddings.<locals>.<listcomp>cSrJ)r"rrLrrrr Tr!cSsg|]}t|d<00><01>qS)r/)r-rLrrrr UsrF)r2<00>data<74>titles<65>links<6B>
embeddingsrrr<00>get_titles_links_embeddingsMs

rRc
Cs>t<00><01>}t<02><03>}|<06>d||||||df<07>t<02><05>|<06><06>dS)Nz<4E>
2024-01-06 08:17:05 +01:00
INSERT INTO vectorsvevijesti (title, text, link, embedding, similar_d, time, ready)
VALUES (%s, %s, %s, %s, %s ,%s ,%s);
2024-01-07 03:41:32 +01:00
T)r<00>nowr1r2r3<00>commitr8)r9r:r;r<rC<00>c_timer2rrrr6Zs<04> r6cCrE)Nz-SELECT title,text,link FROM vectorsvevijesti;rF<00>r2rGrNrrr<00>get_datads 
rWcC<00>,t<00><01>}d}|<00>|d<02>|<00><03>}|<00><04>|S)N<>GSELECT title, text, link, ready FROM vectorsvevijesti WHERE ready = %s;)<01>TruerFrVrrr<00>get_ready_datam<00>  r[cCrX)NrY)<01>FalserFrVrrr<00>get_source_dataur\r^cC<00>(t<00><01>}d}|<02>|||f<02>t<00><03>dS)Nz<UPDATE vectorsvevijesti SET similar_d = %s WHERE title = %s <20>r1r2r3rT<00><04> new_valuer9r2rGrrr<00>modify_similar_data~<00> rccCr_)Nz8UPDATE vectorsvevijesti SET ready = %s WHERE title = %s r`rarrr<00>preparing_articles<65>rdrecCs.t<00><01>}d}|<01>||f<01>|<01><03>}|<01><04>|S)Nz\SELECT title, text, link, similar_d, embedding, ready FROM vectorsvevijesti WHERE title = %srF)r9r2rG<00> specific_postrrr<00>get_specific_data<74>s rgcCs4t<00><01>}d}|<00>|<01>dd<03>|<00><03>D<00>}|<00><04>|S)Nz!SELECT link FROM vectorsvevijesticSsh|]}|d<00>qSrKr)rr;rrr<00> <setcomp><3E>r!z get_all_links.<locals>.<setcomp>rF)r2rG<00>db_linksrrr<00> get_all_links<6B>s 
rjcCs&t<00><01>}d}|<01>||f<01>|<01><03>dS)Nz-DELETE FROM vectorsvevijesti WHERE title = %s)r1r2r3r8)r9r2rGrrr<00>delete_specific<69>s rkcCs@t<00><01>tdd<02>}t<03><04>}d}|<01>||f<01>t<03><06>|<01><07>dS)Nr")<01>daysz,DELETE FROM vectorsvevijesti WHERE time < %s)rrSrr1r2r3rTr8)<03>day_longr2rGrrr<00> cleansing<6E>s  rncCs*t<00><01>}d}|<00>|<01>t<00><03>|<00><04>dS)Nz&DROP TABLE IF EXISTS vectorsvevijesti;)r1r2r3rTr8)r2rGrrr<00>
drop_table<EFBFBD>s

 rocCs8t<00><01>}|<00>d<01>tt<00>|<00>d<02>t<00><04>|<00><05>dS)Nz%CREATE EXTENSION IF NOT EXISTS vectoraN
2024-01-02 15:00:07 +01:00
CREATE TABLE IF NOT EXISTS vectorsvevijesti (
id bigserial PRIMARY KEY,
title VARCHAR,
text VARCHAR,
link VARCHAR,
embedding vector(1536),
similar_d VARCHAR,
2024-01-06 08:17:05 +01:00
time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
ready BOOLEAN
2024-01-02 15:00:07 +01:00
);
2024-01-07 03:41:32 +01:00
)r1r2r3rrTr8)r2rrr<00> create_db<64>s 

 rp)r.)&<26>psycopg2r<00>pgvector.psycopg2r<00>sklearn.metrics.pairwiser<00>numpyr<00>os<6F>dotenvrrr<00>getenvr rrrr<00>connectr1rr-rDrIrRr6rWr[r^rcrergrjrkrnrorprrrr<00><module>sN    
2024-01-02 15:00:07 +01:00




2024-01-07 03:41:32 +01:00
<06>