Files
old-svevijesti/pyth/__pycache__/vectData.cpython-310.pyc

57 lines
7.0 KiB
Plaintext
Raw Normal View History

2024-01-02 15:00:07 +01:00
o
2024-01-29 14:55:20 +01:00
<00><><EFBFBD>e<EFBFBD><00>@sHddlZddlmZddlmZddlmZddlZddlZddl m
2024-01-02 15:00:07 +01:00
Z
ddl m Z m Z e
<EFBFBD>e<08> d<07>Ze<08> d<08>Ze<08> d <09>Ze<08> d
2024-01-29 14:55:20 +01:00
<EFBFBD>Ze<08> d <0B>Zejeeeeed <0C>Zd d<0E>Zdd<10>Zd4dd<13>Zdd<15>Zdd<17>Zdd<19>Zdd<1B>Zdd<1D>Zdd<1F>Zd d!<21>Zd"d#<23>Zd$d%<25>Z d&d'<27>Z!d(d)<29>Z"d*d+<2B>Z#d,d-<2D>Z$d.d/<2F>Z%d0d1<64>Z&d2d3<64>Z'e'<27>dS)5<>N)<01>sql)<01>register_vector)<01>cosine_similarity)<01> load_dotenv)<02>datetime<6D> timedelta<74>DB_HOST<53>DB_PORT<52>DB_USER<45> DB_PASSWORD<52>DB_NAME)<05>host<73>port<72>user<65>password<72>dbnamecCs:|tj<01>|<00>}|tj<01>|<01>}t|g|g<01>dd}|S)Nr)<04>np<6E>linalg<6C>normr)<05>v1<76>v2<76> v1_normalized<65> v2_normalized<65>
similarity<EFBFBD>r<00>1/home/asabani/Desktop/svevijesti/pyth/vectData.py<70>calculate_cosine_similaritysrcCsHt|t<01>rdd<02>|dd<04><00>d<05>D<00>}t<03>|<01>St|tj<05>r |Std<06><01>)NcSsg|]}t|<01><01>qSr)<01>float)<02>.0<EFBFBD>numrrr<00>
2024-01-07 03:41:32 +01:00
<listcomp>"<00>z*parse_embedding_string.<locals>.<listcomp><3E><00><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,zAInvalid type for embedding_str. Must be either str or np.ndarray.)<07>
isinstance<EFBFBD>str<74>splitr<00>array<61>ndarray<61>
ValueError)<02> embedding_str<74>numbersrrr<00>parse_embedding_string s

 r-<00>\<5C><><EFBFBD>(\<5C>?c Cs<>t<00><01>}|<05>d<01>|<05><03>}|D]O}|d}t<04>|d<00><01><06>} |d}
t| |<03>} | |kr^||
krS|} t||||| <0C>t d|<00>d|<08><00><04>t d|<00><00><02>d} |<05>
<EFBFBD>d St d
<EFBFBD>|<05>
<EFBFBD>d Sqt d|<00><00><02>|<05>
<EFBFBD>d S) Nz2SELECT title,embedding,link FROM vectorsvevijesti;rr"<00>zSimilar data found:
2024-01-02 15:00:07 +01:00
#z
2024-01-07 03:41:32 +01:00
#z Inserting: #<23>NOTzSame article of same source!F) <0B>conn<6E>cursor<6F>execute<74>fetchallrr(<00>flattenr<00> insert_data<74>print<6E>close) <0A>title<6C>text<78>link<6E> embedding<6E> thresholdr2<00>existing_embeddings<67>existing_embedding_tuple<6C>existing_title<6C>existing_embedding<6E> existing_linkr<00> similar_drrr<00>is_similar_data)s0

2024-01-29 14:55:20 +01:00
<02>rDcC<00>*t<00><01>}d}|<00>|<01>|<00><03>}|<00><04>|S)Nz[SELECT title, link, similar_d FROM vectorsvevijesti WHERE similar_d NOT IN ('NO', 'SOURCE')<29>r1r2r3r4r8)r2<00>query<72> similar_datarrr<00> get_similarE<00> 
rIcCsVt<00><01>}|<00>d<01>|<00><03>}|<00><04>dd<03>|D<00>}dd<03>|D<00>}dd<03>|D<00>}|||fS)NzGSELECT title, link, embedding FROM vectorsvevijesti WHERE ready = True;cS<00>g|]}|d<00>qS<00>rr<00>r<00>rowrrrr Sr!z/get_titles_links_embeddings.<locals>.<listcomp>cSrK)r"rrMrrrr Tr!cSsg|]}t|d<00><01>qS)r/)r-rMrrrr UsrF)r2<00>data<74>titles<65>links<6B>
2024-01-07 03:41:32 +01:00
embeddingsrrr<00>get_titles_links_embeddingsMs

2024-01-29 14:55:20 +01:00
rSc
2024-01-07 03:41:32 +01:00
Cs>t<00><01>}t<02><03>}|<06>d||||||df<07>t<02><05>|<06><06>dS)Nz<4E>
2024-01-06 08:17:05 +01:00
INSERT INTO vectorsvevijesti (title, text, link, embedding, similar_d, time, ready)
VALUES (%s, %s, %s, %s, %s ,%s ,%s);
2024-01-29 14:55:20 +01:00
T)r<00>nowr1r2r3<00>commitr8)r9r:r;r<rC<00>c_timer2rrrr6Zs<04> r6c CsJt<00><01><00>}|<05>d|||||f<05>Wd<00>n1swYt<00><03>dS)Nz<4E>INSERT INTO articles (title, content, slug, original_url, source_id)
VALUES (%s, %s, %s, %s, %s)ON CONFLICT (original_url) DO NOTHING;<3B>r1r2r3rU)r9r:<00>slugr;<00> source_idr2rrr<00> insert_finalds
 <06><1C> rZcCrE)Nz-SELECT title,text,link FROM vectorsvevijesti;rF<00>r2rGrOrrr<00>get_datajrJr\cC<00>,t<00><01>}d}|<00>|d<02>|<00><03>}|<00><04>|S)NzQSELECT title, text, link, time, similar_d FROM vectorsvevijesti WHERE ready = %s;)<01>TruerFr[rrr<00>get_ready_datar<00>  r_cCr])NzGSELECT title, text, link, ready FROM vectorsvevijesti WHERE ready = %s;)<01>FalserFr[rrr<00>get_source_datazr`rbcC<00>(t<00><01>}d}|<02>|||f<02>t<00><03>dS)Nz<UPDATE vectorsvevijesti SET similar_d = %s WHERE title = %s rW<00><04> new_valuer9r2rGrrr<00>modify_similar_data<74><00> rfcCrc)Nz8UPDATE vectorsvevijesti SET ready = %s WHERE title = %s rWrdrrr<00>preparing_articles<65>rgrhcCs.t<00><01>}d}|<01>||f<01>|<01><03>}|<01><04>|S)Nz\SELECT title, text, link, similar_d, embedding, ready FROM vectorsvevijesti WHERE title = %srF)r9r2rG<00> specific_postrrr<00>get_specific_data<74>s rjcC<00>4t<00><01>}d}|<00>|<01>dd<03>|<00><03>D<00>}|<00><04>|S)Nz!SELECT link FROM vectorsvevijesticS<00>h|]}|d<00>qSrLr<00>rr;rrr<00> <setcomp><3E>r!z get_all_links.<locals>.<setcomp>rF<00>r2rG<00>db_linksrrr<00> get_all_links<6B><00> 
rqcCrk)Nz(SELECT title, original_url FROM articlescSrlrLrrmrrrrn<00>r!z&get_existing_titles.<locals>.<setcomp>rFrorrr<00>get_existing_titles<65>rrrscCs&t<00><01>}d}|<01>||f<01>|<01><03>dS)Nz-DELETE FROM vectorsvevijesti WHERE title = %s)r1r2r3r8)r9r2rGrrr<00>delete_specific<69>s rtcCs@t<00><01>tdd<02>}t<03><04>}d}|<01>||f<01>t<03><06>|<01><07>dS)Nr")<01>daysz,DELETE FROM vectorsvevijesti WHERE time < %s)rrTrr1r2r3rUr8)<03>day_longr2rGrrr<00> cleansing<6E>s  rwcCs*t<00><01>}d}|<00>|<01>t<00><03>|<00><04>dS)Nz&DROP TABLE IF EXISTS vectorsvevijesti;)r1r2r3rUr8)r2rGrrr<00>
drop_table<EFBFBD>s
2024-01-07 03:41:32 +01:00

2024-01-29 14:55:20 +01:00
 rxcCs8t<00><01>}|<00>d<01>tt<00>|<00>d<02>t<00><04>|<00><05>dS)Nz%CREATE EXTENSION IF NOT EXISTS vectoraN
2024-01-02 15:00:07 +01:00
CREATE TABLE IF NOT EXISTS vectorsvevijesti (
id bigserial PRIMARY KEY,
title VARCHAR,
text VARCHAR,
link VARCHAR,
embedding vector(1536),
similar_d VARCHAR,
2024-01-06 08:17:05 +01:00
time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
ready BOOLEAN
2024-01-02 15:00:07 +01:00
);
2024-01-29 14:55:20 +01:00
)r1r2r3rrUr8)r2rrr<00> create_db<64>s 
2024-01-07 03:41:32 +01:00

2024-01-29 14:55:20 +01:00
 ry)r.)(<28>psycopg2r<00>pgvector.psycopg2r<00>sklearn.metrics.pairwiser<00>numpyr<00>os<6F>dotenvrrr<00>getenvr rrrr<00>connectr1rr-rDrIrSr6rZr\r_rbrfrhrjrqrsrtrwrxryrrrr<00><module>sR    
2024-01-02 15:00:07 +01:00




2024-01-07 03:41:32 +01:00
<06>
 
2024-01-29 14:55:20 +01:00

2024-01-07 03:41:32 +01:00