added article.py
This commit is contained in:
@@ -4,7 +4,7 @@ from urllib.parse import urljoin
|
||||
from openai import OpenAI , APIError
|
||||
import os
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from vectData import (insert_data ,is_similar_data ,get_similar, get_specific_data, delete_specific,get_all_links,cleansing ,modify_similar_data)
|
||||
from vectData import (insert_data ,is_similar_data ,get_similar, get_specific_data,get_all_links,cleansing ,modify_similar_data)
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
import tiktoken
|
||||
@@ -48,6 +48,19 @@ def replace_with_spaces(text):
|
||||
cleaned_text = ''.join(char if char in allowed_chars else ' ' for char in text)
|
||||
return cleaned_text
|
||||
|
||||
|
||||
def fix_links(links_set):
    """Return a new set of links with a leading ``www.`` host label removed.

    Only a ``www.`` that starts the host is stripped, i.e. one that appears
    at the very beginning of the link or directly after ``scheme://`` (or a
    protocol-relative ``//``).  Occurrences elsewhere -- inside a path, a
    query string, or in the middle of a host name such as
    ``awww.example.com`` -- are left untouched, so links are never corrupted.

    Args:
        links_set: Iterable of link strings (a set in the calling script).

    Returns:
        A new ``set`` of normalized links; the input is not modified.  Links
        that differ only by the ``www.`` prefix collapse into one entry.
    """
    # Local import: the file's top-level import block is only partially
    # visible, so the dependency is kept inside the function.
    import re

    # Group 1 captures the optional "scheme://" or "//" so it can be kept;
    # the "www." that follows it is the part that gets dropped.
    www_prefix = re.compile(r"^((?:[A-Za-z][A-Za-z0-9+.\-]*:)?//)?www\.")

    def _strip(link):
        # Keep whatever preceded "www." (possibly nothing) and drop the label.
        return www_prefix.sub(lambda m: m.group(1) or "", link, count=1)

    return {_strip(link) for link in links_set}
|
||||
|
||||
total_links = set()
|
||||
collected_news = set()
|
||||
|
||||
@@ -78,13 +91,13 @@ for dlink in dlinks:
|
||||
total_links.update(temp_links)
|
||||
|
||||
final_links = {item for item in total_links if item}
|
||||
i = 0
|
||||
|
||||
db_links = set(get_all_links())
|
||||
new_links = final_links - db_links
|
||||
final_links = new_links
|
||||
final_links = set(final_links)
|
||||
|
||||
|
||||
final_links = fix_links(final_links)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -142,6 +155,7 @@ if __name__ == '__main__':
|
||||
print(f"Error in completion: {e}")
|
||||
continue
|
||||
|
||||
|
||||
def comb_similar():
|
||||
|
||||
print("Checking similar")
|
||||
@@ -185,12 +199,17 @@ def comb_similar():
|
||||
combined_text = f"{text1}{text2}{text3}"
|
||||
combined_text = slice_text_at_2k_tokens(combined_text)
|
||||
user_message = rf"Here is text {combined_text}, combined from 3 sources, filter text, and make news content, return as JSON only with 'content' field"
|
||||
link = f"{link1} {link2} {link3}"
|
||||
if link1 != link2 and link1 != link3 and link2 != link3:
|
||||
link = f"{link1} {link2} {link3}"
|
||||
else:
|
||||
link = link1
|
||||
|
||||
else:
|
||||
user_message = rf"Here are 3 texts {text1} {text2} and {text3}, combine the following texts into a cohesive news remove any non-news related to both texts and provide the cleaned data as a JSON only with 'content' field."
|
||||
link = f"{link1} {link2} {link3}"
|
||||
|
||||
if link1 != link2 and link1 != link3 and link2 != link3:
|
||||
link = f"{link1} {link2} {link3}"
|
||||
else:
|
||||
link = link1
|
||||
else:
|
||||
ftcheck = num_tokens_from_string(f_text)
|
||||
stcheck = num_tokens_from_string(s_text)
|
||||
@@ -198,12 +217,17 @@ def comb_similar():
|
||||
if fscomb <2000:
|
||||
combined_text = f"{f_text}{s_text}"
|
||||
user_message = rf"Here is text {combined_text}, combined from 2 sources, filter text, and make news content, return as JSON only with 'content' field"
|
||||
link = f"{link_f} {link_s}"
|
||||
if link_f != link_s:
|
||||
link = f"{link_f} {link_s}"
|
||||
else:
|
||||
link = link_f
|
||||
|
||||
else:
|
||||
user_message = rf"Here are 2 texts {f_text} and {s_text}, combine the following texts into a cohesive news remove any non-news related to both texts and provide the cleaned data as a JSON only with 'content' field."
|
||||
link = f"{link_f} {link_s}"
|
||||
|
||||
if link_f != link_s:
|
||||
link = f"{link_f} {link_s}"
|
||||
else:
|
||||
link = link_f
|
||||
try:
|
||||
completion = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
@@ -213,7 +237,6 @@ def comb_similar():
|
||||
]
|
||||
)
|
||||
generated_text = completion.choices[0].message.content
|
||||
generated_text = generated_text
|
||||
|
||||
if similar_article:
|
||||
if f_title == s_title:
|
||||
@@ -222,6 +245,7 @@ def comb_similar():
|
||||
similar_article.remove(sa)
|
||||
print("Modified")
|
||||
else:
|
||||
print(f"First: {f_title}")
|
||||
print(f"Second: {s_title}")
|
||||
modify_similar_data(first_t,"SOURCE")
|
||||
modify_similar_data(second_t,"SOURCE")
|
||||
@@ -243,5 +267,3 @@ def comb_similar():
|
||||
except Exception as e:
|
||||
print(f"Error in completion: {e}")
|
||||
continue
|
||||
|
||||
comb_similar()
|
||||
Reference in New Issue
Block a user