Removing previous f.
This commit is contained in:
@@ -155,115 +155,3 @@ if __name__ == '__main__':
|
|||||||
print(f"Error in completion: {e}")
|
print(f"Error in completion: {e}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
# Combined token count below which the source texts are concatenated into a
# single prompt instead of being handed to the model as separate texts.
_MAX_COMBINED_TOKENS = 2000


def _join_links(*links):
    """Return all links separated by spaces, or only the first on any repeat.

    The links are joined only when they are pairwise different; as soon as
    two of them compare equal, the first link alone is returned unchanged.
    """
    pairwise_distinct = all(
        links[i] != links[j]
        for i in range(len(links))
        for j in range(i + 1, len(links))
    )
    if pairwise_distinct:
        return " ".join(f"{item}" for item in links)
    return links[0]


def _build_request(sources, f_text, s_text, link_f, link_s):
    """Build the prompt and the link string for one model request.

    Args:
        sources: ``(text, link)`` tuples collected so far under one title.
        f_text: Text of the first article of the pair being processed.
        s_text: Text of the second article of the pair being processed.
        link_f: Link of the first article of the pair being processed.
        link_s: Link of the second article of the pair being processed.

    Returns:
        A ``(user_message, link)`` tuple. When ``sources`` holds exactly
        three entries the prompt is built from those three texts; otherwise
        it is built from the two texts of the current pair.
    """
    if len(sources) == 3:
        (text1, link1), (text2, link2), (text3, link3) = sources
        # The three counts must be added up: comparing a tuple of counts with
        # an int raises TypeError.
        total_tokens = (
            num_tokens_from_string(text1)
            + num_tokens_from_string(text2)
            + num_tokens_from_string(text3)
        )
        if total_tokens < _MAX_COMBINED_TOKENS:
            combined_text = slice_text_at_2k_tokens(f"{text1}{text2}{text3}")
            user_message = f"Here is text {combined_text}, combined from 3 sources, filter text, and make news content, return as JSON only with 'content' field"
        else:
            user_message = f"Here are 3 texts {text1} {text2} and {text3}, combine the following texts into a cohesive news remove any non-news related to both texts and provide the cleaned data as a JSON only with 'content' field."
        return user_message, _join_links(link1, link2, link3)

    total_tokens = num_tokens_from_string(f_text) + num_tokens_from_string(s_text)
    if total_tokens < _MAX_COMBINED_TOKENS:
        combined_text = f"{f_text}{s_text}"
        user_message = f"Here is text {combined_text}, combined from 2 sources, filter text, and make news content, return as JSON only with 'content' field"
    else:
        user_message = f"Here are 2 texts {f_text} and {s_text}, combine the following texts into a cohesive news remove any non-news related to both texts and provide the cleaned data as a JSON only with 'content' field."
    return user_message, _join_links(link_f, link_s)


def comb_similar():
    """Merge each pair of similar articles into one generated news item.

    For every pair returned by ``get_similar()`` the two articles are looked
    up with ``get_specific_data()``; the first row of each result is read as
    ``(title, text, link)``. The texts are sent to the chat model, both
    articles are passed to ``modify_similar_data(..., "SOURCE")``, and the
    generated content is stored with ``insert_data()`` unless
    ``is_similar_data()`` reports an existing match.

    Failures while talking to the model or storing the result are printed
    and the pair is skipped, so one bad pair does not stop the run.

    NOTE(review): the nesting of this function was reconstructed from a
    flattened listing; confirm the request/insert step is meant to run once
    per pair rather than once per title group.
    """
    print("Checking similar")
    similar_article = get_similar()

    grouped_data = {}

    # Iterate over a snapshot: processed pairs are removed from
    # ``similar_article`` below, and removing from a list while looping over
    # it makes the loop skip the element following each removal.
    for sa in list(similar_article):
        first_t = get_specific_data(sa[0])
        second_t = get_specific_data(sa[1])
        if not first_t or not second_t:
            # Nothing to index into; skip instead of raising IndexError.
            print(f"Missing article data for pair: {sa}")
            continue

        f_title, f_text, link_f = first_t[0][0], first_t[0][1], first_t[0][2]
        s_title, s_text, link_s = second_t[0][0], second_t[0][1], second_t[0][2]

        grouped_data.setdefault(f_title, []).append((f_text, link_f))
        grouped_data.setdefault(s_title, []).append((s_text, link_s))

        # NOTE(review): every title group is visited and the last one decides
        # the prompt, as in the original code; confirm that this is intended
        # rather than using only the groups of the current pair.
        for sources in grouped_data.values():
            user_message, link = _build_request(
                sources, f_text, s_text, link_f, link_s
            )

        try:
            completion = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "Data analytic, Journalist and News reporter"},
                    {"role": "user", "content": user_message},
                ],
            )
            generated_text = completion.choices[0].message.content

            if f_title == s_title:
                print(f_title)
                modify_similar_data(first_t, "SOURCE")
            else:
                print(f"First: {f_title}")
                print(f"Second: {s_title}")
                modify_similar_data(first_t, "SOURCE")
                modify_similar_data(second_t, "SOURCE")
            similar_article.remove(sa)
            print("Modified")

            response_data = json.loads(generated_text)
            title = f_title
            text = response_data["content"]

            vector = embeddings.embed_query(generated_text)

            if not is_similar_data(title, text, link, vector, threshold=0.98):
                similar_d = "NO"
                insert_data(title, text, link, vector, similar_d)

        except Exception as e:
            # Best-effort processing: report the failure and move on.
            print(f"Error in completion: {e}")
            continue
|
|
||||||
|
|||||||
Reference in New Issue
Block a user