From 96a2d888953319b0b2593dd504ad461f6073c4b5 Mon Sep 17 00:00:00 2001
From: Amir Sabani <amirsabani303@gmail.com>
Date: Sat, 6 Jan 2024 08:26:31 +0100
Subject: [PATCH] Remove previous comb_similar function from scrapingsingle.py

---
 pyth/scrapingsingle.py | 112 -----------------------------------------
 1 file changed, 112 deletions(-)

diff --git a/pyth/scrapingsingle.py b/pyth/scrapingsingle.py
index 44ff2eb..ac86b52 100644
--- a/pyth/scrapingsingle.py
+++ b/pyth/scrapingsingle.py
@@ -155,115 +155,3 @@ if __name__ == '__main__':
         print(f"Error in completion: {e}")
         continue
 
-
-def comb_similar():
-
-    print("Checking similar")
-    similar_article = get_similar()
-
-    grouped_data = {}
-
-
-    for sa in similar_article:
-        if similar_article:
-            first_t = get_specific_data(sa[0])
-            second_t = get_specific_data(sa[1])
-            link_f = first_t[0][2]
-            link_s = second_t[0][2]
-            f_text = first_t[0][1]
-            s_text = second_t[0][1]
-            f_title = first_t[0][0]
-            s_title = second_t[0][0]
-
-            if f_title in grouped_data:
-                grouped_data[f_title].append((f_text, link_f))
-            else:
-                grouped_data[f_title] = [(f_text, link_f)]
-
-            if s_title in grouped_data:
-                  grouped_data[s_title].append((s_text, link_s))
-            else:
-                 grouped_data[s_title] = [(s_text, link_s)]
-
-            for title, tuples in grouped_data.items():
-                if len(tuples) == 3:
-                    text1, link1 = tuples[0]
-                    text2, link2 = tuples[1]
-                    text3, link3 = tuples[2]
-
-                    t1check = num_tokens_from_string(text1)
-                    t2check = num_tokens_from_string(text2)
-                    t3check = num_tokens_from_string(text3)
-                    slice_if_more = t1check,t2check,t3check
-                    if slice_if_more < 2000:
-                        combined_text = f"{text1}{text2}{text3}"
-                        combined_text = slice_text_at_2k_tokens(combined_text)
-                        user_message = rf"Here is text {combined_text}, combined from 3 sources, filter text, and make news content, return as JSON only with 'content' field"
-                        if link1 != link2 and link1 != link3 and link2 != link3:
-                            link = f"{link1} {link2} {link3}"
-                        else:
-                            link = link1
-
-                    else:
-                        user_message = rf"Here are 3 texts {text1} {text2} and {text3}, combine the following texts into a cohesive news remove any non-news related to both texts and provide the cleaned data as a JSON only with 'content' field."
-                        if link1 != link2 and link1 != link3 and link2 != link3:
-                            link = f"{link1} {link2} {link3}"
-                        else:
-                            link = link1
-                else:
-                    ftcheck = num_tokens_from_string(f_text)
-                    stcheck = num_tokens_from_string(s_text)
-                    fscomb = ftcheck + stcheck
-                    if fscomb <2000:
-                        combined_text = f"{f_text}{s_text}"
-                        user_message = rf"Here is text {combined_text}, combined from 2 sources, filter text, and make news content, return as JSON only with 'content' field"
-                        if link_f != link_s:
-                            link = f"{link_f} {link_s}"
-                        else:
-                            link = link_f
-
-                    else:
-                        user_message = rf"Here are 2 texts {f_text} and {s_text}, combine the following texts into a cohesive news remove any non-news related to both texts and provide the cleaned data as a JSON only with 'content' field."
-                        if link_f != link_s:
-                            link = f"{link_f} {link_s}"
-                        else:
-                            link = link_f
-            try:
-                completion = client.chat.completions.create(
-                    model="gpt-3.5-turbo",
-                    messages=[
-                        {"role": "system", "content": "Data analytic, Journalist and News reporter"},
-                        {"role": "user", "content": user_message}
-                    ]
-                )
-                generated_text = completion.choices[0].message.content
-
-                if similar_article:
-                    if f_title == s_title:
-                        print(f_title)
-                        modify_similar_data(first_t,"SOURCE")
-                        similar_article.remove(sa)
-                        print("Modified")
-                    else:
-                        print(f"First: {f_title}")
-                        print(f"Second: {s_title}")
-                        modify_similar_data(first_t,"SOURCE")
-                        modify_similar_data(second_t,"SOURCE")
-                        similar_article.remove(sa)
-                        print("Modified")
-                else:
-                    print("Similar list is empty")
-
-                response_data = json.loads(generated_text)
-                title = f_title
-                text = response_data["content"]
-
-                vector = embeddings.embed_query(generated_text)
-
-                if not is_similar_data(title, text, link, vector, threshold=0.98):
-                    similar_d = "NO"
-                    insert_data(title, text, link, vector, similar_d)
-
-            except Exception as e:
-                print(f"Error in completion: {e}")
-                continue