Add add_events.py script

2023-05-15 17:15:30 +02:00
parent 4b93527998
commit 8a90dc9d62
2 changed files with 93 additions and 81 deletions
--- a/backend/add_events.py
+++ b/backend/add_events.py
@@ -0,0 +1,90 @@
+import os
+from dotenv import load_dotenv
+import openai
+import redis
+import random
+import requests
+from bs4 import BeautifulSoup
+load_dotenv()  # populate os.environ from a .env file before the getenv() lookups below
+openai.api_key = os.getenv("OPENAI_API_KEY")
+redis_url = os.getenv("REDIS_URL")
+redis_client = redis.from_url(redis_url)
+
+def add_current_events():
+    """Scrape the news front pages, cache the unique titles in Redis, return a sample.
+
+    Returns up to 7 randomly chosen titles joined by newlines; fewer (possibly an
+    empty string) when the scrape yields fewer than 7 unique titles.
+    """
+    urls = ['https://www.klix.ba', 'https://www.avaz.ba']
+    unique_titles = filter_unique_titles(extract_titles(urls))
+    # Store the newline-joined titles under 'todays_events' for 600 s (10 minutes)
+    redis_client.set('todays_events', "\n".join(unique_titles), ex=600)
+    # Clamp the sample size: random.sample raises ValueError when asked for more items than exist
+    return "\n".join(random.sample(unique_titles, min(7, len(unique_titles))))
+
+
+def extract_titles(urls):
+    """Collect the stripped text of every <h1>, <h2> and <h3> element of each page.
+
+    A URL that fails to download or parse is reported on stdout and skipped.
+    """
+    titles = []
+    # Set the User-Agent to Chrome on Windows
+    request_headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
+    }
+    for url in urls:
+        try:
+            response = requests.get(url, headers=request_headers, timeout=10)  # never hang on a stalled site
+            soup = BeautifulSoup(response.content, 'html.parser')
+            for tag in ['h1', 'h2', 'h3']:
+                # Keep the tag list under its own name: reusing the request-headers name sent tags as HTTP headers for the next URL
+                for heading in soup.find_all(tag):
+                    titles.append(heading.text.strip())
+        except Exception as e:
+            print(f"Error processing URL {url}: {e}")
+
+    return titles
+
+def filter_out_titles_with_duplicate_meanings(titles):
+    """Return the titles with exact repeats dropped, keeping first-seen order."""
+    kept = []
+    for candidate in titles:
+        if candidate in kept:
+            continue
+        kept.append(candidate)
+    return kept
+
+
+def filter_unique_titles(titles):
+    """Ask the OpenAI completion API to reduce *titles* to one title per topic.
+
+    The titles are sent as a "- title" list; the completion text is split on
+    newlines and returned as a list of stripped, non-empty lines. An empty
+    *titles* returns [] without calling the API.
+    """
+    if not titles:
+        return []  # nothing to filter, so skip the network call entirely
+    prompt = (
+        "Filter the following titles to include only unique topics, preferring longer titles when collisions are found:\n"
+        + "".join(f"- {title}\n" for title in titles)
+        + "Filtered unique titles:\n"
+    )
+    print(prompt)
+    # Call the GPT API
+    response = openai.Completion.create(
+        engine="text-davinci-002",
+        prompt=prompt,
+        max_tokens=1000,
+        n=1,
+        stop=None,
+        temperature=0.7,
+    )
+    # The completion is expected to hold one title per line
+    filtered_titles = response.choices[0].text.strip().split("\n")
+    print(filtered_titles)
+    return [title.strip() for title in filtered_titles if title.strip()]
+
+if __name__ == '__main__':  # only when executed as a script, not on import
+    add_current_events()  # scrape once and store the result in Redis
--- a/backend/app.py
+++ b/backend/app.py
@@ -22,6 +22,8 @@ cors = CORS(app, resources={
            "http://pitajramizu.com",
            "http://www.pitajramizu.com",
            "https://c50a-77-77-231-127.ngrok-free.app"
+            "https://pitajramizu.com",
+            "https://www.pitajramizu.com",
        ]
    }
 })
@@ -98,70 +100,6 @@ def chat():



-
-def extract_titles(urls):
-    titles = []
-
-    # Set the User-Agent to Chrome on Windows
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
-    }
-
-    for url in urls:
-        try:
-            response = requests.get(url, headers=headers)
-            soup = BeautifulSoup(response.content, 'html.parser')
-
-            for tag in ['h1', 'h2', 'h3']:
-                headers = soup.find_all(tag)
-
-                for header in headers:
-                    titles.append(header.text.strip())
-        except Exception as e:
-            print(f"Error processing URL {url}: {e}")
-
-    return titles
-
-def filter_out_titles_with_duplicate_meanings(titles):
-    filtered_titles = []
-
-    for title in titles:
-        if title not in filtered_titles:
-            filtered_titles.append(title)
-
-    return filtered_titles
-
-
-def filter_unique_titles(titles):
-    # Prepare the prompt
-    prompt = "Filter the following titles to include only unique topics, preferring longer titles when collisions are found:\n"
-    for title in titles:
-        prompt += f"- {title}\n"
-
-    prompt += "Filtered unique titles:\n"
-
-
-    print(prompt)
-    # Call the GPT API
-    response = openai.Completion.create(
-        engine="text-davinci-002",
-        prompt=prompt,
-        max_tokens=1000,
-        n=1,
-        stop=None,
-        temperature=0.7,
-    )
-
-
-
-    # Extract the filtered titles
-    filtered_titles = response.choices[0].text.strip().split("\n")
-
-    print(filtered_titles)
-    # Clean up and return the titles
-    return [title.strip() for title in filtered_titles if title.strip()]
-
-
 def get_todays_events():
    # Check if the 'todays_events' key exists
    todays_events = redis_client.get('todays_events')
@@ -170,23 +108,7 @@ def get_todays_events():
        # If the key exists, return its value
        return todays_events.decode('utf-8')
    else:
-        # If the key doesn't exist, extract titles from the URLs and filter unique titles
-        urls = ['https://www.klix.ba', 'https://www.avaz.ba']
-        titles = extract_titles(urls)
-        unique_titles = filter_unique_titles(titles)
-
-        # Convert the unique titles list to a string separated by newline
-        todays_events_str = "\n".join(unique_titles)
-
-        # Save the result to Redis with a 10-minute expiration time
-        redis_client.set('todays_events', todays_events_str, ex=600)
-
-        # return the result but split by newline, and then choose 7 random titles, and merge again into newline separated string
-        return "\n".join(random.sample(todays_events_str.split("\n"), 7))
-
-
-
-
+        return ""


 if __name__ == '__main__':