Current events.

2023-05-07 18:10:39 +02:00
parent 98810b975c
commit 15fcc31d85
1 changed files with 101 additions and 0 deletions
--- a/backend/app.py
+++ b/backend/app.py
@@ -7,6 +7,8 @@ from flask import Flask, jsonify, request
 from flask_cors import CORS
 import string
 import pickle
+import requests
+from bs4 import BeautifulSoup

 load_dotenv()

@@ -34,6 +36,8 @@ def generate_token():
@app.route('/api/session', methods=['GET'])
 def get_session():
    token = generate_token()
+
+    todays_events = get_todays_events()
    context = dict(conversation=[{
          "role": "system",
          "content": "You are a Bosnian girl. You never speak any other language - always respond in Bosnian even when asked explicitly. If you don't understand someone just tell them 'Ne kontam. Mozes li bolje to napisati?' or some other variant but always in bosnian. Do not speak English! Ever!"
@@ -43,6 +47,9 @@ def get_session():
                     "Don't try to be helpful. Act like you need to be entertained, and if the user is polite and entertaining then gradually open up in conversation."
                     "Assume user sees a picture of your face and hair. You have green eyes. You have brown hair. Your smile is nice. Your nose is a bit big and you feel self conscious about it. "
                     "Always use šta as a word for what. Use babo to refer to fathers. Use mama to refer to mothers. Use nana to refer to grandmothers. "
+                    f"If user wants to talk about current events here is what happened today:\n{todays_events}\n. Never just list the events even if user asks you to - mention one to three events at a time."
+
+
      }], violated=False)
    redis_client.set(token, pickle.dumps(context), ex=864000)  # Store the context with a 10 days expiration.
    return jsonify(token=token)
@@ -89,6 +96,100 @@ def chat():

    return jsonify(reply=reply)

+
+import requests
+from bs4 import BeautifulSoup
+
+def extract_titles(urls):
+    """Collect the text of every h1/h2/h3 heading found on the given pages.
+
+    Args:
+        urls: Iterable of page URLs to download.
+
+    Returns:
+        A list of stripped heading strings, in URL order and, within a page,
+        grouped by tag (all h1, then all h2, then all h3). Duplicates are
+        kept. A URL that fails to download or parse is reported on stdout
+        and skipped; headings collected before the failure are kept.
+    """
+    titles = []
+
+    # Browser-like User-Agent (Chrome on Windows).
+    request_headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
+    }
+
+    for url in urls:
+        try:
+            # The timeout keeps a stalled site from hanging the caller.
+            response = requests.get(url, headers=request_headers, timeout=10)
+            soup = BeautifulSoup(response.content, 'html.parser')
+
+            # The parsed tags get their own name: reusing the request-headers
+            # variable here would send the tags as HTTP headers on the next
+            # URL.
+            for tag in ['h1', 'h2', 'h3']:
+                for heading in soup.find_all(tag):
+                    titles.append(heading.text.strip())
+        except Exception as e:
+            # Best effort: one unreachable site must not discard the others.
+            print(f"Error processing URL {url}: {e}")
+
+    return titles
+
+def filter_out_titles_with_duplicate_meanings(titles):
+    """Drop exact duplicate titles, keeping the first occurrence of each.
+
+    Despite the name, only exact equality is checked; titles that are worded
+    differently are all kept.
+
+    Args:
+        titles: Iterable of titles (hashable values such as strings).
+
+    Returns:
+        A new list with the distinct titles in first-seen order.
+    """
+    # dict keys are unique and preserve insertion order, which gives an
+    # order-preserving dedup in linear time instead of a list scan per title.
+    return list(dict.fromkeys(titles))
+
+
+def filter_unique_titles(titles):
+    """Ask the completion model to reduce the titles to one per topic.
+
+    Args:
+        titles: List of title strings to filter.
+
+    Returns:
+        The non-empty, stripped lines of the model's answer as a list of
+        strings. Returns an empty list, without calling the API, when
+        ``titles`` is empty.
+    """
+    # Nothing to filter: skip the API call rather than sending a prompt with
+    # no titles in it.
+    if not titles:
+        return []
+
+    # Prepare the prompt: instruction, one bullet per title, then the cue
+    # the model is expected to continue from.
+    prompt = "".join(
+        ["Filter the following titles to include only unique topics, preferring longer titles when collisions are found:\n"]
+        + [f"- {title}\n" for title in titles]
+        + ["Filtered unique titles:\n"]
+    )
+
+    print(prompt)
+    # Call the GPT API
+    response = openai.Completion.create(
+        engine="text-davinci-002",
+        prompt=prompt,
+        max_tokens=1000,
+        n=1,
+        stop=None,
+        temperature=0.7,
+    )
+
+    # Extract the filtered titles: one candidate per line of the completion.
+    filtered_titles = response.choices[0].text.strip().split("\n")
+
+    print(filtered_titles)
+    # Clean up and return the titles, dropping blank lines.
+    return [title.strip() for title in filtered_titles if title.strip()]
+
+
+def get_todays_events():
+    """Return up to seven randomly chosen current headlines, one per line.
+
+    The full headline list is read from the Redis key 'todays_events'. When
+    the key is missing or empty, the list is rebuilt by scraping the news
+    sites, filtering the titles through filter_unique_titles, and storing
+    the result in Redis for 10 minutes.
+
+    Returns:
+        A newline-separated string of at most seven headlines, or an empty
+        string when no headlines are available.
+    """
+    # Check if the 'todays_events' key exists
+    cached_events = redis_client.get('todays_events')
+
+    if cached_events:
+        todays_events_str = cached_events.decode('utf-8')
+    else:
+        # Cache miss: extract titles from the URLs and filter unique titles
+        urls = ['https://www.klix.ba', 'https://www.avaz.ba']
+        titles = extract_titles(urls)
+        unique_titles = filter_unique_titles(titles)
+
+        # Convert the unique titles list to a string separated by newline
+        todays_events_str = "\n".join(unique_titles)
+
+        # Save the result to Redis with a 10-minute expiration time
+        redis_client.set('todays_events', todays_events_str, ex=600)
+
+    # Sample on both paths so cached and freshly scraped results have the
+    # same shape. Blank lines are dropped so an empty list stays empty.
+    events = [line for line in todays_events_str.split("\n") if line]
+
+    # random.sample raises ValueError when asked for more items than exist,
+    # so cap the sample size at the number of available headlines.
+    return "\n".join(random.sample(events, min(7, len(events))))
+
+
+
+
+
+
 if __name__ == '__main__':
    app.run(debug=True, port=3001)