Add add_events script
This commit is contained in:
90
backend/add_events.py
Normal file
90
backend/add_events.py
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import openai
|
||||||
|
import redis
|
||||||
|
import random
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Load variables from a local .env file (when one exists) before reading them.
# load_dotenv was imported but never called, so running this script on its own
# only ever saw variables already exported in the process environment.
load_dotenv()

# Credentials and connection settings come from the environment.
openai.api_key = os.getenv("OPENAI_API_KEY")
redis_url = os.getenv("REDIS_URL")

# Shared Redis client used to cache the scraped headlines.
redis_client = redis.from_url(redis_url)
|
||||||
|
|
||||||
|
def add_current_events():
    """Scrape current headlines, de-duplicate them, and cache them in Redis.

    Headings are collected from the configured news sites via
    ``extract_titles``, reduced by ``filter_unique_titles``, joined with
    newlines, and stored under the ``todays_events`` key with a 10-minute
    expiration.

    Returns:
        A newline-separated string of up to seven randomly chosen titles.
        Returns an empty string when no titles were produced.
    """
    urls = ['https://www.klix.ba', 'https://www.avaz.ba']
    titles = extract_titles(urls)
    unique_titles = filter_unique_titles(titles)

    # Convert the unique titles list to a string separated by newline
    todays_events_str = "\n".join(unique_titles)

    # Save the result to Redis with a 10-minute expiration time
    redis_client.set('todays_events', todays_events_str, ex=600)

    # "".split("\n") yields [''], so treat the empty string as "no titles".
    candidates = todays_events_str.split("\n") if todays_events_str else []

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size at the number of available titles.
    sample_size = min(7, len(candidates))
    return "\n".join(random.sample(candidates, sample_size))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def extract_titles(urls, timeout=10):
    """Collect the text of every h1/h2/h3 heading found on the given pages.

    Args:
        urls: Iterable of page URLs to download.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of stripped heading texts, in page order, grouped per page by
        tag (all h1, then h2, then h3). A URL that fails to download or parse
        is reported on stdout and skipped, so the result may be partial.
    """
    titles = []

    # Set the User-Agent to Chrome on Windows
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    for url in urls:
        try:
            # The heading loop below must not reuse the name of the request
            # headers; doing so previously replaced the dict with a list of
            # tags and broke every request after the first URL.
            response = requests.get(url, headers=request_headers, timeout=timeout)
            soup = BeautifulSoup(response.content, 'html.parser')

            for tag in ['h1', 'h2', 'h3']:
                for heading in soup.find_all(tag):
                    titles.append(heading.text.strip())
        except Exception as e:
            # Best-effort scraping: one failing site must not abort the rest.
            print(f"Error processing URL {url}: {e}")

    return titles
|
||||||
|
|
||||||
|
def filter_out_titles_with_duplicate_meanings(titles):
    """Return the titles with exact duplicates removed, keeping first-seen order.

    Despite the name, no semantic comparison happens here: two titles are
    duplicates only when they compare equal.

    Args:
        titles: Iterable of hashable titles (strings in practice).

    Returns:
        A new list holding the first occurrence of each distinct title.
    """
    seen = set()
    unique_titles = []

    for title in titles:
        # Set membership keeps this linear instead of rescanning the output list.
        if title in seen:
            continue
        seen.add(title)
        unique_titles.append(title)

    return unique_titles
|
||||||
|
|
||||||
|
|
||||||
|
def filter_unique_titles(titles):
    """Ask the OpenAI completion API to reduce titles to unique topics.

    The titles are sent as a bulleted list and the completion text is split
    into lines. The prompt and the parsed response are printed for debugging.

    Args:
        titles: Sequence of title strings to filter.

    Returns:
        A list of the non-empty, stripped lines of the model's answer.
        Returns an empty list, without calling the API, when ``titles`` is
        empty.
    """
    # Nothing to filter: skip the API call, which would otherwise cost a
    # request and return text unrelated to any real title.
    if not titles:
        return []

    # Prepare the prompt
    bullet_list = "".join(f"- {title}\n" for title in titles)
    prompt = (
        "Filter the following titles to include only unique topics, preferring longer titles when collisions are found:\n"
        + bullet_list
        + "Filtered unique titles:\n"
    )

    print(prompt)

    # Call the GPT API
    # NOTE(review): openai.Completion with text-davinci-002 looks like the
    # legacy completions interface; confirm it works with the installed
    # openai version.
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        max_tokens=1000,
        n=1,
        stop=None,
        temperature=0.7,
    )

    # Extract the filtered titles
    filtered_titles = response.choices[0].text.strip().split("\n")

    print(filtered_titles)

    # Clean up and return the titles
    return [title.strip() for title in filtered_titles if title.strip()]
|
||||||
|
|
||||||
|
# Refresh the cached headlines only when run as a script, not on import.
if __name__ == "__main__":
    add_current_events()
|
||||||
@@ -22,6 +22,8 @@ cors = CORS(app, resources={
|
|||||||
"http://pitajramizu.com",
|
"http://pitajramizu.com",
|
||||||
"http://www.pitajramizu.com",
|
"http://www.pitajramizu.com",
|
||||||
"https://c50a-77-77-231-127.ngrok-free.app"
|
"https://c50a-77-77-231-127.ngrok-free.app"
|
||||||
|
"https://pitajramizu.com",
|
||||||
|
"https://www.pitajramizu.com",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -98,70 +100,6 @@ def chat():
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def extract_titles(urls, timeout=10):
    """Collect the text of every h1/h2/h3 heading found on the given pages.

    Args:
        urls: Iterable of page URLs to download.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of stripped heading texts, in page order, grouped per page by
        tag (all h1, then h2, then h3). A URL that fails to download or parse
        is reported on stdout and skipped, so the result may be partial.
    """
    titles = []

    # Set the User-Agent to Chrome on Windows
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    for url in urls:
        try:
            # The heading loop below must not reuse the name of the request
            # headers; doing so previously replaced the dict with a list of
            # tags and broke every request after the first URL.
            response = requests.get(url, headers=request_headers, timeout=timeout)
            soup = BeautifulSoup(response.content, 'html.parser')

            for tag in ['h1', 'h2', 'h3']:
                for heading in soup.find_all(tag):
                    titles.append(heading.text.strip())
        except Exception as e:
            # Best-effort scraping: one failing site must not abort the rest.
            print(f"Error processing URL {url}: {e}")

    return titles
|
|
||||||
|
|
||||||
def filter_out_titles_with_duplicate_meanings(titles):
    """Return the titles with exact duplicates removed, keeping first-seen order.

    Despite the name, no semantic comparison happens here: two titles are
    duplicates only when they compare equal.

    Args:
        titles: Iterable of hashable titles (strings in practice).

    Returns:
        A new list holding the first occurrence of each distinct title.
    """
    seen = set()
    unique_titles = []

    for title in titles:
        # Set membership keeps this linear instead of rescanning the output list.
        if title in seen:
            continue
        seen.add(title)
        unique_titles.append(title)

    return unique_titles
|
|
||||||
|
|
||||||
|
|
||||||
def filter_unique_titles(titles):
    """Ask the OpenAI completion API to reduce titles to unique topics.

    The titles are sent as a bulleted list and the completion text is split
    into lines. The prompt and the parsed response are printed for debugging.

    Args:
        titles: Sequence of title strings to filter.

    Returns:
        A list of the non-empty, stripped lines of the model's answer.
        Returns an empty list, without calling the API, when ``titles`` is
        empty.
    """
    # Nothing to filter: skip the API call, which would otherwise cost a
    # request and return text unrelated to any real title.
    if not titles:
        return []

    # Prepare the prompt
    bullet_list = "".join(f"- {title}\n" for title in titles)
    prompt = (
        "Filter the following titles to include only unique topics, preferring longer titles when collisions are found:\n"
        + bullet_list
        + "Filtered unique titles:\n"
    )

    print(prompt)

    # Call the GPT API
    # NOTE(review): openai.Completion with text-davinci-002 looks like the
    # legacy completions interface; confirm it works with the installed
    # openai version.
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        max_tokens=1000,
        n=1,
        stop=None,
        temperature=0.7,
    )

    # Extract the filtered titles
    filtered_titles = response.choices[0].text.strip().split("\n")

    print(filtered_titles)

    # Clean up and return the titles
    return [title.strip() for title in filtered_titles if title.strip()]
|
|
||||||
|
|
||||||
|
|
||||||
def get_todays_events():
|
def get_todays_events():
|
||||||
# Check if the 'todays_events' key exists
|
# Check if the 'todays_events' key exists
|
||||||
todays_events = redis_client.get('todays_events')
|
todays_events = redis_client.get('todays_events')
|
||||||
@@ -170,23 +108,7 @@ def get_todays_events():
|
|||||||
# If the key exists, return its value
|
# If the key exists, return its value
|
||||||
return todays_events.decode('utf-8')
|
return todays_events.decode('utf-8')
|
||||||
else:
|
else:
|
||||||
# If the key doesn't exist, extract titles from the URLs and filter unique titles
|
return ""
|
||||||
urls = ['https://www.klix.ba', 'https://www.avaz.ba']
|
|
||||||
titles = extract_titles(urls)
|
|
||||||
unique_titles = filter_unique_titles(titles)
|
|
||||||
|
|
||||||
# Convert the unique titles list to a string separated by newline
|
|
||||||
todays_events_str = "\n".join(unique_titles)
|
|
||||||
|
|
||||||
# Save the result to Redis with a 10-minute expiration time
|
|
||||||
redis_client.set('todays_events', todays_events_str, ex=600)
|
|
||||||
|
|
||||||
# return the result but split by newline, and then choose 7 random titles, and merge again into newline separated string
|
|
||||||
return "\n".join(random.sample(todays_events_str.split("\n"), 7))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
Reference in New Issue
Block a user