2024-01-02 15:00:07 +01:00
import unittest
from unittest . mock import patch
import requests
from bs4 import BeautifulSoup
from langchain . embeddings import OpenAIEmbeddings
from langchain . vectorstores . pgvector import PGVector
from openai import OpenAI
import json
from dotenv import load_dotenv
2024-01-29 14:55:20 +01:00
from pyth . get_articles import get_article_links , insert_data , is_similar_data
2024-01-02 15:00:07 +01:00
import os
load_dotenv ( )
OPENAI_API_KEY = os . getenv ( " OPENAI_API_KEY " )
client = OpenAI ( )
embeddings = OpenAIEmbeddings ( )
already_checked = set ( )
total_links = set ( )
collected_news = set ( )
dlinks = ' http://127.0.0.1:5000/ '
class TestIntegration ( unittest . TestCase ) :
def test_integration ( self ) :
link = get_article_links ( dlinks , already_checked )
self . assertEqual ( len ( already_checked ) , 2 )
for link in total_links :
response = requests . get ( link )
soup = BeautifulSoup ( response . text , ' html.parser ' )
titles = soup . find_all ( [ ' h2 ' , ' h1 ' , ' h3 ' ] )
title_text = ' ' . join ( [ title . get_text ( strip = True ) for title in titles ] )
texts = soup . find_all ( [ ' p ' ] )
text_text = ' ' . join ( [ text . get_text ( strip = True ) for text in texts ] )
completion = client . chat . completions . create (
model = " gpt-3.5-turbo " ,
messages = [
{ " role " : " system " , " content " : " Data analytic, Journalist and News reporter " } ,
{ " role " : " user " , " content " : rf " Extract relevant information from the following input: Title: { title_text } , Text: { text_text } . Remove any non-news element related to the current text and title, and provide the cleaned data as a JSON object with ' title ' and ' content ' fields. " }
]
)
generated_text = completion . choices [ 0 ] . message . content
response_data = json . loads ( generated_text )
title = response_data [ " title " ]
text = response_data [ " content " ]
vector = embeddings . embed_query ( generated_text )
self . assertIn ( " Test Title " , title )
self . assertIn ( " Test Text " , text )
self . assertEqual ( len ( total_links ) , 2 )