Combine similar article

2024-01-02 15:00:07 +01:00
parent fff1c94a3d
commit ae1c1902da
15 changed files with 726 additions and 39 deletions
--- a/pyth/.env
+++ b/pyth/.env
@@ -0,0 +1,7 @@
 # Settings read through python-dotenv: the scraper and vectData call load_dotenv() and then os.getenv().
 # NOTE(review): these look like live credentials in a tracked file - rotate the API key and the
 # database password, and keep .env out of version control (commit a placeholder template instead).
 OPENAI_API_KEY = "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" 
 # PostgreSQL connection parameters passed to psycopg2.connect() in vectData.py.
 DB_HOST =localhost
 DB_PORT =5432
 DB_USER =postgres
 DB_PASSWORD =salmonela pljusti 221 hamo
 DB_NAME =svevijestiweb
--- a/pyth/.gitlab-ci.yml
+++ b/pyth/.gitlab-ci.yml
@@ -0,0 +1,21 @@
 # Single-stage pipeline: each job runs one pytest module, on the master branch only.
 stages:
  - test
 # NOTE(review): `variables:` has no entries (a null value) - confirm GitLab accepts this or drop the key.
 variables:
 # Global before_script: dependencies are installed before each job's own script.
 before_script:
  - pip install -r requirements.txt 
 # NOTE(review): tests/test_scrapingsingle.py requests http://127.0.0.1:5000/ and calls the OpenAI API;
 # no service or secret is configured here - confirm the runner provides them.
 test_file1:
  stage: test
  script:
    - python -m pytest tests/test_scrapingsingle.py
  only:
    - master
 # NOTE(review): tests/test_vectData.py opens a PostgreSQL connection from DB_* environment variables;
 # no `services:` entry is declared here - confirm a database is reachable from the runner.
 test_file2:
  stage: test
  script:
    - python -m pytest tests/test_vectData.py
  only:
    - master
--- a/pyth/pycache/scrapingsingle.cpython-310.pyc
+++ b/pyth/pycache/scrapingsingle.cpython-310.pyc
--- a/pyth/pycache/vectData.cpython-310.pyc
+++ b/pyth/pycache/vectData.cpython-310.pyc
--- a/pyth/requirements.txt
+++ b/pyth/requirements.txt
@@ -0,0 +1,141 @@
 aiohttp==3.9.1
 aiosignal==1.3.1
 annotated-types==0.6.0
 anyio==4.2.0
 apturl==0.5.2
 async-timeout==4.0.3
 attrs==23.1.0
 beautifulsoup4==4.12.2
 blinker==1.7.0
 blis==0.7.11
 Brlapi==0.8.3
 catalogue==2.0.10
 certifi==2020.6.20
 chardet==4.0.0
 charset-normalizer==3.3.2
 click==8.1.7
 cloudpathlib==0.16.0
 colorama==0.4.4
 command-not-found==0.3
 confection==0.1.4
 cryptography==3.4.8
 cupshelpers==1.0
 cymem==2.0.8
 dataclasses-json==0.6.3
 DateTime==5.4
 dbus-python==1.2.18
 decorator==4.4.2
 defer==1.0.6
 distro==1.7.0
 distro-info==1.1+ubuntu0.1
 en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
 exceptiongroup==1.2.0
 Flask==3.0.0
 Flask-Cors==4.0.0
 frozenlist==1.4.1
 greenlet==1.1.2
 gyp==0.1
 h11==0.14.0
 httpcore==1.0.2
 httplib2==0.20.2
 httpx==0.25.2
 idna==3.3
 importlib-metadata==4.6.4
 itsdangerous==2.1.2
 jeepney==0.7.1
 Jinja2==3.1.2
 joblib==1.3.2
 jsonpatch==1.33
 jsonpointer==2.4
 keyring==23.5.0
 langchain==0.0.352
 langchain-community==0.0.6
 langchain-core==0.1.3
 langcodes==3.3.0
 langsmith==0.0.74
 language-selector==0.1
 launchpadlib==1.10.16
 lazr.restfulclient==0.14.4
 lazr.uri==1.0.6
 louis==3.20.0
 macaroonbakery==1.3.1
 MarkupSafe==2.1.3
 marshmallow==3.20.1
 more-itertools==8.10.0
 multidict==6.0.4
 murmurhash==1.0.10
 mypy-extensions==1.0.0
 netifaces==0.11.0
 numpy==1.26.2
 oauthlib==3.2.0
 olefile==0.46
 openai==1.5.0
 packaging==23.2
 pbr==5.8.0
 pexpect==4.8.0
 pgvector==0.2.4
 Pillow==9.0.1
 preshed==3.0.9
 protobuf==3.12.4
 psycopg==3.1.15
 psycopg2-binary==2.9.9
 ptyprocess==0.7.0
 pycairo==1.20.1
 pycups==2.0.1
 pydantic==2.5.2
 pydantic_core==2.14.5
 PyGObject==3.42.1
 PyJWT==2.3.0
 pymacaroons==0.13.0
 PyNaCl==1.5.0
 pyparsing==2.4.7
 pyRFC3339==1.1
 python-apt==2.4.0+ubuntu2
 python-dateutil==2.8.1
 python-debian==0.1.43+ubuntu1.1
 python-dotenv==1.0.0
 pytz==2022.1
 pyxdg==0.27
 PyYAML==5.4.1
 regex==2023.10.3
 reportlab==3.6.8
 requests==2.31.0
 scikit-learn==1.3.2
 scipy==1.11.4
 SecretStorage==3.3.1
 six==1.16.0
 slugify==0.0.1
 smart-open==6.4.0
 sniffio==1.3.0
 soupsieve==2.5
 spacy==3.7.2
 spacy-legacy==3.0.12
 spacy-loggers==1.0.5
 SQLAlchemy==1.4.31
 sqlalchemy-migrate==0.13.0
 sqlparse==0.4.2
 srsly==2.4.8
 systemd-python==234
 Tempita==0.5.2
 tenacity==8.2.3
 thinc==8.2.2
 threadpoolctl==3.2.0
 tiktoken==0.5.2
 tqdm==4.66.1
 typer==0.9.0
 typing-inspect==0.9.0
 typing_extensions==4.9.0
 ubuntu-advantage-tools==8001
 ubuntu-drivers-common==0.0.0
 ufw==0.36.1
 unattended-upgrades==0.1
 urllib3==1.26.5
 wadllib==1.3.6
 wasabi==1.1.2
 weasel==0.3.4
 Werkzeug==3.0.1
 xdg==5
 xkit==0.0.0
 yarl==1.9.4
 zipp==1.0.0
 zope.interface==6.1
--- a/pyth/scrapingsingle.py
+++ b/pyth/scrapingsingle.py
@@ -1,15 +1,20 @@
 from bs4 import BeautifulSoup
 import requests
 from urllib.parse import urljoin
-from openai import OpenAI
+from openai import OpenAI , APIError 
 import os
 from langchain.embeddings import OpenAIEmbeddings
-from langchain.vectorstores.pgvector import PGVector
+from vectData import (insert_data ,is_similar_data ,get_similar, get_specific_data, delete_specific,get_all_links,cleansing ,modify_similar_data)
 from vectData import insert_data ,is_similar_data 
 import json
 from dotenv import load_dotenv
 import tiktoken
-os.environ["OPENAI_API_KEY"] = "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"
+load_dotenv()
 cleansing()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 client = OpenAI()
 embeddings = OpenAIEmbeddings()
@@ -17,9 +22,36 @@ dlinks = ['https://klix.ba', 'https://srpskainfo.com', 'https://bljesak.info']
 headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1.1; SM-G928X Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83 Mobile Safari/537.36'}
 def num_tokens_from_string(string: str, model="gpt-3.5-turbo") -> int:
    """Return how many tokens `string` encodes to with the tiktoken encoding for `model`."""
    token_ids = tiktoken.encoding_for_model(model).encode(string)
    return len(token_ids)
 def slice_text_at_2k_tokens(text):
    """Truncate `text` to at most 2000 tokens of the gpt-3.5-turbo encoding.

    Args:
        text: The string to limit.

    Returns:
        str: `text` unchanged when it fits in 2000 tokens, otherwise the
        decoded first 2000 tokens. The result is always a string, so callers
        can interpolate it into a prompt without list brackets leaking in.
    """
    max_tokens = 2000
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    tokens = encoding.encode(text)
    if len(tokens) <= max_tokens:
        # Short input: hand the text back as-is (as a str, not wrapped in a list).
        return text
    return encoding.decode(tokens[:max_tokens])
 def replace_with_spaces(text):
    """Return `text` with every character outside the allowed alphabet replaced by one space.

    Allowed characters are ASCII letters, the listed Bosnian/Croatian/Serbian
    Latin letters, digits and the space character.
    """
    keep = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzČčĆćDždžĐđŠšŽž0123456789 "
    pieces = []
    for symbol in text:
        if symbol in keep:
            pieces.append(symbol)
        else:
            pieces.append(' ')
    return ''.join(pieces)
 total_links = set()
 collected_news = set()
 def get_article_links(url, already_checked):
    response = requests.get(url,headers)
    if response.status_code == 200:
@@ -36,6 +68,8 @@ def get_article_links(url, already_checked):
                    already_checked.add(link_value)
        return link_store
 already_checked = set()
 for dlink in dlinks:
@@ -44,8 +78,17 @@ for dlink in dlinks:
        total_links.update(temp_links)
 final_links = {item for item in total_links if item}
 i = 0 
-for link in final_links:
+db_links = set(get_all_links())
 new_links = final_links - db_links
 final_links = new_links
 if __name__ == '__main__':
 for link in final_links:
    response = requests.get(link,headers)
    soup = BeautifulSoup(response.text, 'html.parser')
@@ -54,6 +97,16 @@ for link in final_links:
    texts = soup.find_all(['p'])
    text_text = ' '.join([text.get_text(strip=True) for text in texts])
    text_text = text_text
    title_text = title_text
    title_text = replace_with_spaces(title_text)
    print(f"Tokens usage: {num_tokens_from_string(text_text, 'gpt-3.5-turbo')}")
    text_text = slice_text_at_2k_tokens(text_text)
    text_text = replace_with_spaces(str(text_text))
    try:
        completion = client.chat.completions.create(
@@ -65,23 +118,130 @@ for link in final_links:
        )
        generated_text = completion.choices[0].message.content
        generated_text = generated_text
        response_data = json.loads(generated_text)
        title = response_data["title"]
        text = response_data["content"]
-        print("*********************************")
+        #print("*********************************")
-        print(f"Title: {title}")
+        #print(f"Title: {title}")
-        print("---------------------------------")
+        #print("---------------------------------")
-        print(f"Content : {text}")
+        #print(f"Content : {text}")
-        print("*********************************")
+        #print("*********************************")
        vector = embeddings.embed_query(generated_text)
        if not is_similar_data(title, text, link, vector, threshold=0.9):
         insert_data(title, text, link, vector)
        if not is_similar_data(title, text, link, vector, threshold=0.98):
         similar_d = "NO"
         insert_data(title, text, link, vector,similar_d)
    except Exception as e:
        print(f"Error in completion: {e}")
        continue
 def comb_similar():
    """Merge articles flagged as similar into one combined article per pair.

    For every (title, similar_d) pair returned by get_similar(), both articles
    are loaded, a prompt asking for a single combined news text is sent to the
    chat model, the source rows are marked with similar_d = "SOURCE", and the
    combined article is stored unless is_similar_data() reports that a
    similar one already exists.

    Returns:
        None. Progress and errors are reported with print().
    """
    print("Checking similar")
    similar_article = get_similar()
    grouped_data = {}
    for sa in similar_article:
        first_t = get_specific_data(sa[0])
        second_t = get_specific_data(sa[1])
        if not first_t or not second_t:
            # One of the titles has no row; skip the pair instead of raising
            # IndexError on the [0] lookup below.
            print(f"Skipping pair, article not found: {sa}")
            continue
        # get_specific_data rows are (title, text, link, similar_d, embedding).
        f_title, f_text, link_f = first_t[0][:3]
        s_title, s_text, link_s = second_t[0][:3]
        grouped_data.setdefault(f_title, []).append((f_text, link_f))
        grouped_data.setdefault(s_title, []).append((s_text, link_s))
        # NOTE(review): every group collected so far is visited and the last
        # one visited decides user_message/link - confirm this is intended.
        for tuples in grouped_data.values():
            if len(tuples) == 3:
                (text1, link1), (text2, link2), (text3, link3) = tuples
                # Compare the summed token count; a tuple cannot be compared with an int.
                total_tokens = sum(
                    num_tokens_from_string(part) for part in (text1, text2, text3)
                )
                link = f"{link1} {link2} {link3}"
                if total_tokens < 2000:
                    combined_text = slice_text_at_2k_tokens(f"{text1}{text2}{text3}")
                    if isinstance(combined_text, list):
                        # Unwrap defensively in case the slicer hands back a one-element list.
                        combined_text = combined_text[0]
                    user_message = rf"Here is text {combined_text}, combined from 3 sources, filter text, and make news content, return as JSON only with 'content' field"
                else:
                    user_message = rf"Here are 3 texts {text1} {text2} and {text3}, combine the following texts into a cohesive news remove any non-news related to both texts and provide the cleaned data as a JSON only with 'content' field."
            else:
                link = f"{link_f} {link_s}"
                fscomb = num_tokens_from_string(f_text) + num_tokens_from_string(s_text)
                if fscomb < 2000:
                    combined_text = f"{f_text}{s_text}"
                    user_message = rf"Here is text {combined_text}, combined from 2 sources, filter text, and make news content, return as JSON only with 'content' field"
                else:
                    user_message = rf"Here are 2 texts {f_text} and {s_text}, combine the following texts into a cohesive news remove any non-news related to both texts and provide the cleaned data as a JSON only with 'content' field."
        try:
            completion = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "Data analytic, Journalist and News reporter"},
                    {"role": "user", "content": user_message}
                ]
            )
            generated_text = completion.choices[0].message.content
            # modify_similar_data takes (new_value, title): mark the source rows
            # so get_similar() stops returning them.
            if f_title == s_title:
                print(f_title)
                modify_similar_data("SOURCE", f_title)
                print("Modified")
            else:
                print(f"Second: {s_title}")
                modify_similar_data("SOURCE", f_title)
                modify_similar_data("SOURCE", s_title)
                print("Modified")
            response_data = json.loads(generated_text)
            title = f_title
            text = response_data["content"]
            vector = embeddings.embed_query(generated_text)
            if not is_similar_data(title, text, link, vector, threshold=0.98):
                similar_d = "NO"
                insert_data(title, text, link, vector, similar_d)
        except Exception as e:
            # Best effort: report the failure and move on to the next pair.
            print(f"Error in completion: {e}")
            continue
 comb_similar()
--- a/pyth/templates/index.html
+++ b/pyth/templates/index.html
@@ -0,0 +1,23 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Test Pyth</title>
 </head>
 <body>
    <div>
        <article>
            <h2>Test Title 1</h2>
            <p>Test Text 1</p>
            <a href="/article/one"> First</a>
        </article>
        <article>
            <h2>Test Title 2</h2>
            <p>Test Text 2</p>
            <a href="/article/two">Second</a>
        </article>
    </div>
 </body>
 </html>
--- a/pyth/templates/one.html
+++ b/pyth/templates/one.html
@@ -0,0 +1,12 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Article</title>
 </head>
 <body>
    <h2>Test Title</h2>
    <p>Test Text</p>
 </body>
 </html>
--- a/pyth/templates/two.html
+++ b/pyth/templates/two.html
@@ -0,0 +1,12 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Article</title>
 </head>
 <body>
    <h2>Test Title</h2>
    <p>Test Text</p>
 </body>
 </html>
--- a/pyth/tests/pycache/test_scrapingsingle.cpython-310.pyc
+++ b/pyth/tests/pycache/test_scrapingsingle.cpython-310.pyc
--- a/pyth/tests/pycache/test_vectData.cpython-310.pyc
+++ b/pyth/tests/pycache/test_vectData.cpython-310.pyc
--- a/pyth/tests/test_scrapingsingle.py
+++ b/pyth/tests/test_scrapingsingle.py
@@ -0,0 +1,60 @@
 import unittest
 from unittest.mock import patch
 import requests
 from bs4 import BeautifulSoup
 from langchain.embeddings import OpenAIEmbeddings
 from langchain.vectorstores.pgvector import PGVector
 from openai import OpenAI
 import json
 from dotenv import load_dotenv
 from scrapingsingle import get_article_links, insert_data, is_similar_data
 import os
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 client = OpenAI()
 embeddings = OpenAIEmbeddings()
 already_checked = set()
 total_links = set()
 collected_news = set()
 dlinks = 'http://127.0.0.1:5000/'
 class TestIntegration(unittest.TestCase):
    """Integration test: link discovery on the local test site plus the chat-model clean-up step.

    Requires the test web server at `dlinks` and a working OpenAI API key.
    """
    def test_integration(self):
        """Discover the two article links, then check the model output for each article."""
        found_links = get_article_links(dlinks, already_checked)
        self.assertEqual(len(already_checked), 2)
        # Feed the discovered links into total_links so the loop below actually
        # runs; falsy entries are dropped, mirroring the scraper script.
        total_links.update(item for item in (found_links or ()) if item)
        self.assertEqual(len(total_links), 2)
        for link in total_links:
            response = requests.get(link)
            soup = BeautifulSoup(response.text, 'html.parser')
            titles = soup.find_all(['h2', 'h1', 'h3'])
            title_text = ' '.join([title.get_text(strip=True) for title in titles])
            texts = soup.find_all(['p'])
            text_text = ' '.join([text.get_text(strip=True) for text in texts])
            completion = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "Data analytic, Journalist and News reporter"},
                    {"role": "user", "content": rf"Extract relevant information from the following input: Title: {title_text}, Text: {text_text}. Remove any non-news element related to the current text and title, and provide the cleaned data as a JSON object with 'title' and 'content' fields."}
                ]
            )
            generated_text = completion.choices[0].message.content
            response_data = json.loads(generated_text)
            title = response_data["title"]
            text = response_data["content"]
            # Exercise the embedding call as well; its value is not asserted on.
            vector = embeddings.embed_query(generated_text)
            self.assertIn("Test Title", title)
            self.assertIn("Test Text", text)
--- a/pyth/tests/test_vectData.py
+++ b/pyth/tests/test_vectData.py
@@ -0,0 +1,89 @@
 import unittest
 import numpy as np
 import psycopg2
 import os
 from vectData import calculate_cosine_similarity, is_similar_data, insert_data, get_data, create_db
 class TestIntegration(unittest.TestCase):
    """Integration tests for vectData against the PostgreSQL database named by the DB_* variables."""
    # Connection parameters, read from the environment when the class body runs.
    host = os.getenv("DB_HOST")
    port = os.getenv("DB_PORT")
    user = os.getenv("DB_USER")
    password = os.getenv("DB_PASSWORD")
    dbname = os.getenv("DB_NAME")
    @classmethod
    def setUpClass(cls):
        """Open the shared connection and (re)create the table through create_db()."""
        cls.host = os.getenv("DB_HOST")
        cls.port = os.getenv("DB_PORT")
        cls.user = os.getenv("DB_USER")
        cls.password = os.getenv("DB_PASSWORD")
        cls.dbname = os.getenv("DB_NAME")
        cls.conn = psycopg2.connect(
            host=cls.host,
            port=cls.port,
            user=cls.user,
            password=cls.password,
            dbname=cls.dbname
        )
        create_db(cls.conn)
    @classmethod
    def tearDownClass(cls):
        """Close the class-level connection."""
        cls.conn.close()
    def setUp(self):
        """Give each test an open connection and a fresh cursor."""
        # tearDown closes the connection, so reconnect when a previous test ran.
        if self.conn.closed:
            self.conn = psycopg2.connect(
                host=self.host,
                port=self.port,
                user=self.user,
                password=self.password,
                dbname=self.dbname
            )
        self.cursor = self.conn.cursor()
    def tearDown(self):
        """Close the cursor and connection used by the test."""
        if not self.cursor.closed:
            self.cursor.close()
        if not self.conn.closed:
            self.conn.close()
    def test_insert_and_retrieve_data(self):
        """A row written by insert_data() is returned by get_data()."""
        title = 'test_title'
        text = 'test_text'
        link = 'test_link'
        embedding = np.arange(1, 1537)
        # insert_data takes similar_d as its fifth argument; "NO" marks a plain article.
        insert_data(title, text, link, embedding, "NO")
        data = get_data()
        self.assertEqual(data, [(title, text, link)])
    def test_is_similar_data_integration(self):
        """An identical embedding is reported as similar on every repeated call."""
        title = 'test_title'
        text = 'test_text'
        link = 'test_link'
        embedding = np.arange(1, 1537)
        insert_data(title, text, link, embedding, "NO")
        result = is_similar_data(title, text, link, embedding)
        self.assertTrue(result)
        result = is_similar_data(title, text, link, embedding)
        self.assertTrue(result)
        result = is_similar_data(title, text, link, embedding)
        self.assertTrue(result)
    def test_create_db_integration(self):
        """The vectorsvevijesti table is listed in information_schema.tables."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT * FROM information_schema.tables WHERE table_name = 'vectorsvevijesti'")
        table_exist = bool(cursor.fetchone())
        self.assertTrue(table_exist)
 if __name__ == '__main__':
    unittest.main()
--- a/pyth/vectData.py
+++ b/pyth/vectData.py
@@ -3,12 +3,26 @@ from psycopg2 import sql
 from pgvector.psycopg2 import register_vector
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
 import os
 from dotenv import load_dotenv
 from datetime import datetime ,timedelta
-host = 'localhost'
+
-port = '5432'
+load_dotenv()
-user = 'postgres'
+
-password = 'salmonela pljusti 221 hamo'
+host = os.getenv("DB_HOST")
-dbname = 'vector_svw'
+port = os.getenv("DB_PORT")
 user = os.getenv("DB_USER")
 password = os.getenv("DB_PASSWORD")
 dbname = os.getenv("DB_NAME")
 conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname
    )
 def calculate_cosine_similarity(v1, v2):
    v1_normalized = v1 / np.linalg.norm(v1)
@@ -17,7 +31,7 @@ def calculate_cosine_similarity(v1, v2):
    similarity = cosine_similarity([v1_normalized], [v2_normalized])[0][0]
    return similarity
-def is_similar_data(title, text, link, embedding, threshold=0.9):
+def is_similar_data(title, text, link, embedding, threshold=0.98):
    conn = psycopg2.connect(
        host=host,
        port=port,
@@ -27,25 +41,33 @@ def is_similar_data(title, text, link, embedding, threshold=0.9):
    )
    cursor = conn.cursor()
-    cursor.execute('SELECT title ,embedding FROM vectorsvevijesti;')
+    cursor.execute('SELECT title,embedding,link FROM vectorsvevijesti;')
    existing_embeddings = cursor.fetchall()
    for existing_embedding_tuple in existing_embeddings:
        existing_title = existing_embedding_tuple[0]
        existing_embedding = np.array(existing_embedding_tuple[1]).flatten()
        existing_link = existing_embedding_tuple[2]
        similarity = calculate_cosine_similarity(existing_embedding, embedding)
        if similarity > threshold:
-            print(f"Similar data found: \n #{title} \n #{existing_title}")
+            if link != existing_link:
-            cursor.close()
+                similar_d = existing_title
-            conn.close()
+                insert_data(title,text,link,embedding,similar_d)
-            return True
+                print(f"Similar data found: \n #{title} \n #{existing_title}")
                print(f"Inserting: #{title} \n")
                similar_d = "NO"
                cursor.close()
                return True
            else:
                print(f"Same source of same article!")
                cursor.close()
                return True
    print(f"Inserting: #{title}")
    cursor.close()
    conn.close()
    return False
-def insert_data(title, text, link, embedding):
+def get_similar():
    conn = psycopg2.connect(
        host=host,
        port=port,
@@ -53,17 +75,35 @@ def insert_data(title, text, link, embedding):
        password=password,
        dbname=dbname
    )
    cursor = conn.cursor()
    query = '''SELECT title,similar_d FROM vectorsvevijesti WHERE similar_d NOT IN ('NO', 'SOURCE')'''
    cursor.execute(query)
    similar_data = cursor.fetchall()
    cursor.close()
    return similar_data
 def insert_data(title, text, link, embedding, similar_d="NO"):
    """Insert one article row into vectorsvevijesti, stamped with the current time.

    Args:
        title: Article title.
        text: Article body.
        link: Source link(s) of the article.
        embedding: Embedding vector stored in the `embedding` column.
        similar_d: Value for the `similar_d` column. Defaults to "NO" (the
            marker callers use for an article with no similar counterpart),
            so four-argument calls keep working.

    Raises:
        psycopg2.Error: If connecting or inserting fails.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname
    )
    try:
        c_time = datetime.now()
        with conn.cursor() as cursor:
            cursor.execute('''
        INSERT INTO vectorsvevijesti (title, text, link, embedding, similar_d, time)
        VALUES (%s, %s, %s, %s, %s ,%s);
    ''', (title, text, link, embedding , similar_d, c_time))
        conn.commit()
    finally:
        # Always release the connection, even when the insert raises.
        conn.close()
 def get_data():
    conn = psycopg2.connect(
@@ -79,11 +119,110 @@ def get_data():
    cursor.execute(query)
    data = cursor.fetchall()
    cursor.close()
    conn.close()
    return data
-def create_db():
+def modify_similar_data(new_value ,title):
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname
    ) 
    cursor = conn.cursor()
    query = '''UPDATE vectorsvevijesti SET similar_d = %s WHERE title = %s '''
    cursor.execute(query, (new_value, title))
    conn.commit()
 def get_specific_data(title):
    """Fetch every row whose title equals `title`.

    Args:
        title: Exact title to look up.

    Returns:
        list[tuple]: Rows of (title, text, link, similar_d, embedding); empty
        when no row matches.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname
    )
    try:
        query = '''SELECT title, text, link, similar_d, embedding FROM vectorsvevijesti WHERE title = %s'''
        with conn.cursor() as cursor:
            cursor.execute(query, (title,))
            specific_post = cursor.fetchall()
    finally:
        # Close the connection so each lookup does not leave one open.
        conn.close()
    return specific_post
 def get_all_links():
    """Return the set of all `link` values stored in vectorsvevijesti.

    Returns:
        set: One entry per distinct link; empty when the table has no rows.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname
    )
    try:
        query = '''SELECT link FROM vectorsvevijesti'''
        with conn.cursor() as cursor:
            cursor.execute(query)
            db_links = {row[0] for row in cursor.fetchall()}
    finally:
        # Close the connection so each call does not leave one open.
        conn.close()
    return db_links
 def delete_specific(title):
    """Delete every row whose title equals `title`.

    Args:
        title: Exact title of the rows to remove.

    Raises:
        psycopg2.Error: If connecting or deleting fails.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname
    )
    try:
        query = '''DELETE FROM vectorsvevijesti WHERE title = %s'''
        with conn.cursor() as cursor:
            cursor.execute(query,(title,))
        # Without a commit the DELETE is discarded when the connection goes away.
        conn.commit()
    finally:
        conn.close()
 def cleansing():
    """Delete all rows whose `time` is more than one day before now.

    Raises:
        psycopg2.Error: If connecting or deleting fails.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname
    )
    try:
        day_long = datetime.now() - timedelta(days=1)
        query = '''DELETE FROM vectorsvevijesti WHERE time < %s'''
        with conn.cursor() as cursor:
            cursor.execute(query,(day_long,))
        conn.commit()
    finally:
        # Close the connection so each clean-up run does not leave one open.
        conn.close()
 def drop_table():
    """Drop the vectorsvevijesti table if it exists.

    Raises:
        psycopg2.Error: If connecting or dropping fails.
    """
    conn = psycopg2.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        dbname=dbname
    )
    try:
        query = '''DROP TABLE IF EXISTS vectorsvevijesti;'''
        with conn.cursor() as cursor:
            cursor.execute(query)
        conn.commit()
    finally:
        # Close the connection so the call does not leave one open.
        conn.close()
 def create_db(conn):
    conn = psycopg2.connect(
        host=host,
        port=port,
@@ -97,19 +236,18 @@ def create_db():
    register_vector(conn)
    cursor.execute("DROP TABLE IF EXISTS vectorsvevijesti;")
    cursor.execute('''
-        CREATE TABLE vectorsvevijesti (
+        CREATE TABLE IF NOT EXISTS vectorsvevijesti (
            id bigserial PRIMARY KEY,
            title VARCHAR,
            text VARCHAR,
            link VARCHAR,
-            embedding vector(1536)
+            embedding vector(1536),
            similar_d VARCHAR,
            time TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        );
    ''')
    conn.commit()
    cursor.close()
-    conn.close()
+create_db(conn)
 create_db()
--- a/pyth/web-server.py
+++ b/pyth/web-server.py
@@ -0,0 +1,24 @@
 from flask import Flask , render_template , jsonify
 from vectData import get_data
 from flask_cors import CORS
 app = Flask(__name__)
 CORS(app)
@app.route('/')
 def index() :
    return render_template("index.html")
@app.route('/article/one')
 def articleone():
    return render_template("one.html")
@app.route('/article/two')
 def articletwo():
    return render_template("two.html")
 app.run(debug=True)