70 lines
1.8 KiB
Python
70 lines
1.8 KiB
Python
from slugify import slugify
|
|
import random
|
|
from db_management import get_ready_data,insert_final,get_existing_titles
|
|
|
|
def create_slug(title):
    """Build a URL slug for an article title.

    A random integer between 1 and 1000 (inclusive) is placed in front of
    the title, separated by a space, and the combined string is passed
    through ``slugify``.

    Args:
        title: Article title to derive the slug from.

    Returns:
        Whatever ``slugify`` returns for the prefixed title.
    """
    prefix = random.randint(1, 1000)
    return slugify(f"{prefix} {title}")
|
|
|
|
def get_source_id(link, similar):
    """Map an article link to the numeric id of the portal it came from.

    Args:
        link: URL of the article. A falsy value (``None`` or ``""``) is
            treated as an unknown source.
        similar: Similarity flag for the row; only the exact string ``"NO"``
            triggers a lookup by link.

    Returns:
        ``5`` when ``similar`` is anything other than ``"NO"``; otherwise the
        id of the first known portal marker found in the link, or ``0`` when
        no marker matches.
    """
    # Imported locally so this function does not rely on a file-level import.
    from urllib.parse import urlsplit

    if similar != "NO":
        return 5
    if not link:
        return 0

    # Checked in this order; the first marker that is found wins.
    markers = (
        ("srpskainfo", 1),
        ("klix", 2),
        ("bljesak", 3),
        ("blic", 4),
        ("index.hr", 6),
        ("avaz", 7),
        ("telegraf", 8),
        ("vijesti.me", 9),
        ("dnevnik.hr", 10),
        ("24sata.hr", 11),
    )

    def first_match(haystack):
        """Return the id of the first marker contained in *haystack*, else 0."""
        for marker, source_id in markers:
            if marker in haystack:
                return source_id
        return 0

    # Look at the hostname first: matching against the whole URL lets words in
    # the path (e.g. "republic" containing "blic") override the real portal.
    try:
        host = urlsplit(link).netloc.lower()
    except ValueError:
        # urlsplit rejects some malformed URLs; fall through to the raw scan.
        host = ""

    # Fall back to scanning the whole link (the previous behaviour) when the
    # hostname is missing (no scheme) or does not contain any known marker.
    return first_match(host) or first_match(link)
|
|
|
|
# Rows to publish, fetched once when this module is imported by calling
# get_ready_data() from db_management; publish_articles() iterates over it.
data = get_ready_data()
|
|
|
|
def remove_braces_and_quotes(text):
    """Strip every ``{"`` and ``"}`` sequence from *text*.

    All occurrences are removed, not only those at the start or end of the
    string. Opening sequences are removed first, then closing ones.

    Args:
        text: String to clean.

    Returns:
        A copy of *text* without the two-character wrapper sequences.
    """
    cleaned = text
    for wrapper in ('{"', '"}'):
        cleaned = cleaned.replace(wrapper, '')
    return cleaned
|
|
|
|
|
|
def publish_articles():
    """Insert every not-yet-published row of the module-level ``data``.

    For each row, fields are read by position (0: title, 1: text, 2: link,
    4: similarity flag, 5: category). A row is skipped when its title or link
    is found in any entry returned by ``get_existing_titles()``; otherwise
    the title and text are cleaned with ``remove_braces_and_quotes`` and the
    row is handed to ``insert_final``.

    Progress is reported with ``print``. Returns ``None``.
    """
    for row in data:
        raw_title = row[0]
        raw_text = row[1]
        link = row[2]
        similar_d = row[4]
        category = row[5]

        # The cleaned title is what gets inserted, so the duplicate check has
        # to look for it as well; checking only the raw title misses entries
        # whose stored form no longer has the {" ... "} wrapper.
        clean_title = remove_braces_and_quotes(raw_title)
        needles = [raw_title, link]
        if clean_title and clean_title != raw_title:
            needles.append(clean_title)

        # Fetched per row, as before. NOTE(review): presumably this lets rows
        # inserted earlier in the same run be seen - confirm that
        # get_existing_titles() reads what insert_final() writes.
        existing = get_existing_titles()
        if any(needle in entry for entry in existing for needle in needles):
            continue

        # Only rows that are actually published need a slug and a source id.
        # The slug is still derived from the raw title, as before.
        slug = create_slug(raw_title)
        source_id = get_source_id(link, similar_d)
        clean_text = remove_braces_and_quotes(raw_text)

        print(f"Source: {source_id}")
        print(f"Link: {link}")
        insert_final(clean_title, clean_text, slug, link, source_id, category)
        print(f"Publishing: {clean_title}")
|