delete proba files

This commit is contained in:
ismailsosic
2022-06-07 00:18:15 +02:00
parent b256070de3
commit bfbc70337a
9 changed files with 51 additions and 40 deletions

View File

@@ -145,7 +145,7 @@ def CarsCrawler(number_of_pages):
## Getting all id's of articles
for i in range(0, len(results_all_items_per_page)):
if(results_all_items_per_page[i].find('p')):
if results_all_items_per_page[i].find('p'):
# Divide id from rest of link
address_content = results_all_items_per_page[i].find('a')['href']
temp = address_content.split('/')
@@ -159,7 +159,8 @@ def CarsCrawler(number_of_pages):
podaci = filters.copy()
# Add kategorija
if (category_of_vehicle == 18): podaci['Kategorija'] = ('Automobili')
if category_of_vehicle == 18:
podaci['Kategorija'] = ('Automobili')
# Artikal olx_link
artikal_link = 'https://www.olx.ba/artikal/' + olx_id[i]
@@ -175,7 +176,7 @@ def CarsCrawler(number_of_pages):
# Osnovni filteri
# Cijena
if (result_item.find('div',{'id':'pc'})):
if result_item.find('div',{'id':'pc'}):
x_cijena = result_item.find('div',{'id':'pc'}).findAll('p')
item_cijena = x_cijena[1].get_text().split()[0]
if(item_cijena == 'Po'):
@@ -185,7 +186,7 @@ def CarsCrawler(number_of_pages):
# Lokacija
#kanton
if (result_item.find('div',{'class':'mobile-lokacija'})):
if result_item.find('div',{'class':'mobile-lokacija'}):
x_lokacija = result_item.find('div',{'class':'mobile-lokacija'})['data-content'].split()
item_kanton = x_lokacija[0].replace(',','')
podaci['Lokacija_kanton'] = item_kanton
@@ -199,47 +200,47 @@ def CarsCrawler(number_of_pages):
# print(podaci['Lokacija_grad'])
# Stanje
if (result_item.find('div',{'class':'mobile-stanje'})):
if result_item.find('div',{'class':'mobile-stanje'}):
x_stanje = result_item.find('div',{'class':'mobile-stanje'}).get_text().split()
item_stanje = x_stanje[1]
podaci['Stanje'] = item_stanje
# print(podaci['Stanje'])
# Ispod divs
if (result_item.find_all('div',{'class':'ispod'})):
if result_item.find_all('div',{'class':'ispod'}):
ispod_divs = result_item.find_all('div',{'class':'ispod'})
for i in range (len(ispod_divs)):
# Proizvodjac
if (i==0):
if i==0:
x_proizvodjac_p = ispod_divs[0].find_all('p')
x_proizvodjac = x_proizvodjac_p[1].get_text()
podaci['Proizvođač'] = x_proizvodjac
# print(podaci['Proizvođač'])
# Model
if (i==1):
if i==1:
x_model_p = ispod_divs[1].find_all('p')
x_model = x_model_p[1].get_text()
podaci['Model'] = x_model
# print(podaci['Model'])
# Godiste
if (i==2):
if i==2:
x_godiste_p = ispod_divs[2].find_all('p')
x_godiste = x_godiste_p[1].get_text()
podaci['Godište'] = x_godiste
# print(podaci['Godište'])
# Kilometraza
if (i==3):
if i==3:
x_kilometraza_p = ispod_divs[3].find_all('p')
x_kilometraza = x_kilometraza_p[1].get_text()
podaci['Kilometraža'] = x_kilometraza
# print(podaci['Kilometraža'])
# Gorivo
if (i==4):
if i==4:
x_gorivo_p = ispod_divs[4].find_all('p')
x_gorivo = x_gorivo_p[1].get_text()
podaci['Gorivo'] = x_gorivo
@@ -249,14 +250,14 @@ def CarsCrawler(number_of_pages):
# Napredni filteri
# Dodatna polja
if (result_item.find_all('div',{'id':'dodatnapolja1'})):
if result_item.find_all('div',{'id':'dodatnapolja1'}):
dodatnapolja_all_divs = result_item.find_all('div',{'id':'dodatnapolja1'})
for i in range (0,len(dodatnapolja_all_divs)):
if(dodatnapolja_all_divs[i].find_all('div',{'class','df'})):
if dodatnapolja_all_divs[i].find_all('div',{'class','df'}):
df_pom = dodatnapolja_all_divs[i].find_all('div',{'class','df'})
for j in range (0,len(df_pom)):
df_pom1 = df_pom[j].find('div',{'class','df1'}).get_text()
if (df_pom[j].find('div',{'class','df2'}).find('i')):
if df_pom[j].find('div',{'class','df2'}).find('i'):
df_pom2 = True
else : df_pom2 = df_pom[j].find('div',{'class','df2'}).get_text()
podaci[df_pom1] = df_pom2
@@ -268,7 +269,7 @@ def CarsCrawler(number_of_pages):
# print(kw)
# Datum objave
if (result_item.find('time', {'class' : 'entry-date'})):
if result_item.find('time', {'class' : 'entry-date'}):
date_objave_div = result_item.find('time', {'class' : 'entry-date'}).get_text().split()
datum_objave = date_objave_div[0]
datum_objave = datum_objave.split('.')
@@ -277,7 +278,7 @@ def CarsCrawler(number_of_pages):
# Datum obnove
if (result_item.find('div', {'class' : 'ob'})):
if result_item.find('div', {'class' : 'ob'}):
datum_obnove_div = result_item.find('div', {'class' : 'ob'}).attrs
datum_vrijeme = datum_obnove_div["data-content"].split('.')
datum = datum_vrijeme[2] + "-" + datum_vrijeme[1] + "-" + datum_vrijeme[0]
@@ -306,22 +307,4 @@ def CarsCrawler(number_of_pages):
# olx_db = pd.DataFrame(podaci_db) # treba biti niz
# # print(olx_db)
# olx_db.to_excel('proba231112.xlsx',index=False)
# olx_db.to_excel('proba231112.xlsx',index=False)

View File

@@ -14,7 +14,7 @@ while True:
print("_________________________________")
global num_of_crawl
if num_of_crawl == 0:
cars = CarsCrawler(5)
cars = CarsCrawler(3)
num_of_crawl += 1
else:
cars = CarsCrawler(2)
@@ -194,4 +194,4 @@ while True:
print("Novi artikal je spasen")
print("___________________________")
Saver()
time.sleep(120)
time.sleep(180)

7
kivi_cars/cars/urls.py Normal file
View File

@@ -0,0 +1,7 @@
from django.urls import path
from Generator import views
from cars.saver import Saver
# URL routes for the cars app: the empty path (the app root) is mapped to
# `views.Saver`, resolved on the `views` module imported from `Generator`.
# NOTE(review): the `Saver` name imported from `cars.saver` is not referenced
# in this list — confirm which of the two is the intended route target.
urlpatterns = [
path('', views.Saver)
]

View File

@@ -25,7 +25,10 @@ SECRET_KEY = 'django-insecure-xb8dcaoj_mlmd5@(p4%!!3yzu@%%u&fk+ush=7pm2873azpy@(
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
ALLOWED_HOSTS = []
# Host names this Django site is allowed to serve; only the two local
# development addresses are listed.
ALLOWED_HOSTS = [
'localhost',
'127.0.0.1'
]
# Application definition
@@ -38,6 +41,8 @@ INSTALLED_APPS = [
'django.contrib.messages',
'django.contrib.staticfiles',
'corsheaders',
'rest_framework',
'Generator',
@@ -52,6 +57,20 @@ MIDDLEWARE = [
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'corsheaders.middleware.CorsMiddleware',
'django.middleware.common.CommonMiddleware',
]
# django-cors-headers setting: origins permitted to make cross-origin
# requests to this site. Both entries are port 3000 on the local machine —
# presumably a front-end dev server; confirm before deploying elsewhere.
CORS_ALLOWED_ORIGINS = [
"http://localhost:3000",
"http://127.0.0.1:3000"
]
# django-cors-headers setting: HTTP methods allowed for cross-origin requests.
# NOTE(review): OPTIONS and PATCH are not in this list — confirm that
# leaving them out is intentional.
CORS_ALLOW_METHODS = [
'DELETE',
'GET',
'POST',
'PUT',
]
ROOT_URLCONF = 'kivi_cars.urls'

View File

@@ -16,10 +16,10 @@ Including another URLconf
from django.urls.conf import include
from django.contrib import admin
from django.urls import path
from cars.saver import Saver
urlpatterns = [
path('', include("cars.urls")),
path('admin/', admin.site.urls),
path('generator/', include("Generator.urls")),
# path('generator/', include("Generator.urls")),
]

View File

@@ -14,3 +14,5 @@ from django.core.wsgi import get_wsgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'kivi_cars.settings')
application = get_wsgi_application()

Binary file not shown.

Binary file not shown.

Binary file not shown.