delete proba files

This commit is contained in:
ismailsosic
2022-06-07 00:18:15 +02:00
parent b256070de3
commit bfbc70337a
9 changed files with 51 additions and 40 deletions

View File

@@ -145,7 +145,7 @@ def CarsCrawler(number_of_pages):
## Getting all id's of articles ## Getting all id's of articles
for i in range(0, len(results_all_items_per_page)): for i in range(0, len(results_all_items_per_page)):
if(results_all_items_per_page[i].find('p')): if results_all_items_per_page[i].find('p'):
# Divide id from rest of link # Divide id from rest of link
address_content = results_all_items_per_page[i].find('a')['href'] address_content = results_all_items_per_page[i].find('a')['href']
temp = address_content.split('/') temp = address_content.split('/')
@@ -159,7 +159,8 @@ def CarsCrawler(number_of_pages):
podaci = filters.copy() podaci = filters.copy()
# Add kategorija # Add kategorija
if (category_of_vehicle == 18): podaci['Kategorija'] = ('Automobili') if category_of_vehicle == 18:
podaci['Kategorija'] = ('Automobili')
# Artikal olx_link # Artikal olx_link
artikal_link = 'https://www.olx.ba/artikal/' + olx_id[i] artikal_link = 'https://www.olx.ba/artikal/' + olx_id[i]
@@ -175,7 +176,7 @@ def CarsCrawler(number_of_pages):
# Osnovni filteri # Osnovni filteri
# Cijena # Cijena
if (result_item.find('div',{'id':'pc'})): if result_item.find('div',{'id':'pc'}):
x_cijena = result_item.find('div',{'id':'pc'}).findAll('p') x_cijena = result_item.find('div',{'id':'pc'}).findAll('p')
item_cijena = x_cijena[1].get_text().split()[0] item_cijena = x_cijena[1].get_text().split()[0]
if(item_cijena == 'Po'): if(item_cijena == 'Po'):
@@ -185,7 +186,7 @@ def CarsCrawler(number_of_pages):
# Lokacija # Lokacija
#kanton #kanton
if (result_item.find('div',{'class':'mobile-lokacija'})): if result_item.find('div',{'class':'mobile-lokacija'}):
x_lokacija = result_item.find('div',{'class':'mobile-lokacija'})['data-content'].split() x_lokacija = result_item.find('div',{'class':'mobile-lokacija'})['data-content'].split()
item_kanton = x_lokacija[0].replace(',','') item_kanton = x_lokacija[0].replace(',','')
podaci['Lokacija_kanton'] = item_kanton podaci['Lokacija_kanton'] = item_kanton
@@ -199,47 +200,47 @@ def CarsCrawler(number_of_pages):
# print(podaci['Lokacija_grad']) # print(podaci['Lokacija_grad'])
# Stanje # Stanje
if (result_item.find('div',{'class':'mobile-stanje'})): if result_item.find('div',{'class':'mobile-stanje'}):
x_stanje = result_item.find('div',{'class':'mobile-stanje'}).get_text().split() x_stanje = result_item.find('div',{'class':'mobile-stanje'}).get_text().split()
item_stanje = x_stanje[1] item_stanje = x_stanje[1]
podaci['Stanje'] = item_stanje podaci['Stanje'] = item_stanje
# print(podaci['Stanje']) # print(podaci['Stanje'])
# Ispod divs # Ispod divs
if (result_item.find_all('div',{'class':'ispod'})): if result_item.find_all('div',{'class':'ispod'}):
ispod_divs = result_item.find_all('div',{'class':'ispod'}) ispod_divs = result_item.find_all('div',{'class':'ispod'})
for i in range (len(ispod_divs)): for i in range (len(ispod_divs)):
# Proizvodjac # Proizvodjac
if (i==0): if i==0:
x_proizvodjac_p = ispod_divs[0].find_all('p') x_proizvodjac_p = ispod_divs[0].find_all('p')
x_proizvodjac = x_proizvodjac_p[1].get_text() x_proizvodjac = x_proizvodjac_p[1].get_text()
podaci['Proizvođač'] = x_proizvodjac podaci['Proizvođač'] = x_proizvodjac
# print(podaci['Proizvođač']) # print(podaci['Proizvođač'])
# Model # Model
if (i==1): if i==1:
x_model_p = ispod_divs[1].find_all('p') x_model_p = ispod_divs[1].find_all('p')
x_model = x_model_p[1].get_text() x_model = x_model_p[1].get_text()
podaci['Model'] = x_model podaci['Model'] = x_model
# print(podaci['Model']) # print(podaci['Model'])
# Godiste # Godiste
if (i==2): if i==2:
x_godiste_p = ispod_divs[2].find_all('p') x_godiste_p = ispod_divs[2].find_all('p')
x_godiste = x_godiste_p[1].get_text() x_godiste = x_godiste_p[1].get_text()
podaci['Godište'] = x_godiste podaci['Godište'] = x_godiste
# print(podaci['Godište']) # print(podaci['Godište'])
# Kilometraza # Kilometraza
if (i==3): if i==3:
x_kilometraza_p = ispod_divs[3].find_all('p') x_kilometraza_p = ispod_divs[3].find_all('p')
x_kilometraza = x_kilometraza_p[1].get_text() x_kilometraza = x_kilometraza_p[1].get_text()
podaci['Kilometraža'] = x_kilometraza podaci['Kilometraža'] = x_kilometraza
# print(podaci['Kilometraža']) # print(podaci['Kilometraža'])
# Gorivo # Gorivo
if (i==4): if i==4:
x_gorivo_p = ispod_divs[4].find_all('p') x_gorivo_p = ispod_divs[4].find_all('p')
x_gorivo = x_gorivo_p[1].get_text() x_gorivo = x_gorivo_p[1].get_text()
podaci['Gorivo'] = x_gorivo podaci['Gorivo'] = x_gorivo
@@ -249,14 +250,14 @@ def CarsCrawler(number_of_pages):
# Napredni filteri # Napredni filteri
# Dodatna polja # Dodatna polja
if (result_item.find_all('div',{'id':'dodatnapolja1'})): if result_item.find_all('div',{'id':'dodatnapolja1'}):
dodatnapolja_all_divs = result_item.find_all('div',{'id':'dodatnapolja1'}) dodatnapolja_all_divs = result_item.find_all('div',{'id':'dodatnapolja1'})
for i in range (0,len(dodatnapolja_all_divs)): for i in range (0,len(dodatnapolja_all_divs)):
if(dodatnapolja_all_divs[i].find_all('div',{'class','df'})): if dodatnapolja_all_divs[i].find_all('div',{'class','df'}):
df_pom = dodatnapolja_all_divs[i].find_all('div',{'class','df'}) df_pom = dodatnapolja_all_divs[i].find_all('div',{'class','df'})
for j in range (0,len(df_pom)): for j in range (0,len(df_pom)):
df_pom1 = df_pom[j].find('div',{'class','df1'}).get_text() df_pom1 = df_pom[j].find('div',{'class','df1'}).get_text()
if (df_pom[j].find('div',{'class','df2'}).find('i')): if df_pom[j].find('div',{'class','df2'}).find('i'):
df_pom2 = True df_pom2 = True
else : df_pom2 = df_pom[j].find('div',{'class','df2'}).get_text() else : df_pom2 = df_pom[j].find('div',{'class','df2'}).get_text()
podaci[df_pom1] = df_pom2 podaci[df_pom1] = df_pom2
@@ -268,7 +269,7 @@ def CarsCrawler(number_of_pages):
# print(kw) # print(kw)
# Datum objave # Datum objave
if (result_item.find('time', {'class' : 'entry-date'})): if result_item.find('time', {'class' : 'entry-date'}):
date_objave_div = result_item.find('time', {'class' : 'entry-date'}).get_text().split() date_objave_div = result_item.find('time', {'class' : 'entry-date'}).get_text().split()
datum_objave = date_objave_div[0] datum_objave = date_objave_div[0]
datum_objave = datum_objave.split('.') datum_objave = datum_objave.split('.')
@@ -277,7 +278,7 @@ def CarsCrawler(number_of_pages):
# Datum obnove # Datum obnove
if (result_item.find('div', {'class' : 'ob'})): if result_item.find('div', {'class' : 'ob'}):
datum_obnove_div = result_item.find('div', {'class' : 'ob'}).attrs datum_obnove_div = result_item.find('div', {'class' : 'ob'}).attrs
datum_vrijeme = datum_obnove_div["data-content"].split('.') datum_vrijeme = datum_obnove_div["data-content"].split('.')
datum = datum_vrijeme[2] + "-" + datum_vrijeme[1] + "-" + datum_vrijeme[0] datum = datum_vrijeme[2] + "-" + datum_vrijeme[1] + "-" + datum_vrijeme[0]
@@ -307,21 +308,3 @@ def CarsCrawler(number_of_pages):
# # print(olx_db) # # print(olx_db)
# olx_db.to_excel('proba231112.xlsx',index=False) # olx_db.to_excel('proba231112.xlsx',index=False)

View File

@@ -14,7 +14,7 @@ while True:
print("_________________________________") print("_________________________________")
global num_of_crawl global num_of_crawl
if num_of_crawl == 0: if num_of_crawl == 0:
cars = CarsCrawler(5) cars = CarsCrawler(3)
num_of_crawl += 1 num_of_crawl += 1
else: else:
cars = CarsCrawler(2) cars = CarsCrawler(2)
@@ -194,4 +194,4 @@ while True:
print("Novi artikal je spasen") print("Novi artikal je spasen")
print("___________________________") print("___________________________")
Saver() Saver()
time.sleep(120) time.sleep(180)

7
kivi_cars/cars/urls.py Normal file
View File

@@ -0,0 +1,7 @@
# URL configuration for the `cars` app: exposes a single route for the
# empty path ('').
from django.urls import path
from Generator import views
# NOTE(review): `Saver` is imported here but never referenced in this module;
# the route below resolves `views.Saver` from the Generator app instead.
# Confirm which callable is intended and that `Generator.views` actually
# defines `Saver` -- otherwise loading this URLconf raises AttributeError.
from cars.saver import Saver
urlpatterns = [
# Empty path -> views.Saver (passed as the view callable to path()).
path('', views.Saver)
]

View File

@@ -25,7 +25,10 @@ SECRET_KEY = 'django-insecure-xb8dcaoj_mlmd5@(p4%!!3yzu@%%u&fk+ush=7pm2873azpy@(
# SECURITY WARNING: don't run with debug turned on in production! # SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True DEBUG = True
ALLOWED_HOSTS = [] ALLOWED_HOSTS = [
'localhost',
'127.0.0.1'
]
# Application definition # Application definition
@@ -38,6 +41,8 @@ INSTALLED_APPS = [
'django.contrib.messages', 'django.contrib.messages',
'django.contrib.staticfiles', 'django.contrib.staticfiles',
'corsheaders',
'rest_framework', 'rest_framework',
'Generator', 'Generator',
@@ -52,6 +57,20 @@ MIDDLEWARE = [
'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware', 'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware',
'corsheaders.middleware.CorsMiddleware',
'django.middleware.common.CommonMiddleware',
]
CORS_ALLOWED_ORIGINS = [
"http://localhost:3000",
"http://127.0.0.1:3000"
]
CORS_ALLOW_METHODS = [
'DELETE',
'GET',
'POST',
'PUT',
] ]
ROOT_URLCONF = 'kivi_cars.urls' ROOT_URLCONF = 'kivi_cars.urls'

View File

@@ -16,10 +16,10 @@ Including another URLconf
from django.urls.conf import include from django.urls.conf import include
from django.contrib import admin from django.contrib import admin
from django.urls import path from django.urls import path
from cars.saver import Saver
urlpatterns = [ urlpatterns = [
path('', include("cars.urls")),
path('admin/', admin.site.urls), path('admin/', admin.site.urls),
path('generator/', include("Generator.urls")), # path('generator/', include("Generator.urls")),
] ]

View File

@@ -14,3 +14,5 @@ from django.core.wsgi import get_wsgi_application
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'kivi_cars.settings') os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'kivi_cars.settings')
application = get_wsgi_application() application = get_wsgi_application()

Binary file not shown.

Binary file not shown.

Binary file not shown.