delete proba files
This commit is contained in:
@@ -145,7 +145,7 @@ def CarsCrawler(number_of_pages):
|
||||
|
||||
## Getting all IDs of articles
|
||||
for i in range(0, len(results_all_items_per_page)):
|
||||
if(results_all_items_per_page[i].find('p')):
|
||||
if results_all_items_per_page[i].find('p'):
|
||||
# Divide id from rest of link
|
||||
address_content = results_all_items_per_page[i].find('a')['href']
|
||||
temp = address_content.split('/')
|
||||
@@ -159,7 +159,8 @@ def CarsCrawler(number_of_pages):
|
||||
podaci = filters.copy()
|
||||
|
||||
# Add kategorija
|
||||
if (category_of_vehicle == 18): podaci['Kategorija'] = ('Automobili')
|
||||
if category_of_vehicle == 18:
|
||||
podaci['Kategorija'] = ('Automobili')
|
||||
|
||||
# Artikal olx_link
|
||||
artikal_link = 'https://www.olx.ba/artikal/' + olx_id[i]
|
||||
@@ -175,7 +176,7 @@ def CarsCrawler(number_of_pages):
|
||||
# Osnovni filteri
|
||||
|
||||
# Cijena
|
||||
if (result_item.find('div',{'id':'pc'})):
|
||||
if result_item.find('div',{'id':'pc'}):
|
||||
x_cijena = result_item.find('div',{'id':'pc'}).findAll('p')
|
||||
item_cijena = x_cijena[1].get_text().split()[0]
|
||||
if(item_cijena == 'Po'):
|
||||
@@ -185,7 +186,7 @@ def CarsCrawler(number_of_pages):
|
||||
|
||||
# Lokacija
|
||||
#kanton
|
||||
if (result_item.find('div',{'class':'mobile-lokacija'})):
|
||||
if result_item.find('div',{'class':'mobile-lokacija'}):
|
||||
x_lokacija = result_item.find('div',{'class':'mobile-lokacija'})['data-content'].split()
|
||||
item_kanton = x_lokacija[0].replace(',','')
|
||||
podaci['Lokacija_kanton'] = item_kanton
|
||||
@@ -199,47 +200,47 @@ def CarsCrawler(number_of_pages):
|
||||
# print(podaci['Lokacija_grad'])
|
||||
|
||||
# Stanje
|
||||
if (result_item.find('div',{'class':'mobile-stanje'})):
|
||||
if result_item.find('div',{'class':'mobile-stanje'}):
|
||||
x_stanje = result_item.find('div',{'class':'mobile-stanje'}).get_text().split()
|
||||
item_stanje = x_stanje[1]
|
||||
podaci['Stanje'] = item_stanje
|
||||
# print(podaci['Stanje'])
|
||||
|
||||
# Ispod divs
|
||||
if (result_item.find_all('div',{'class':'ispod'})):
|
||||
if result_item.find_all('div',{'class':'ispod'}):
|
||||
ispod_divs = result_item.find_all('div',{'class':'ispod'})
|
||||
for i in range (len(ispod_divs)):
|
||||
|
||||
# Proizvodjac
|
||||
if (i==0):
|
||||
if i==0:
|
||||
x_proizvodjac_p = ispod_divs[0].find_all('p')
|
||||
x_proizvodjac = x_proizvodjac_p[1].get_text()
|
||||
podaci['Proizvođač'] = x_proizvodjac
|
||||
# print(podaci['Proizvođač'])
|
||||
|
||||
# Model
|
||||
if (i==1):
|
||||
if i==1:
|
||||
x_model_p = ispod_divs[1].find_all('p')
|
||||
x_model = x_model_p[1].get_text()
|
||||
podaci['Model'] = x_model
|
||||
# print(podaci['Model'])
|
||||
|
||||
# Godiste
|
||||
if (i==2):
|
||||
if i==2:
|
||||
x_godiste_p = ispod_divs[2].find_all('p')
|
||||
x_godiste = x_godiste_p[1].get_text()
|
||||
podaci['Godište'] = x_godiste
|
||||
# print(podaci['Godište'])
|
||||
|
||||
# Kilometraza
|
||||
if (i==3):
|
||||
if i==3:
|
||||
x_kilometraza_p = ispod_divs[3].find_all('p')
|
||||
x_kilometraza = x_kilometraza_p[1].get_text()
|
||||
podaci['Kilometraža'] = x_kilometraza
|
||||
# print(podaci['Kilometraža'])
|
||||
|
||||
# Gorivo
|
||||
if (i==4):
|
||||
if i==4:
|
||||
x_gorivo_p = ispod_divs[4].find_all('p')
|
||||
x_gorivo = x_gorivo_p[1].get_text()
|
||||
podaci['Gorivo'] = x_gorivo
|
||||
@@ -249,14 +250,14 @@ def CarsCrawler(number_of_pages):
|
||||
# Napredni filteri
|
||||
|
||||
# Dodatna polja
|
||||
if (result_item.find_all('div',{'id':'dodatnapolja1'})):
|
||||
if result_item.find_all('div',{'id':'dodatnapolja1'}):
|
||||
dodatnapolja_all_divs = result_item.find_all('div',{'id':'dodatnapolja1'})
|
||||
for i in range (0,len(dodatnapolja_all_divs)):
|
||||
if(dodatnapolja_all_divs[i].find_all('div',{'class','df'})):
|
||||
if dodatnapolja_all_divs[i].find_all('div',{'class','df'}):
|
||||
df_pom = dodatnapolja_all_divs[i].find_all('div',{'class','df'})
|
||||
for j in range (0,len(df_pom)):
|
||||
df_pom1 = df_pom[j].find('div',{'class','df1'}).get_text()
|
||||
if (df_pom[j].find('div',{'class','df2'}).find('i')):
|
||||
if df_pom[j].find('div',{'class','df2'}).find('i'):
|
||||
df_pom2 = True
|
||||
else : df_pom2 = df_pom[j].find('div',{'class','df2'}).get_text()
|
||||
podaci[df_pom1] = df_pom2
|
||||
@@ -268,7 +269,7 @@ def CarsCrawler(number_of_pages):
|
||||
# print(kw)
|
||||
|
||||
# Datum objave
|
||||
if (result_item.find('time', {'class' : 'entry-date'})):
|
||||
if result_item.find('time', {'class' : 'entry-date'}):
|
||||
date_objave_div = result_item.find('time', {'class' : 'entry-date'}).get_text().split()
|
||||
datum_objave = date_objave_div[0]
|
||||
datum_objave = datum_objave.split('.')
|
||||
@@ -277,7 +278,7 @@ def CarsCrawler(number_of_pages):
|
||||
|
||||
|
||||
# Datum obnove
|
||||
if (result_item.find('div', {'class' : 'ob'})):
|
||||
if result_item.find('div', {'class' : 'ob'}):
|
||||
datum_obnove_div = result_item.find('div', {'class' : 'ob'}).attrs
|
||||
datum_vrijeme = datum_obnove_div["data-content"].split('.')
|
||||
datum = datum_vrijeme[2] + "-" + datum_vrijeme[1] + "-" + datum_vrijeme[0]
|
||||
@@ -306,22 +307,4 @@ def CarsCrawler(number_of_pages):
|
||||
# olx_db = pd.DataFrame(podaci_db) # treba biti niz
|
||||
# # print(olx_db)
|
||||
|
||||
# olx_db.to_excel('proba231112.xlsx',index=False)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# olx_db.to_excel('proba231112.xlsx',index=False)
|
||||
@@ -14,7 +14,7 @@ while True:
|
||||
print("_________________________________")
|
||||
global num_of_crawl
|
||||
if num_of_crawl == 0:
|
||||
cars = CarsCrawler(5)
|
||||
cars = CarsCrawler(3)
|
||||
num_of_crawl += 1
|
||||
else:
|
||||
cars = CarsCrawler(2)
|
||||
@@ -194,4 +194,4 @@ while True:
|
||||
print("Novi artikal je spasen")
|
||||
print("___________________________")
|
||||
Saver()
|
||||
time.sleep(120)
|
||||
time.sleep(180)
|
||||
7
kivi_cars/cars/urls.py
Normal file
7
kivi_cars/cars/urls.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from django.urls import path
|
||||
from Generator import views
|
||||
from cars.saver import Saver
|
||||
|
||||
urlpatterns = [
|
||||
path('', views.Saver)
|
||||
]
|
||||
@@ -25,7 +25,10 @@ SECRET_KEY = 'django-insecure-xb8dcaoj_mlmd5@(p4%!!3yzu@%%u&fk+ush=7pm2873azpy@(
|
||||
# SECURITY WARNING: don't run with debug turned on in production!
|
||||
DEBUG = True
|
||||
|
||||
ALLOWED_HOSTS = []
|
||||
ALLOWED_HOSTS = [
|
||||
'localhost',
|
||||
'127.0.0.1'
|
||||
]
|
||||
|
||||
|
||||
# Application definition
|
||||
@@ -38,6 +41,8 @@ INSTALLED_APPS = [
|
||||
'django.contrib.messages',
|
||||
'django.contrib.staticfiles',
|
||||
|
||||
'corsheaders',
|
||||
|
||||
|
||||
'rest_framework',
|
||||
'Generator',
|
||||
@@ -52,6 +57,20 @@ MIDDLEWARE = [
|
||||
'django.contrib.auth.middleware.AuthenticationMiddleware',
|
||||
'django.contrib.messages.middleware.MessageMiddleware',
|
||||
'django.middleware.clickjacking.XFrameOptionsMiddleware',
|
||||
'corsheaders.middleware.CorsMiddleware',
|
||||
'django.middleware.common.CommonMiddleware',
|
||||
]
|
||||
|
||||
CORS_ALLOWED_ORIGINS = [
|
||||
"http://localhost:3000",
|
||||
"http://127.0.0.1:3000"
|
||||
]
|
||||
|
||||
CORS_ALLOW_METHODS = [
|
||||
'DELETE',
|
||||
'GET',
|
||||
'POST',
|
||||
'PUT',
|
||||
]
|
||||
|
||||
ROOT_URLCONF = 'kivi_cars.urls'
|
||||
|
||||
@@ -16,10 +16,10 @@ Including another URLconf
|
||||
from django.urls.conf import include
|
||||
from django.contrib import admin
|
||||
from django.urls import path
|
||||
from cars.saver import Saver
|
||||
|
||||
urlpatterns = [
|
||||
path('', include("cars.urls")),
|
||||
path('admin/', admin.site.urls),
|
||||
path('generator/', include("Generator.urls")),
|
||||
# path('generator/', include("Generator.urls")),
|
||||
]
|
||||
|
||||
|
||||
@@ -14,3 +14,5 @@ from django.core.wsgi import get_wsgi_application
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'kivi_cars.settings')
|
||||
|
||||
application = get_wsgi_application()
|
||||
|
||||
|
||||
|
||||
BIN
proba2.xlsx
BIN
proba2.xlsx
Binary file not shown.
BIN
proba3.xlsx
BIN
proba3.xlsx
Binary file not shown.
BIN
proba4.xlsx
BIN
proba4.xlsx
Binary file not shown.
Reference in New Issue
Block a user