Merge branch 'after-scraper-fix' into 'master'

After scraper fix

See merge request saburly/marketalarm/web!104
2020-02-28 16:14:43 +00:00
parent 2a13ab55ed df5e38092d
commit 692577fb8c
3 changed files with 22 additions and 8 deletions
--- a/app/crawler/specificCrawlers/olx.js
+++ b/app/crawler/specificCrawlers/olx.js
@@ -205,7 +205,7 @@ class OlxCrawler {
  }
  async scrapeAd(url) {
-    //console.log("Scraping : ", url);
+    console.log("Scraping : ", url);
    try {
      const adPageSource = await fetch(url);
      const body = await adPageSource.text();
@@ -240,13 +240,21 @@ class OlxCrawler {
      let price = null;
      let normalPrice = null;
      let urgentPrice = null;
-      const normalPriceValue = $("#pc > p:nth-child(2)").text();
+      const normalPriceValue = $("#pc > p:nth-child(2)")
        .text()
        .trim();
      const urgentPriceValue = $(
        "#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p"
      )
        .text()
        .trim();
      //Debug
      //console.log("Title:", title);
      //console.log("Url scraped:", url);
      // console.log("Normal price value:", normalPriceValue);
      // console.log("Urgent price value:", urgentPriceValue);
      //
      if (normalPriceValue && normalPriceValue.length > 0) {
        normalPrice = normalPriceValue
          .replace(/\r\n|\n|\r/gm, "")
@@ -288,7 +296,7 @@ class OlxCrawler {
      let otherInformationDivId;
      //We need to locate DIV ID where other information are stored
-      for (let possibleId = 10; possibleId <= 20; possibleId++) {
+      for (let possibleId = 1; possibleId <= 30; possibleId++) {
        const adTypeFieldTitle = $(
          `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${possibleId}) > div:nth-child(2) > div.df1`
        )
@@ -660,6 +668,8 @@ class OlxCrawler {
        distanceToRiver,
        numberOfViewsAgency
      };
      //
      //console.log("Scraped data:", data);
      return data;
    } catch (e) {
--- a/app/crawler/specificCrawlers/saljic.js
+++ b/app/crawler/specificCrawlers/saljic.js
@@ -274,10 +274,14 @@ class SaljicCrawler {
        .trim();
      const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
-      const tmpLatLong = latAndLongSrc.split("marker=")[1];
+      let tmpLatLong;
-      const latText = tmpLatLong.split("%2C")[0];
+      let latText;
-      const longText = tmpLatLong.split("%2C")[1];
+      let longText;
-
+      if (latAndLongSrc) {
        tmpLatLong = latAndLongSrc.split("marker=")[1];
        latText = tmpLatLong.split("%2C")[0];
        longText = tmpLatLong.split("%2C")[1];
      }
      const locationLat = parseFloat(latText) || null;
      const locationLong = parseFloat(longText) || null;
--- a/test/olxScrapeTest.js
+++ b/test/olxScrapeTest.js
@@ -9,7 +9,7 @@ if (urlToScrape) {
  (async () => {
    const data = await crawler.scrapeAd(urlToScrape);
-    console.log(data);
+    console.log("Scraped data:", data);
  })();
 } else {
  console.log("No URL to scrape. Use like this : ");