diff --git a/internal/scraper/blijesak.go b/internal/scraper/blijesak.go index 8fcac46..263d2c5 100644 --- a/internal/scraper/blijesak.go +++ b/internal/scraper/blijesak.go @@ -80,7 +80,7 @@ func setupBljesakArticlePageCrawler(crArticlePage *colly.Collector) { text := "" - e.ForEach("div.col-xs-12, article.b-article-detail, div.col-article-content, div.intro, div.s-main-content", func(_ int, el *colly.HTMLElement) { + e.ForEach("div.intro, div#infiniteLoadBreakpoint", func(_ int, el *colly.HTMLElement) { text += extractJustText(el.DOM) }) diff --git a/internal/scraper/srpskainfo.go b/internal/scraper/srpskainfo.go index 15198ff..71bec8f 100644 --- a/internal/scraper/srpskainfo.go +++ b/internal/scraper/srpskainfo.go @@ -79,7 +79,7 @@ func setupSiArticlePageCrawler(crArticlePage *colly.Collector) { text := "" - e.ForEach("div.article__top-content, div.article__content, h4, h3, h2, div.article__content", func(_ int, el *colly.HTMLElement) { + e.ForEach("div.article__top-content, div.article__content", func(_ int, el *colly.HTMLElement) { text += extractJustText(el.DOM) }) diff --git a/spider b/spider index 0aa5c5c..6299f26 100755 Binary files a/spider and b/spider differ