Fix scrapers
This commit is contained in:
@@ -80,7 +80,7 @@ func setupBljesakArticlePageCrawler(crArticlePage *colly.Collector) {
|
|||||||
|
|
||||||
text := ""
|
text := ""
|
||||||
|
|
||||||
e.ForEach("article.b-article-detail, div#infiniteLoadBreakpoint, div.col-article-content, div.intro, div.s-main-content", func(_ int, el *colly.HTMLElement) {
|
e.ForEach("div.col-xs-12, article.b-article-detail, div#infiniteLoadBreakpoint, div.col-article-content, div.intro, div.s-main-content, h2, h3, h4, h5", func(_ int, el *colly.HTMLElement) {
|
||||||
text += extractJustText(el.DOM)
|
text += extractJustText(el.DOM)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ func extractJustText(el *goquery.Selection) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
el.Children().Each(func(_ int, el2 *goquery.Selection) {
|
el.Children().Each(func(_ int, el2 *goquery.Selection) {
|
||||||
if el2.Is("div, p, span, a") {
|
if el2.Is("div, p, span, a, h2, h3, h4, b, i") {
|
||||||
textPart += extractJustText(el2)
|
textPart += extractJustText(el2)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ func setupSiArticlePageCrawler(crArticlePage *colly.Collector) {
|
|||||||
|
|
||||||
text := ""
|
text := ""
|
||||||
|
|
||||||
e.ForEach("p.article__top-content, p.article__content, h4.article__content, h3.article__content, h2.article__content, div.article__content", func(_ int, el *colly.HTMLElement) {
|
e.ForEach("div.article__top-content, div.article__content, h4, h3, h2, div.article__content", func(_ int, el *colly.HTMLElement) {
|
||||||
text += extractJustText(el.DOM)
|
text += extractJustText(el.DOM)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user