Files
old-svevijesti/internal/scraper/scraper.go
2022-02-17 18:58:16 +01:00

27 lines
601 B
Go

package scraper
import (
"github.com/PuerkitoBio/goquery"
)
// extractJustText collects the text of the leaf elements found under el,
// one leaf per line.
//
// An empty selection yields "". A selection holding exactly one element that
// has no element children is treated as a leaf: its text is returned followed
// by "\n". Otherwise the function recurses into the direct children matching
// div, p, span, a, h2, h3, h4, b or i and concatenates their results in
// document order. Text placed directly inside a non-leaf element, and any
// child with another tag name, is skipped.
func extractJustText(el *goquery.Selection) string {
	if len(el.Nodes) == 0 {
		return ""
	}

	// Detect leaves structurally instead of comparing Text() with Html():
	// Html() returns re-rendered markup in which &, <, > and quotes are
	// escaped, so that comparison fails for any leaf whose text contains one
	// of those characters and the text would be silently dropped.
	if len(el.Nodes) == 1 && el.Children().Length() == 0 {
		return el.Text() + "\n"
	}

	textPart := ""
	el.Children().Each(func(_ int, child *goquery.Selection) {
		// Only descend into the tags that are expected to carry article text.
		if child.Is("div, p, span, a, h2, h3, h4, b, i") {
			textPart += extractJustText(child)
		}
	})
	return textPart
}