Prva verzija - klix scraper
This commit is contained in:
26
internal/scraper/scraper.go
Normal file
26
internal/scraper/scraper.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
func extractJustText(el *goquery.Selection) string {
|
||||
textPart := ""
|
||||
htmlPart, _ := el.Html()
|
||||
if len(el.Nodes) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
//fmt.Println("Checking: ", htmlPart, "Duzina: ", strconv.Itoa(len(el.Nodes)), " Type je ", el.Nodes[0].Type, " jednakost ", el.Text() == htmlPart)
|
||||
if len(el.Nodes) == 1 && el.Text() == htmlPart {
|
||||
return el.Text() + "\n"
|
||||
}
|
||||
|
||||
el.Children().Each(func(_ int, el2 *goquery.Selection) {
|
||||
if el2.Is("div, p, span, a") {
|
||||
textPart += extractJustText(el2)
|
||||
}
|
||||
})
|
||||
|
||||
return textPart
|
||||
}
|
||||
Reference in New Issue
Block a user