Files
goyomi/internal/parser/html.go
T
2026-05-11 06:48:23 +00:00

68 lines
1.5 KiB
Go
Executable File

package parser
import (
"net/http"
"net/url"
"strings"
"github.com/PuerkitoBio/goquery"
)
// Parse builds a goquery document from an in-memory HTML string.
// Any error reported by goquery is returned unchanged.
func Parse(html string) (*goquery.Document, error) {
	src := strings.NewReader(html)
	return goquery.NewDocumentFromReader(src)
}
// ParseResponse builds a goquery document from the body of an HTTP response.
//
// The response body is always closed before returning, so the caller must not
// read from or close it afterwards. resp must be non-nil: it is dereferenced
// unconditionally. The status code is not inspected.
func ParseResponse(resp *http.Response) (*goquery.Document, error) {
	body := resp.Body
	defer body.Close()

	return goquery.NewDocumentFromReader(body)
}
// Select returns every element in doc that matches the CSS selector css.
func Select(doc *goquery.Document, css string) *goquery.Selection {
	matches := doc.Find(css)
	return matches
}
// SelectFrom returns the descendants of sel that match the CSS selector css.
func SelectFrom(sel *goquery.Selection, css string) *goquery.Selection {
	matches := sel.Find(css)
	return matches
}
// Attr returns the value of the attribute name on sel, or "" when the
// attribute is not present. A missing attribute and an empty one are
// therefore indistinguishable to the caller.
func Attr(sel *goquery.Selection, name string) string {
	return sel.AttrOr(name, "")
}
// AbsURL reads the attribute attr from sel and resolves it against baseURL.
//
// Whitespace surrounding the attribute value is stripped before resolution.
// HTML permits URL attributes to be padded with spaces or newlines; without
// trimming, padding spaces end up percent-encoded in the result and
// newline-padded values fail to parse at all.
//
// It returns "" when the attribute is missing, empty, or whitespace-only.
// If either baseURL or the attribute value cannot be parsed as a URL, the
// trimmed attribute value is returned as-is (best effort, no error).
func AbsURL(sel *goquery.Selection, attr string, baseURL string) string {
	val := strings.TrimSpace(Attr(sel, attr))
	if val == "" {
		return ""
	}

	base, err := url.Parse(baseURL)
	if err != nil {
		// Without a usable base there is nothing to resolve against.
		return val
	}

	ref, err := url.Parse(val)
	if err != nil {
		return val
	}

	return base.ResolveReference(ref).String()
}
// OwnText returns the whitespace-trimmed text that belongs directly to sel,
// leaving out any text contributed by its child elements.
//
// The work is done on a clone, so the selection passed in is not modified.
func OwnText(sel *goquery.Selection) string {
	stripped := sel.Clone()
	// Drop element children from the copy; what remains is sel's own text.
	stripped.Children().Remove()

	text := stripped.Text()
	return strings.TrimSpace(text)
}
// TextTrim returns the combined text of sel, including descendants, with
// leading and trailing whitespace removed.
func TextTrim(sel *goquery.Selection) string {
	raw := sel.Text()
	return strings.TrimSpace(raw)
}
// First narrows sel to its first matched element.
func First(sel *goquery.Selection) *goquery.Selection {
	// Index 0 is the first element of the matched set.
	return sel.Eq(0)
}
// Each calls fn once for every element in sel, passing the element's index
// within the selection and a selection holding just that element.
func Each(sel *goquery.Selection, fn func(i int, s *goquery.Selection)) {
	// The selection returned by goquery for chaining is not needed here.
	_ = sel.Each(fn)
}