68 lines
1.5 KiB
Go
Executable File
68 lines
1.5 KiB
Go
Executable File
package parser
|
|
|
|
import (
	"errors"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
|
|
|
|
func Parse(html string) (*goquery.Document, error) {
|
|
return goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
}
|
|
|
|
func ParseResponse(resp *http.Response) (*goquery.Document, error) {
|
|
defer resp.Body.Close()
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func Select(doc *goquery.Document, css string) *goquery.Selection {
|
|
return doc.Find(css)
|
|
}
|
|
|
|
func SelectFrom(sel *goquery.Selection, css string) *goquery.Selection {
|
|
return sel.Find(css)
|
|
}
|
|
|
|
func Attr(sel *goquery.Selection, name string) string {
|
|
val, _ := sel.Attr(name)
|
|
return val
|
|
}
|
|
|
|
// AbsURL resolves a relative URL attribute against baseURL.
|
|
func AbsURL(sel *goquery.Selection, attr string, baseURL string) string {
|
|
val := Attr(sel, attr)
|
|
if val == "" {
|
|
return ""
|
|
}
|
|
base, err := url.Parse(baseURL)
|
|
if err != nil {
|
|
return val
|
|
}
|
|
ref, err := url.Parse(val)
|
|
if err != nil {
|
|
return val
|
|
}
|
|
return base.ResolveReference(ref).String()
|
|
}
|
|
|
|
// OwnText returns the text content of the element excluding child elements.
|
|
func OwnText(sel *goquery.Selection) string {
|
|
clone := sel.Clone()
|
|
clone.Children().Remove()
|
|
return strings.TrimSpace(clone.Text())
|
|
}
|
|
|
|
func TextTrim(sel *goquery.Selection) string {
|
|
return strings.TrimSpace(sel.Text())
|
|
}
|
|
|
|
func First(sel *goquery.Selection) *goquery.Selection {
|
|
return sel.First()
|
|
}
|
|
|
|
func Each(sel *goquery.Selection, fn func(i int, s *goquery.Selection)) {
|
|
sel.Each(fn)
|
|
}
|