Files
goyomi/sources/base/keyoapp/keyoapp.go
T
achmad 00e61480c3 fix(base): add override hooks for masonry, madara, keyoapp
Madara:
- Add PopularURL/LatestURL Config hooks for custom URL building
  (needed by hentai4free which uses search-based popular/latest URLs)

Masonry:
- Replace CSS :not(:has(a[href*=/video/])) with programmatic filtering.
  goquery/cascadia doesn't support :has() + attribute selectors
  (Jsoup does, hence Kotlin works but Go didn't)

Keyoapp:
- Add overridable selector fields (PopularSelector, DescriptionSelector,
  StatusSelector, AuthorSelector, ArtistSelector) to Config
2026-05-14 22:31:11 +07:00

357 lines
9.9 KiB
Go
Executable File

// Package keyoapp implements the Keyoapp manga base.
// HTML scraping; popular from homepage; pages via CDN URL extracted from inline JS.
package keyoapp
import (
"context"
"fmt"
"net/http"
"net/url"
"regexp"
"strings"
"github.com/PuerkitoBio/goquery"
"goyomi/internal/httpclient/flare"
"goyomi/internal/source"
"goyomi/sources/base/util"
)
// Config describes one Keyoapp-based site: its identity plus optional CSS
// selector overrides for sites whose markup differs from the defaults used
// by this package.
type Config struct {
// Name is the source name; it is returned by Source.Name and used, together
// with Lang, to generate the source ID in New.
Name string
// BaseURL is the site root. A trailing slash is trimmed before request URLs
// are built from it.
BaseURL string
// Lang is returned by Source.Lang and used, together with Name, to generate
// the source ID in New.
Lang string
// PopularSelector overrides the selector used to find manga cards on the
// homepage. Empty means use the default.
PopularSelector string
// DescriptionSelector overrides the selector used to find the description on
// the details page. Empty means use the default.
DescriptionSelector string
// StatusSelector overrides the selector used to find the publication status
// on the details page. Empty means use the default.
StatusSelector string
// AuthorSelector overrides the selector used to find the author on the
// details page. Empty means use the default.
AuthorSelector string
// ArtistSelector overrides the selector used to find the artist on the
// details page. Empty means use the default.
ArtistSelector string
}
// Source is a manga source backed by a single Keyoapp site. Instances are
// created with New.
type Source struct {
// cfg is the site configuration passed to New.
cfg Config
// client performs every HTTP request issued by this source.
client *flare.Client
// id is the source ID generated in New from cfg.Name and cfg.Lang.
id int64
}
// New returns a Source for the site described by cfg. The source gets its own
// flare client (created with WithRateLimit(1, 2); the exact meaning of those
// arguments is defined by the flare package) and an ID generated from the
// configured name and language.
func New(cfg Config) *Source {
	client := flare.NewClient(flare.WithRateLimit(1, 2))

	src := new(Source)
	src.cfg = cfg
	src.client = client
	src.id = source.GenerateSourceID(cfg.Name, cfg.Lang)
	return src
}
// ID returns the source ID generated when the Source was created.
func (s *Source) ID() int64 {
	return s.id
}

// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}

// Lang returns the configured language.
func (s *Source) Lang() string {
	return s.cfg.Lang
}

// SupportsLatest always reports true: this base implements GetLatestUpdates.
func (s *Source) SupportsLatest() bool {
	return true
}

// base returns the configured base URL with any trailing slashes removed, so
// that paths can be appended with a single "/".
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
// get fetches rawURL with a GET request and parses the response body as an
// HTML document.
//
// The Referer header is set to the site root. It is built from base() rather
// than the raw configured BaseURL so that a BaseURL ending in "/" does not
// produce a "//" suffix. Any response status other than 200 is reported as an
// error. Request-construction and transport errors are wrapped with %w, so
// callers can still inspect the underlying cause with errors.Is / errors.As.
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, fmt.Errorf("keyoapp: building request for %q: %w", rawURL, err)
	}
	req.Header.Set("Referer", s.base()+"/")

	resp, err := s.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("keyoapp: fetching %q: %w", rawURL, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("keyoapp: HTTP %d", resp.StatusCode)
	}
	return goquery.NewDocumentFromReader(resp.Body)
}
// imgURLRe captures the target of the first CSS url(...) token in a style
// attribute, with or without surrounding quotes.
var imgURLRe = regexp.MustCompile(`url\(['"]?([^('")\s]+)`)

// stripWidthParam removes the "w" query parameter (used by Keyoapp for
// thumbnail sizing) from raw. URLs that cannot be parsed, or that carry no
// "w" parameter, are returned untouched so that their query string is not
// needlessly re-encoded and re-ordered.
func stripWidthParam(raw string) string {
	parsed, err := url.Parse(raw)
	if err != nil {
		return raw
	}
	q := parsed.Query()
	if _, ok := q["w"]; !ok {
		return raw
	}
	q.Del("w")
	parsed.RawQuery = q.Encode()
	return parsed.String()
}

// getImageURL returns the absolute URL of the first CSS background-image
// found on el or any of its descendants, or "" when there is none.
//
// el itself is inspected before its descendants: callers such as
// GetMangaDetails pass the element that carries the style attribute, which a
// descendants-only search would miss. The result is resolved against baseURL
// via util.AbsURL.
func getImageURL(el *goquery.Selection, baseURL string) string {
	if el == nil {
		return ""
	}

	// Self first, then descendants; AddSelection appends without duplicates.
	candidates := el.Filter("[style]").AddSelection(el.Find("[style]"))

	var found string
	candidates.EachWithBreak(func(_ int, node *goquery.Selection) bool {
		style := node.AttrOr("style", "")
		if !strings.Contains(style, "background-image") {
			return true
		}
		m := imgURLRe.FindStringSubmatch(style)
		if len(m) < 2 {
			return true
		}
		found = stripWidthParam(m[1])
		return false // first match wins; stop iterating
	})

	if found == "" {
		return ""
	}
	return util.AbsURL(baseURL, found)
}
// relURL converts raw into a site-relative reference when it points at the
// same host as baseURL, and returns raw unchanged otherwise (different host,
// already relative while baseURL has a host, or either URL unparsable).
//
// The relative form keeps the escaped path, the query string and the
// fragment, so links such as "/read?id=5#p2" survive the conversion and stay
// valid when later re-joined with the base URL. Hosts are compared
// case-insensitively because host names are not case-sensitive.
func relURL(raw, baseURL string) string {
	u, err := url.Parse(raw)
	if err != nil {
		return raw
	}
	base, err := url.Parse(baseURL)
	if err != nil {
		return raw
	}
	if !strings.EqualFold(u.Host, base.Host) {
		return raw
	}

	rel := u.EscapedPath()
	if u.RawQuery != "" {
		rel += "?" + u.RawQuery
	}
	if u.Fragment != "" {
		rel += "#" + u.EscapedFragment()
	}
	return rel
}
// mangaFromElement builds an SManga from a listing card: the thumbnail comes
// from a background-image inside the card, and the title and URL come from
// the card's first anchor that has an href. Fields stay empty when the
// corresponding markup is missing.
func (s *Source) mangaFromElement(el *goquery.Selection) source.SManga {
	manga := source.SManga{ThumbnailURL: getImageURL(el, s.cfg.BaseURL)}

	link := el.Find("a[href]").First()
	if link.Length() == 0 {
		return manga
	}
	manga.Title = link.AttrOr("title", "")
	manga.URL = relURL(link.AttrOr("href", ""), s.cfg.BaseURL)
	return manga
}
// popularSelector returns the CSS selector for popular-manga cards on the
// homepage. A configured PopularSelector takes precedence; otherwise the
// default is a comma-separated union of one selector per known section label.
func (s *Source) popularSelector() string {
	if override := s.cfg.PopularSelector; override != "" {
		return override
	}

	labels := [...]string{"Popular", "Popularie", "Trending"}
	var b strings.Builder
	for i, label := range labels {
		if i > 0 {
			b.WriteString(", ")
		}
		fmt.Fprintf(&b, "div:contains(%s) + div .group.overflow-hidden.grid", label)
	}
	return b.String()
}
// GetPopularManga scrapes the popular section of the site's homepage.
// The page argument is ignored: everything comes from a single request and
// HasNextPage is always false. Cards without both a URL and a title are
// skipped.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	doc, err := s.get(context.Background(), s.base())
	if err != nil {
		return source.MangasPage{}, err
	}

	var found []source.SManga
	cards := doc.Find(s.popularSelector())
	cards.Each(func(_ int, card *goquery.Selection) {
		entry := s.mangaFromElement(card)
		if entry.URL == "" || entry.Title == "" {
			return
		}
		found = append(found, entry)
	})

	return source.MangasPage{Mangas: found, HasNextPage: false}, nil
}
// GetLatestUpdates scrapes the site's "/latest/" page. The page argument is
// ignored: everything comes from a single request and HasNextPage is always
// false. Cards without both a URL and a title are skipped.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	latestURL := s.base() + "/latest/"
	doc, err := s.get(context.Background(), latestURL)
	if err != nil {
		return source.MangasPage{}, err
	}

	var found []source.SManga
	cards := doc.Find("div.grid > div.group")
	cards.Each(func(_ int, card *goquery.Selection) {
		entry := s.mangaFromElement(card)
		if entry.URL == "" || entry.Title == "" {
			return
		}
		found = append(found, entry)
	})

	return source.MangasPage{Mangas: found, HasNextPage: false}, nil
}
// GetSearchManga searches the site's series listing for query.
//
// The query is URL-escaped before being placed in the request, so spaces,
// "&", "#" and non-ASCII characters cannot corrupt the query string. Results
// are additionally filtered client-side: an entry is kept only when its
// title attribute contains query, compared case-insensitively. An empty
// query keeps every entry.
//
// The page and filters arguments are ignored; HasNextPage is always false.
// Entries for which no URL can be determined are skipped. When the entry's
// anchor carries no title, the title attribute of the entry itself is used.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	u := fmt.Sprintf("%s/series/?q=%s", s.base(), url.QueryEscape(query))
	doc, err := s.get(context.Background(), u)
	if err != nil {
		return source.MangasPage{}, err
	}

	needle := strings.ToLower(query) // hoisted: invariant across entries

	var mangas []source.SManga
	doc.Find("#searched_series_page > button").Each(func(_ int, el *goquery.Selection) {
		title := el.AttrOr("title", "")
		if needle != "" && !strings.Contains(strings.ToLower(title), needle) {
			return
		}

		m := s.mangaFromElement(el)
		if m.URL == "" {
			// mangaFromElement already inspected the entry's first anchor;
			// without a URL the entry cannot be opened, so drop it.
			return
		}
		if m.Title == "" {
			m.Title = title
		}
		mangas = append(mangas, m)
	})

	return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil
}
// parseStatus maps the trimmed, lower-cased text of s to a source status
// constant. A nil or empty selection, or any unrecognised text, yields
// source.StatusUnknown.
func parseStatus(s *goquery.Selection) int {
	if s == nil || s.Length() == 0 {
		return source.StatusUnknown
	}

	known := map[string]int{
		"ongoing":   source.StatusOngoing,
		"dropped":   source.StatusCancelled,
		"paused":    source.StatusHiatus,
		"completed": source.StatusCompleted,
	}

	label := strings.ToLower(strings.TrimSpace(s.Text()))
	if status, ok := known[label]; ok {
		return status
	}
	return source.StatusUnknown
}
// GetMangaDetails fetches the series page for manga and extracts its
// thumbnail, description, status, author, artist, title and genres.
//
// Each of the description/status/author/artist selectors can be overridden
// through Config; an empty override falls back to the built-in default. When
// the page has no h1 text, the title of the incoming manga is kept. On a
// fetch error the incoming manga is returned unchanged alongside the error.
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	pageURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
	doc, err := s.get(context.Background(), pageURL)
	if err != nil {
		return manga, err
	}

	// orDefault picks the configured override when present.
	orDefault := func(override, fallback string) string {
		if override != "" {
			return override
		}
		return fallback
	}
	// firstText returns the trimmed text of the first match of sel.
	firstText := func(sel string) string {
		return strings.TrimSpace(doc.Find(sel).First().Text())
	}

	// Thumbnail: background-image style associated with div[class*=photoURL].
	thumb := getImageURL(doc.Find("div[class*=photoURL]").First(), s.cfg.BaseURL)

	description := firstText(orDefault(s.cfg.DescriptionSelector, "div:containsOwn(Synopsis) ~ div"))
	statusSel := orDefault(s.cfg.StatusSelector, "div:has(span:containsOwn(Status)) ~ div")
	status := parseStatus(doc.Find(statusSel).First())
	author := firstText(orDefault(s.cfg.AuthorSelector, "div:has(span:containsOwn(Author)) ~ div"))
	artist := firstText(orDefault(s.cfg.ArtistSelector, "div:has(span:containsOwn(Artist)) ~ div"))

	// Title: first h1 on the page, falling back to what the caller knew.
	title := firstText("h1")
	if title == "" {
		title = manga.Title
	}

	// Genres: links in the header grid, excluding the status link.
	var genres []string
	doc.Find("div.grid:has(>h1) > div > a:not([title='Status'])").Each(func(_ int, link *goquery.Selection) {
		name := strings.TrimSpace(link.Text())
		if name == "" {
			return
		}
		genres = append(genres, name)
	})

	details := source.SManga{URL: manga.URL}
	details.ThumbnailURL = thumb
	details.Description = description
	details.Status = status
	details.Author = author
	details.Artist = artist
	details.Title = title
	details.Genre = strings.Join(genres, ", ")
	return details, nil
}
// GetChapterList fetches the series page for manga and returns its chapters
// in page order.
//
// Rows matching ":has(.text-sm span)" are excluded by the selector (upcoming
// and, optionally, paid chapters). A chapter's URL is taken from a nested
// anchor when one exists, otherwise from the row's own href; rows without a
// URL are dropped. The upload date is parsed first as a relative date and,
// when that yields zero, as an absolute "Jan 2, 2006" date.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	pageURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
	doc, err := s.get(context.Background(), pageURL)
	if err != nil {
		return nil, err
	}

	const rowSelector = "#chapters > a:not(:has(.text-sm span))"

	var list []source.SChapter
	doc.Find(rowSelector).Each(func(_ int, row *goquery.Selection) {
		var ch source.SChapter

		if inner := row.Find("a[href]").First(); inner.Length() > 0 {
			ch.URL = relURL(inner.AttrOr("href", ""), s.cfg.BaseURL)
		}
		if ch.URL == "" {
			ch.URL = relURL(row.AttrOr("href", ""), s.cfg.BaseURL)
		}

		ch.Name = strings.TrimSpace(row.Find(".text-sm").Text())

		if stamp := row.Find(".text-xs").First(); stamp.Length() > 0 {
			text := strings.TrimSpace(stamp.Text())
			ch.DateUpload = util.ParseRelativeDate(text)
			if ch.DateUpload == 0 {
				ch.DateUpload = util.ParseAbsoluteDate(text, "Jan 2, 2006")
			}
		}

		if ch.URL == "" {
			return
		}
		list = append(list, ch)
	})

	return list, nil
}
var (
	// cdnHostRe captures the host portion of the template literal assigned to
	// realUrl in the page's inline JavaScript.
	cdnHostRe = regexp.MustCompile("realUrl\\s*=\\s*`[^`]+//([^/`]+)")
	// cdnCleanRe matches ${...} template placeholders so they can be removed
	// from the captured host.
	cdnCleanRe = regexp.MustCompile(`\$\{[^}]*\}`)
)

// getCdnURL scans the document's script elements for a realUrl assignment
// and returns "https://<host>/uploads" built from the first one found, with
// any ${...} placeholders stripped from the host. It returns "" when no
// script matches.
func getCdnURL(doc *goquery.Document) string {
	result := ""
	doc.Find("script").EachWithBreak(func(_ int, script *goquery.Selection) bool {
		// A script whose HTML cannot be rendered is treated as empty.
		body, _ := script.Html()
		match := cdnHostRe.FindStringSubmatch(body)
		if len(match) < 2 {
			return true
		}
		result = "https://" + cdnCleanRe.ReplaceAllString(match[1], "") + "/uploads"
		return false
	})
	return result
}
// oldCdnRe matches image URLs served directly from the legacy keyoapp CDN,
// with an http(s) scheme or in protocol-relative form.
var oldCdnRe = regexp.MustCompile(`^(https?:)?//cdn\d*\.keyoapp\.com`)

// GetPageList fetches the chapter page and returns its images in reading
// order.
//
// Primary strategy: when a CDN base URL can be extracted from the page's
// inline script, every "#pages > img" with a non-empty uid attribute becomes
// "<cdn>/<uid>". Fallback: when that yields nothing, images whose src (or
// data-src) points at the legacy CDN are used directly.
//
// Page indices are assigned after filtering, so they are always contiguous
// from zero even when some img elements are skipped. Protocol-relative
// legacy URLs ("//cdn...") are given an explicit https scheme so the
// returned ImageURL is directly fetchable.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
	if err != nil {
		return nil, err
	}

	var pages []source.Page

	// Primary: #pages > img[uid] combined with the extracted CDN URL.
	if cdnURL := getCdnURL(doc); cdnURL != "" {
		doc.Find("#pages > img[uid]").Each(func(_ int, img *goquery.Selection) {
			uid := img.AttrOr("uid", "")
			if uid == "" {
				return
			}
			pages = append(pages, source.Page{Index: len(pages), ImageURL: cdnURL + "/" + uid})
		})
	}
	if len(pages) > 0 {
		return pages, nil
	}

	// Fallback: legacy CDN URL taken straight from src / data-src.
	doc.Find("#pages > img").Each(func(_ int, img *goquery.Selection) {
		src := img.AttrOr("src", "")
		if src == "" {
			src = img.AttrOr("data-src", "")
		}
		if !oldCdnRe.MatchString(src) {
			return
		}
		if strings.HasPrefix(src, "//") {
			src = "https:" + src
		}
		pages = append(pages, source.Page{Index: len(pages), ImageURL: src})
	})
	return pages, nil
}
// GetImageURL returns the image URL already stored on page; no request is
// made because GetPageList resolves image URLs up front.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}

// GetFilterList returns nil: this base exposes no search filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}