9a42dd2ab1
- Remove global ProxyEnabled() logic from httpclient - Each source now explicitly chooses client at import time: - flare client: for JS-rendering/cloudflare sources - normal httpclient: for REST API sources - Updated 29 base sources based on Kotlin reference (network.cloudflareClient)
326 lines
9.1 KiB
Go
Executable File
326 lines
9.1 KiB
Go
Executable File
// Package keyoapp implements the Keyoapp manga base.
|
|
// HTML scraping; popular from homepage; pages via CDN URL extracted from inline JS.
|
|
package keyoapp
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"goyomi/internal/httpclient/flare"
|
|
"goyomi/internal/source"
|
|
"goyomi/sources/base/util"
|
|
)
|
|
|
|
// Config describes a single Keyoapp-based site.
type Config struct {
	// Name is the display name returned by Source.Name; it is also one of the
	// two inputs used to derive the source ID.
	Name string

	// BaseURL is the site root. Trailing slashes are tolerated.
	BaseURL string

	// Lang is the language code returned by Source.Lang; it is the other
	// input used to derive the source ID.
	Lang string
}
|
|
|
|
type Source struct {
|
|
cfg Config
|
|
client *flare.Client
|
|
id int64
|
|
}
|
|
|
|
func New(cfg Config) *Source {
|
|
c := flare.NewClient(flare.WithRateLimit(1, 2))
|
|
return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
|
|
}
|
|
|
|
func (s *Source) ID() int64 { return s.id }
|
|
func (s *Source) Name() string { return s.cfg.Name }
|
|
func (s *Source) Lang() string { return s.cfg.Lang }
|
|
func (s *Source) SupportsLatest() bool { return true }
|
|
|
|
// base returns the configured base URL with every trailing "/" removed, so
// callers can append paths that start with a slash.
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("keyoapp: HTTP %d", resp.StatusCode)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
// imgURLRe captures the target of a CSS url(...) expression. An optional
// opening quote is skipped, and the capture ends at the first whitespace,
// quote or parenthesis.
var imgURLRe = regexp.MustCompile(`url\(['"]?([^('")\s]+)`)
|
|
|
|
func getImageURL(el *goquery.Selection, baseURL string) string {
|
|
// Find any descendant with background-image style
|
|
var u string
|
|
el.Find("*[style]").Each(func(_ int, s *goquery.Selection) {
|
|
if u != "" {
|
|
return
|
|
}
|
|
style := s.AttrOr("style", "")
|
|
if !strings.Contains(style, "background-image") {
|
|
return
|
|
}
|
|
if m := imgURLRe.FindStringSubmatch(style); len(m) > 1 {
|
|
raw := m[1]
|
|
// strip w= query that keyoapp uses for thumbnail sizing
|
|
if parsed, err := url.Parse(raw); err == nil {
|
|
q := parsed.Query()
|
|
q.Del("w")
|
|
parsed.RawQuery = q.Encode()
|
|
u = parsed.String()
|
|
} else {
|
|
u = raw
|
|
}
|
|
}
|
|
})
|
|
if u != "" {
|
|
return util.AbsURL(baseURL, u)
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// relURL converts raw into a site-relative reference when it points at the
// same host as baseURL, and returns raw unchanged otherwise.
//
// For a same-host URL the result is the path followed by "?query" when a query
// string is present, so links that identify a resource through query
// parameters keep working. The fragment is dropped because it is never sent to
// the server. Hosts are compared case-insensitively. If either raw or baseURL
// fails to parse, raw is returned as is.
func relURL(raw, baseURL string) string {
	u, err := url.Parse(raw)
	if err != nil {
		return raw
	}
	base, err := url.Parse(baseURL)
	if err != nil {
		return raw
	}
	// Host names are case-insensitive and url.Parse does not normalize them.
	if !strings.EqualFold(u.Host, base.Host) {
		return raw
	}
	if u.RawQuery == "" {
		return u.Path
	}
	return u.Path + "?" + u.RawQuery
}
|
|
|
|
func (s *Source) mangaFromElement(el *goquery.Selection) source.SManga {
|
|
m := source.SManga{}
|
|
m.ThumbnailURL = getImageURL(el, s.cfg.BaseURL)
|
|
el.Find("a[href]").First().Each(func(_ int, a *goquery.Selection) {
|
|
m.Title = a.AttrOr("title", "")
|
|
href := a.AttrOr("href", "")
|
|
m.URL = relURL(href, s.cfg.BaseURL)
|
|
})
|
|
return m
|
|
}
|
|
|
|
// popularSelectors lists the heading texts matched by popularSelector when
// locating the popular section of the homepage.
var popularSelectors = []string{"Popular", "Popularie", "Trending"}

// popularSelector returns a CSS selector group with one alternative per entry
// in popularSelectors. Each alternative selects the card grid inside the div
// that immediately follows a div containing the heading text.
func popularSelector() string {
	const pattern = "div:contains(%s) + div .group.overflow-hidden.grid"

	var b strings.Builder
	for i, heading := range popularSelectors {
		if i > 0 {
			b.WriteString(", ")
		}
		fmt.Fprintf(&b, pattern, heading)
	}
	return b.String()
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.base())
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
var mangas []source.SManga
|
|
doc.Find(popularSelector()).Each(func(_ int, el *goquery.Selection) {
|
|
m := s.mangaFromElement(el)
|
|
if m.URL != "" && m.Title != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.base()+"/latest/")
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
var mangas []source.SManga
|
|
doc.Find("div.grid > div.group").Each(func(_ int, el *goquery.Selection) {
|
|
m := s.mangaFromElement(el)
|
|
if m.URL != "" && m.Title != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
u := fmt.Sprintf("%s/series/?q=%s", s.base(), query)
|
|
doc, err := s.get(context.Background(), u)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
var mangas []source.SManga
|
|
// Filter client-side by title
|
|
doc.Find("#searched_series_page > button").Each(func(_ int, el *goquery.Selection) {
|
|
title := el.AttrOr("title", "")
|
|
if query != "" && !strings.Contains(strings.ToLower(title), strings.ToLower(query)) {
|
|
return
|
|
}
|
|
m := s.mangaFromElement(el)
|
|
if m.URL == "" {
|
|
el.Find("a[href]").First().Each(func(_ int, a *goquery.Selection) {
|
|
m.URL = relURL(a.AttrOr("href", ""), s.cfg.BaseURL)
|
|
m.Title = a.AttrOr("title", strings.TrimSpace(a.Text()))
|
|
})
|
|
}
|
|
if m.Title == "" {
|
|
m.Title = title
|
|
}
|
|
if m.URL != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil
|
|
}
|
|
|
|
func parseStatus(s *goquery.Selection) int {
|
|
if s == nil || s.Length() == 0 {
|
|
return source.StatusUnknown
|
|
}
|
|
switch strings.ToLower(strings.TrimSpace(s.Text())) {
|
|
case "ongoing":
|
|
return source.StatusOngoing
|
|
case "dropped":
|
|
return source.StatusCancelled
|
|
case "paused":
|
|
return source.StatusHiatus
|
|
case "completed":
|
|
return source.StatusCompleted
|
|
default:
|
|
return source.StatusUnknown
|
|
}
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := source.SManga{URL: manga.URL}
|
|
|
|
// Thumbnail from div[class*=photoURL] background-image style
|
|
result.ThumbnailURL = getImageURL(doc.Find("div[class*=photoURL]").First(), s.cfg.BaseURL)
|
|
result.Description = strings.TrimSpace(doc.Find("div:containsOwn(Synopsis) ~ div").First().Text())
|
|
result.Status = parseStatus(doc.Find("div:has(span:containsOwn(Status)) ~ div").First())
|
|
result.Author = strings.TrimSpace(doc.Find("div:has(span:containsOwn(Author)) ~ div").First().Text())
|
|
result.Artist = strings.TrimSpace(doc.Find("div:has(span:containsOwn(Artist)) ~ div").First().Text())
|
|
|
|
// Title from h1 inside the series header
|
|
result.Title = strings.TrimSpace(doc.Find("h1").First().Text())
|
|
if result.Title == "" {
|
|
result.Title = manga.Title
|
|
}
|
|
|
|
// Genres from grid links
|
|
var genres []string
|
|
doc.Find("div.grid:has(>h1) > div > a:not([title='Status'])").Each(func(_ int, a *goquery.Selection) {
|
|
if t := strings.TrimSpace(a.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
result.Genre = strings.Join(genres, ", ")
|
|
return result, nil
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// Exclude upcoming and (optionally) paid chapters
|
|
sel := "#chapters > a:not(:has(.text-sm span))"
|
|
var chapters []source.SChapter
|
|
doc.Find(sel).Each(func(_ int, el *goquery.Selection) {
|
|
ch := source.SChapter{}
|
|
el.Find("a[href]").First().Each(func(_ int, a *goquery.Selection) {
|
|
href := a.AttrOr("href", "")
|
|
ch.URL = relURL(href, s.cfg.BaseURL)
|
|
})
|
|
if ch.URL == "" {
|
|
href := el.AttrOr("href", "")
|
|
ch.URL = relURL(href, s.cfg.BaseURL)
|
|
}
|
|
ch.Name = strings.TrimSpace(el.Find(".text-sm").Text())
|
|
if dateEl := el.Find(".text-xs").First(); dateEl.Length() > 0 {
|
|
ch.DateUpload = util.ParseRelativeDate(strings.TrimSpace(dateEl.Text()))
|
|
if ch.DateUpload == 0 {
|
|
ch.DateUpload = util.ParseAbsoluteDate(strings.TrimSpace(dateEl.Text()), "Jan 2, 2006")
|
|
}
|
|
}
|
|
if ch.URL != "" {
|
|
chapters = append(chapters, ch)
|
|
}
|
|
})
|
|
return chapters, nil
|
|
}
|
|
|
|
var (
|
|
cdnHostRe = regexp.MustCompile("realUrl\\s*=\\s*`[^`]+//([^/`]+)")
|
|
cdnCleanRe = regexp.MustCompile(`\$\{[^}]*\}`)
|
|
)
|
|
|
|
func getCdnURL(doc *goquery.Document) string {
|
|
var cdnURL string
|
|
doc.Find("script").Each(func(_ int, el *goquery.Selection) {
|
|
if cdnURL != "" {
|
|
return
|
|
}
|
|
html, _ := el.Html()
|
|
if m := cdnHostRe.FindStringSubmatch(html); len(m) > 1 {
|
|
host := cdnCleanRe.ReplaceAllString(m[1], "")
|
|
cdnURL = "https://" + host + "/uploads"
|
|
}
|
|
})
|
|
return cdnURL
|
|
}
|
|
|
|
// oldCdnRe matches image URLs that start with a cdn<N>.keyoapp.com host, with
// an http/https scheme or in protocol-relative ("//host") form.
var oldCdnRe = regexp.MustCompile(`^(https?:)?//cdn\d*\.keyoapp\.com`)
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cdnURL := getCdnURL(doc)
|
|
// Primary: #pages > img[uid] with CDN URL
|
|
var pages []source.Page
|
|
if cdnURL != "" {
|
|
doc.Find("#pages > img[uid]").Each(func(i int, img *goquery.Selection) {
|
|
uid := img.AttrOr("uid", "")
|
|
if uid != "" {
|
|
pages = append(pages, source.Page{Index: i, ImageURL: cdnURL + "/" + uid})
|
|
}
|
|
})
|
|
}
|
|
if len(pages) > 0 {
|
|
return pages, nil
|
|
}
|
|
// Fallback: old CDN direct src
|
|
doc.Find("#pages > img").Each(func(i int, img *goquery.Selection) {
|
|
src := img.AttrOr("src", "")
|
|
if src == "" {
|
|
src = img.AttrOr("data-src", "")
|
|
}
|
|
if oldCdnRe.MatchString(src) {
|
|
pages = append(pages, source.Page{Index: i, ImageURL: src})
|
|
}
|
|
})
|
|
return pages, nil
|
|
}
|
|
|
|
func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil }
|
|
func (s *Source) GetFilterList() []source.Filter { return nil }
|