// Package keyoapp implements the Keyoapp manga base. // HTML scraping; popular from homepage; pages via CDN URL extracted from inline JS. package keyoapp import ( "context" "fmt" "net/http" "net/url" "regexp" "strings" "github.com/PuerkitoBio/goquery" "goyomi/internal/httpclient/flare" "goyomi/internal/source" "goyomi/sources/base/util" ) type Config struct { Name string BaseURL string Lang string // Override popular manga selector. Empty means use default. PopularSelector string // Override description selector. Empty means use default. DescriptionSelector string // Override status selector. Empty means use default. StatusSelector string // Override author selector. Empty means use default. AuthorSelector string // Override artist selector. Empty means use default. ArtistSelector string } type Source struct { cfg Config client *flare.Client id int64 } func New(cfg Config) *Source { c := flare.NewClient(flare.WithRateLimit(1, 2)) return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} } func (s *Source) ID() int64 { return s.id } func (s *Source) Name() string { return s.cfg.Name } func (s *Source) Lang() string { return s.cfg.Lang } func (s *Source) SupportsLatest() bool { return true } func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) if err != nil { return nil, err } req.Header.Set("Referer", s.cfg.BaseURL+"/") resp, err := s.client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("keyoapp: HTTP %d", resp.StatusCode) } return goquery.NewDocumentFromReader(resp.Body) } var imgURLRe = regexp.MustCompile(`url\(['"]?([^('")\s]+)`) func getImageURL(el *goquery.Selection, baseURL string) string { // Find any descendant with background-image style var u string el.Find("*[style]").Each(func(_ int, s *goquery.Selection) { if u != "" { return } style := s.AttrOr("style", "") if !strings.Contains(style, "background-image") { return } if m := imgURLRe.FindStringSubmatch(style); len(m) > 1 { raw := m[1] // strip w= query that keyoapp uses for thumbnail sizing if parsed, err := url.Parse(raw); err == nil { q := parsed.Query() q.Del("w") parsed.RawQuery = q.Encode() u = parsed.String() } else { u = raw } } }) if u != "" { return util.AbsURL(baseURL, u) } return "" } func relURL(raw, baseURL string) string { u, err := url.Parse(raw) if err != nil { return raw } base, err := url.Parse(baseURL) if err != nil { return raw } if u.Host == base.Host { return u.Path } return raw } func (s *Source) mangaFromElement(el *goquery.Selection) source.SManga { m := source.SManga{} m.ThumbnailURL = getImageURL(el, s.cfg.BaseURL) el.Find("a[href]").First().Each(func(_ int, a *goquery.Selection) { m.Title = a.AttrOr("title", "") href := a.AttrOr("href", "") m.URL = relURL(href, s.cfg.BaseURL) }) return m } func (s *Source) popularSelector() string { if s.cfg.PopularSelector != "" { return s.cfg.PopularSelector } var parts []string for _, label := range []string{"Popular", "Popularie", "Trending"} { parts = append(parts, fmt.Sprintf("div:contains(%s) + div .group.overflow-hidden.grid", label)) } return strings.Join(parts, ", ") } func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { doc, err := s.get(context.Background(), s.base()) if err != nil { return source.MangasPage{}, err } var mangas []source.SManga doc.Find(s.popularSelector()).Each(func(_ int, el *goquery.Selection) { m := s.mangaFromElement(el) if m.URL != "" && m.Title != "" { mangas = append(mangas, m) } }) return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil } func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { doc, err := s.get(context.Background(), s.base()+"/latest/") if err != nil { return source.MangasPage{}, err } var mangas []source.SManga doc.Find("div.grid > div.group").Each(func(_ int, el *goquery.Selection) { m := s.mangaFromElement(el) if m.URL != "" && m.Title != "" { mangas = append(mangas, m) } }) return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil } func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { u := fmt.Sprintf("%s/series/?q=%s", s.base(), query) doc, err := s.get(context.Background(), u) if err != nil { return source.MangasPage{}, err } var mangas []source.SManga // Filter client-side by title doc.Find("#searched_series_page > button").Each(func(_ int, el *goquery.Selection) { title := el.AttrOr("title", "") if query != "" && !strings.Contains(strings.ToLower(title), strings.ToLower(query)) { return } m := s.mangaFromElement(el) if m.URL == "" { el.Find("a[href]").First().Each(func(_ int, a *goquery.Selection) { m.URL = relURL(a.AttrOr("href", ""), s.cfg.BaseURL) m.Title = a.AttrOr("title", strings.TrimSpace(a.Text())) }) } if m.Title == "" { m.Title = title } if m.URL != "" { mangas = append(mangas, m) } }) return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil } func parseStatus(s *goquery.Selection) int { if s == nil || s.Length() == 0 { return source.StatusUnknown } switch strings.ToLower(strings.TrimSpace(s.Text())) { case "ongoing": return source.StatusOngoing case "dropped": return source.StatusCancelled case "paused": return source.StatusHiatus case "completed": return source.StatusCompleted default: return source.StatusUnknown } } func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) if err != nil { return manga, err } result := source.SManga{URL: manga.URL} // Thumbnail from div[class*=photoURL] background-image style result.ThumbnailURL = getImageURL(doc.Find("div[class*=photoURL]").First(), s.cfg.BaseURL) descSel := "div:containsOwn(Synopsis) ~ div" if s.cfg.DescriptionSelector != "" { descSel = s.cfg.DescriptionSelector } result.Description = strings.TrimSpace(doc.Find(descSel).First().Text()) statusSel := "div:has(span:containsOwn(Status)) ~ div" if s.cfg.StatusSelector != "" { statusSel = s.cfg.StatusSelector } result.Status = parseStatus(doc.Find(statusSel).First()) authorSel := "div:has(span:containsOwn(Author)) ~ div" if s.cfg.AuthorSelector != "" { authorSel = s.cfg.AuthorSelector } result.Author = strings.TrimSpace(doc.Find(authorSel).First().Text()) artistSel := "div:has(span:containsOwn(Artist)) ~ div" if s.cfg.ArtistSelector != "" { artistSel = s.cfg.ArtistSelector } result.Artist = strings.TrimSpace(doc.Find(artistSel).First().Text()) // Title from h1 inside the series header result.Title = strings.TrimSpace(doc.Find("h1").First().Text()) if result.Title == "" { result.Title = manga.Title } // Genres from grid links var genres []string doc.Find("div.grid:has(>h1) > div > a:not([title='Status'])").Each(func(_ int, a *goquery.Selection) { if t := strings.TrimSpace(a.Text()); t != "" { genres = append(genres, t) } }) result.Genre = strings.Join(genres, ", ") return result, nil } func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) if err != nil { return nil, err } // Exclude upcoming and (optionally) paid chapters sel := "#chapters > a:not(:has(.text-sm span))" var chapters []source.SChapter doc.Find(sel).Each(func(_ int, el *goquery.Selection) { ch := source.SChapter{} el.Find("a[href]").First().Each(func(_ int, a *goquery.Selection) { href := a.AttrOr("href", "") ch.URL = relURL(href, s.cfg.BaseURL) }) if ch.URL == "" { href := el.AttrOr("href", "") ch.URL = relURL(href, s.cfg.BaseURL) } ch.Name = strings.TrimSpace(el.Find(".text-sm").Text()) if dateEl := el.Find(".text-xs").First(); dateEl.Length() > 0 { ch.DateUpload = util.ParseRelativeDate(strings.TrimSpace(dateEl.Text())) if ch.DateUpload == 0 { ch.DateUpload = util.ParseAbsoluteDate(strings.TrimSpace(dateEl.Text()), "Jan 2, 2006") } } if ch.URL != "" { chapters = append(chapters, ch) } }) return chapters, nil } var ( cdnHostRe = regexp.MustCompile("realUrl\\s*=\\s*`[^`]+//([^/`]+)") cdnCleanRe = regexp.MustCompile(`\$\{[^}]*\}`) ) func getCdnURL(doc *goquery.Document) string { var cdnURL string doc.Find("script").Each(func(_ int, el *goquery.Selection) { if cdnURL != "" { return } html, _ := el.Html() if m := cdnHostRe.FindStringSubmatch(html); len(m) > 1 { host := cdnCleanRe.ReplaceAllString(m[1], "") cdnURL = "https://" + host + "/uploads" } }) return cdnURL } var oldCdnRe = regexp.MustCompile(`^(https?:)?//cdn\d*\.keyoapp\.com`) func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) if err != nil { return nil, err } cdnURL := getCdnURL(doc) // Primary: #pages > img[uid] with CDN URL var pages []source.Page if cdnURL != "" { doc.Find("#pages > img[uid]").Each(func(i int, img *goquery.Selection) { uid := img.AttrOr("uid", "") if uid != "" { pages = append(pages, source.Page{Index: i, ImageURL: cdnURL + "/" + uid}) } }) } if len(pages) > 0 { return pages, nil } // Fallback: old CDN direct src doc.Find("#pages > img").Each(func(i int, img *goquery.Selection) { src := img.AttrOr("src", "") if src == "" { src = img.AttrOr("data-src", "") } if oldCdnRe.MatchString(src) { pages = append(pages, source.Page{Index: i, ImageURL: src}) } }) return pages, nil } func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } func (s *Source) GetFilterList() []source.Filter { return nil }