9a42dd2ab1
- Remove global ProxyEnabled() logic from httpclient - Each source now explicitly chooses client at import time: - flare client: for JS-rendering/cloudflare sources - normal httpclient: for REST API sources - Updated 29 base sources based on Kotlin reference (network.cloudflareClient)
199 lines
6.5 KiB
Go
Executable File
199 lines
6.5 KiB
Go
Executable File
// Package paprika implements the Paprika manga base.
|
|
// HTML scraping; standard list/detail/chapter/page structure with Bootstrap media cards.
|
|
package paprika
|
|
|
|
import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"goyomi/internal/httpclient/flare"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
|
|
|
|
// Config holds the per-site settings of a Paprika-based source.
type Config struct {
	// Name is the value reported by Source.Name; it is also one of the two
	// inputs to source.GenerateSourceID.
	Name string

	// BaseURL is the site root. Listing URLs are built from it (with trailing
	// slashes trimmed) and scraped links are resolved against it.
	BaseURL string

	// Lang is the value reported by Source.Lang; it is the second input to
	// source.GenerateSourceID.
	Lang string
}
|
|
|
|
type Source struct {
|
|
cfg Config
|
|
client *flare.Client
|
|
id int64
|
|
}
|
|
|
|
func New(cfg Config) *Source {
|
|
c := flare.NewClient(flare.WithRateLimit(1, 2))
|
|
return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
|
|
}
|
|
|
|
// ID returns the source identifier that New computed.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the Name field of the source's Config.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
|
// Lang returns the Lang field of the source's Config.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
|
// SupportsLatest always reports true; the latest listing is served by
// GetLatestUpdates.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
// base returns the configured BaseURL with all trailing "/" characters
// removed, so callers can append paths that start with "/".
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
resp, err := s.client.Get(ctx, rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("paprika: HTTP %d", resp.StatusCode)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
|
|
var mangas []source.SManga
|
|
doc.Find("div.media").Each(func(_ int, el *goquery.Selection) {
|
|
a := el.Find("h4 a, h3 a, .media-heading a").First()
|
|
if a.Length() == 0 {
|
|
a = el.Find("a").First()
|
|
}
|
|
if a.Length() == 0 {
|
|
return
|
|
}
|
|
m := source.SManga{
|
|
URL: a.AttrOr("href", ""),
|
|
Title: strings.TrimSpace(a.Text()),
|
|
}
|
|
if img := el.Find("img").First(); img.Length() > 0 {
|
|
m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", img.AttrOr("data-src", "")))
|
|
}
|
|
if m.URL != "" && m.Title != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
hasNext := doc.Find(".pagination .next, li.next a, a[rel=next]").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), fmt.Sprintf("%s/popular-manga?page=%d", s.base(), page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), fmt.Sprintf("%s/latest-manga?page=%d", s.base(), page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), fmt.Sprintf("%s/search?q=%s&page=%d", s.base(), query, page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := source.SManga{URL: manga.URL}
|
|
result.Title = strings.TrimSpace(doc.Find("div.manga-detail h1").First().Text())
|
|
if result.Title == "" {
|
|
result.Title = manga.Title
|
|
}
|
|
if img := doc.Find("div.manga-detail img").First(); img.Length() > 0 {
|
|
result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", ""))
|
|
}
|
|
|
|
// Parse metadata paragraphs: "Label: value" lines inside div.media-body p
|
|
doc.Find("div.manga-detail div.media-body p, div.manga-detail .info p").Each(func(_ int, el *goquery.Selection) {
|
|
text := strings.TrimSpace(el.Text())
|
|
if strings.HasPrefix(strings.ToLower(text), "author") {
|
|
result.Author = strings.TrimSpace(strings.SplitN(text, ":", 2)[len(strings.SplitN(text, ":", 2))-1])
|
|
} else if strings.HasPrefix(strings.ToLower(text), "artist") {
|
|
result.Artist = strings.TrimSpace(strings.SplitN(text, ":", 2)[len(strings.SplitN(text, ":", 2))-1])
|
|
} else if strings.HasPrefix(strings.ToLower(text), "genre") || strings.HasPrefix(strings.ToLower(text), "categ") {
|
|
result.Genre = strings.TrimSpace(strings.SplitN(text, ":", 2)[len(strings.SplitN(text, ":", 2))-1])
|
|
} else if strings.HasPrefix(strings.ToLower(text), "status") {
|
|
result.Status = util.StatusFromString(strings.TrimSpace(strings.SplitN(text, ":", 2)[len(strings.SplitN(text, ":", 2))-1]))
|
|
}
|
|
})
|
|
|
|
result.Description = strings.TrimSpace(doc.Find("div.manga-detail .description, div.manga-detail .synopsis").First().Text())
|
|
return result, nil
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var chapters []source.SChapter
|
|
mangaTitle := strings.TrimSpace(doc.Find("div.manga-detail h1").First().Text())
|
|
doc.Find("div.total-chapter:has(h2) li").Each(func(_ int, el *goquery.Selection) {
|
|
a := el.Find("a").First()
|
|
href := a.AttrOr("href", "")
|
|
if href == "" {
|
|
return
|
|
}
|
|
name := strings.TrimSpace(a.Text())
|
|
// Strip manga title prefix if present
|
|
name = strings.TrimPrefix(name, mangaTitle)
|
|
name = strings.TrimSpace(name)
|
|
if name == "" {
|
|
name = strings.TrimSpace(a.Text())
|
|
}
|
|
var ts int64
|
|
if span := el.Find("span.date, small").First(); span.Length() > 0 {
|
|
ts = util.ParseRelativeDate(strings.TrimSpace(span.Text()))
|
|
}
|
|
chapters = append(chapters, source.SChapter{
|
|
URL: href,
|
|
Name: name,
|
|
DateUpload: ts,
|
|
})
|
|
})
|
|
return chapters, nil
|
|
}
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// Pages stored in hidden #arraydata element as comma-separated URLs
|
|
raw := strings.TrimSpace(doc.Find("#arraydata").Text())
|
|
if raw == "" {
|
|
// Fallback: direct images
|
|
var pages []source.Page
|
|
doc.Find("div.reading-content img, div.reader-area img").Each(func(i int, img *goquery.Selection) {
|
|
u := img.AttrOr("src", img.AttrOr("data-src", ""))
|
|
if u != "" {
|
|
pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)})
|
|
}
|
|
})
|
|
return pages, nil
|
|
}
|
|
|
|
var pages []source.Page
|
|
for i, u := range strings.Split(raw, ",") {
|
|
u = strings.TrimSpace(u)
|
|
if u != "" {
|
|
pages = append(pages, source.Page{Index: i, ImageURL: u})
|
|
}
|
|
}
|
|
return pages, nil
|
|
}
|
|
|
|
// GetImageURL returns page.ImageURL unchanged and never fails.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}
|
|
// GetFilterList returns nil; this source defines no filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
|