diff --git a/docs/phase3-bases.md b/docs/phase3-bases.md index 60992d4..ec605d8 100644 --- a/docs/phase3-bases.md +++ b/docs/phase3-bases.md @@ -59,12 +59,12 @@ Detailed implementation notes for complex bases are in the **Notes** section at - [x] `base/mmrcms` ⚠️ see notes - [x] `base/monochrome` - [x] `base/multichan` -- [ ] `base/natsuid` -- [ ] `base/oceanwp` -- [ ] `base/paprika` -- [ ] `base/peachscan` +- [x] `base/natsuid` +- [x] `base/oceanwp` +- [x] `base/paprika` +- [x] `base/peachscan` - [x] `base/pizzareader` ⚠️ see notes -- [ ] `base/raijinscans` +- [x] `base/raijinscans` - [ ] `base/scanr` - [x] `base/scanreader` ⚠️ see notes - [x] `base/senkuro` ⚠️ see notes diff --git a/sources/base/natsuid/natsuid.go b/sources/base/natsuid/natsuid.go new file mode 100644 index 0000000..1b1ee3c --- /dev/null +++ b/sources/base/natsuid/natsuid.go @@ -0,0 +1,336 @@ +// Package natsuid implements the Natsuid manga base. +// WP-based site; uses nonce-authenticated multipart POST for search/browse; WP JSON API for details. +package natsuid + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "mime/multipart" + "net/http" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type Config struct { + Name string + BaseURL string + Lang string + PostType string // WP post type slug, e.g. "manga" +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + if cfg.PostType == "" { + cfg.PostType = "manga" + } + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +type mangaURL struct { + ID int `json:"id"` + Slug string `json:"slug"` +} + +func encodeMangaURL(id int, slug string) string { + b, _ := json.Marshal(mangaURL{ID: id, Slug: slug}) + return string(b) +} + +func decodeMangaURL(u string) (mangaURL, error) { + var m mangaURL + return m, json.Unmarshal([]byte(u), &m) +} + +func (s *Source) getNonce(ctx context.Context) (string, error) { + u := s.base() + "/wp-admin/admin-ajax.php?type=search_form&action=get_nonce" + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) + if err != nil { + return "", err + } + resp, err := s.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return "", err + } + nonce := doc.Find("input[name=search_nonce]").AttrOr("value", "") + return nonce, nil +} + +func (s *Source) postSearch(ctx context.Context, page int, sort string, query string) (*goquery.Document, error) { + nonce, err := s.getNonce(ctx) + if err != nil { + return nil, err + } + + var buf bytes.Buffer + mw := multipart.NewWriter(&buf) + _ = mw.WriteField("nonce", nonce) + _ = mw.WriteField("page", fmt.Sprintf("%d", page)) + _ = mw.WriteField("sort", sort) + _ = mw.WriteField("genre", "[]") + _ = mw.WriteField("genre_exclude", "[]") + _ = mw.WriteField("author", "[]") + _ = mw.WriteField("status", "[]") + if query != "" { + _ = mw.WriteField("search", query) + } + mw.Close() + + u := s.base() + "/wp-admin/admin-ajax.php?action=advanced_search" + req, err := http.NewRequestWithContext(ctx, http.MethodPost, u, &buf) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", mw.FormDataContentType()) + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("natsuid: HTTP %d", resp.StatusCode) + } + return goquery.NewDocumentFromReader(resp.Body) +} + +func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage { + var mangas []source.SManga + doc.Find("article.manga-card, div.manga-card, div.card-manga").Each(func(_ int, el *goquery.Selection) { + a := el.Find("a").First() + if a.Length() == 0 { + return + } + href := a.AttrOr("href", "") + if href == "" { + return + } + slug := util.SlugFromURL(strings.TrimRight(href, "/")) + m := source.SManga{ + Title: strings.TrimSpace(el.Find("h2, h3, .manga-title, .title").First().Text()), + } + if m.Title == "" { + m.Title = strings.TrimSpace(a.AttrOr("title", "")) + } + if img := el.Find("img").First(); img.Length() > 0 { + m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", img.AttrOr("data-src", ""))) + } + // We don't have WP post ID yet; store slug-only temporarily + m.URL = encodeMangaURL(0, slug) + if m.URL != "" && m.Title != "" { + mangas = append(mangas, m) + } + }) + hasNext := doc.Find(".next, a[rel=next], .pagination .next").Length() > 0 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext} +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { + doc, err := s.postSearch(context.Background(), page, "popular", "") + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { + doc, err := s.postSearch(context.Background(), page, "updated", "") + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + doc, err := s.postSearch(context.Background(), page, "popular", query) + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +type wpPost struct { + ID int `json:"id"` + Slug string `json:"slug"` + Title struct { + Rendered string `json:"rendered"` + } `json:"title"` + Content struct { + Rendered string `json:"rendered"` + } `json:"content"` + Excerpt struct { + Rendered string `json:"rendered"` + } `json:"excerpt"` + Embedded struct { + FeaturedMedia [][]struct { + SourceURL string `json:"source_url"` + } `json:"wp:featuredmedia"` + } `json:"_embedded"` +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + mu, err := decodeMangaURL(manga.URL) + if err != nil { + return manga, err + } + + var u string + if mu.ID > 0 { + u = fmt.Sprintf("%s/wp-json/wp/v2/%s/%d?_embed", s.base(), s.cfg.PostType, mu.ID) + } else { + u = fmt.Sprintf("%s/wp-json/wp/v2/%s?slug=%s&_embed", s.base(), s.cfg.PostType, mu.Slug) + } + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, u, nil) + if err != nil { + return manga, err + } + resp, err := s.client.Do(req) + if err != nil { + return manga, err + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + + if mu.ID > 0 { + var post wpPost + if err := json.Unmarshal(body, &post); err != nil { + return manga, err + } + return s.postToManga(manga.URL, post), nil + } + + var posts []wpPost + if err := json.Unmarshal(body, &posts); err != nil || len(posts) == 0 { + return manga, err + } + return s.postToManga(manga.URL, posts[0]), nil +} + +func (s *Source) postToManga(originalURL string, post wpPost) source.SManga { + m := source.SManga{ + URL: originalURL, + Title: util.CleanText(post.Title.Rendered), + } + if len(post.Embedded.FeaturedMedia) > 0 && len(post.Embedded.FeaturedMedia[0]) > 0 { + m.ThumbnailURL = post.Embedded.FeaturedMedia[0][0].SourceURL + } + desc := util.CleanText(post.Excerpt.Rendered) + if desc == "" { + desc = util.CleanText(post.Content.Rendered) + } + m.Description = desc + return m +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + mu, err := decodeMangaURL(manga.URL) + if err != nil { + return nil, err + } + pageURL := fmt.Sprintf("%s/%s/%s/", s.base(), s.cfg.PostType, mu.Slug) + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, pageURL, nil) + if err != nil { + return nil, err + } + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, err + } + + var chapters []source.SChapter + doc.Find("div a:has(time)").Each(func(_ int, el *goquery.Selection) { + href := el.AttrOr("href", "") + if href == "" { + return + } + name := strings.TrimSpace(el.Find("span, div").Not("time").First().Text()) + if name == "" { + name = strings.TrimSpace(el.Text()) + } + var ts int64 + if t := el.Find("time").First(); t.Length() > 0 { + dt := t.AttrOr("datetime", t.Text()) + ts = parseDate(strings.TrimSpace(dt)) + } + chapters = append(chapters, source.SChapter{ + URL: href, + Name: name, + DateUpload: ts, + }) + }) + return chapters, nil +} + +func parseDate(s string) int64 { + formats := []string{ + time.RFC3339, + "2006-01-02T15:04:05", + "2006-01-02", + "January 2, 2006", + } + for _, f := range formats { + if t, err := time.Parse(f, s); err == nil { + return t.UnixMilli() + } + } + return util.ParseRelativeDate(s) +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, util.AbsURL(s.cfg.BaseURL, chapter.URL), nil) + if err != nil { + return nil, err + } + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, err + } + + var pages []source.Page + doc.Find("main .relative section > img").Each(func(i int, img *goquery.Selection) { + u := img.AttrOr("src", img.AttrOr("data-src", "")) + if u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)}) + } + }) + return pages, nil +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil } diff --git a/sources/base/oceanwp/oceanwp.go b/sources/base/oceanwp/oceanwp.go new file mode 100644 index 0000000..dcc1386 --- /dev/null +++ b/sources/base/oceanwp/oceanwp.go @@ -0,0 +1,148 @@ +// Package oceanwp implements the OceanWP manga base. +// Blog-style WordPress site; each post is a single-chapter gallery; pages from entry-content images. +package oceanwp + +import ( + "context" + "fmt" + "net/http" + "strings" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type Config struct { + Name string + BaseURL string + Lang string +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return false } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("oceanwp: HTTP %d", resp.StatusCode) + } + return goquery.NewDocumentFromReader(resp.Body) +} + +func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage { + var mangas []source.SManga + doc.Find("article.blog-entry").Each(func(_ int, el *goquery.Selection) { + titleEl := el.Find("h2.blog-entry-title a").First() + if titleEl.Length() == 0 { + return + } + m := source.SManga{ + URL: titleEl.AttrOr("href", ""), + Title: strings.TrimSpace(titleEl.Text()), + } + if img := el.Find("img").First(); img.Length() > 0 { + m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", img.AttrOr("data-src", ""))) + } + if m.URL != "" && m.Title != "" { + mangas = append(mangas, m) + } + }) + hasNext := doc.Find(".pagination .next, a[rel=next]").Length() > 0 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext} +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { + var u string + if page == 1 { + u = s.base() + } else { + u = fmt.Sprintf("%s/page/%d/", s.base(), page) + } + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { + return s.GetPopularManga(page) +} + +func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + u := fmt.Sprintf("%s/?s=%s&paged=%d", s.base(), query, page) + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return manga, err + } + result := source.SManga{URL: manga.URL} + result.Title = strings.TrimSpace(doc.Find("h1.entry-title, h1.page-title").First().Text()) + if result.Title == "" { + result.Title = manga.Title + } + result.Description = strings.TrimSpace(doc.Find("div.entry-content p").First().Text()) + if img := doc.Find("div.entry-content img, img.wp-post-image").First(); img.Length() > 0 { + result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", img.AttrOr("data-src", ""))) + } + return result, nil +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + return []source.SChapter{{ + URL: manga.URL, + Name: "Chapter 1", + }}, nil +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) + if err != nil { + return nil, err + } + var pages []source.Page + doc.Find("div.entry-content img").Each(func(i int, img *goquery.Selection) { + u := img.AttrOr("src", img.AttrOr("data-src", "")) + if u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)}) + } + }) + return pages, nil +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil } diff --git a/sources/base/paprika/paprika.go b/sources/base/paprika/paprika.go new file mode 100644 index 0000000..b2e0189 --- /dev/null +++ b/sources/base/paprika/paprika.go @@ -0,0 +1,203 @@ +// Package paprika implements the Paprika manga base. +// HTML scraping; standard list/detail/chapter/page structure with Bootstrap media cards. +package paprika + +import ( + "context" + "fmt" + "net/http" + "strings" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type Config struct { + Name string + BaseURL string + Lang string +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("paprika: HTTP %d", resp.StatusCode) + } + return goquery.NewDocumentFromReader(resp.Body) +} + +func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage { + var mangas []source.SManga + doc.Find("div.media").Each(func(_ int, el *goquery.Selection) { + a := el.Find("h4 a, h3 a, .media-heading a").First() + if a.Length() == 0 { + a = el.Find("a").First() + } + if a.Length() == 0 { + return + } + m := source.SManga{ + URL: a.AttrOr("href", ""), + Title: strings.TrimSpace(a.Text()), + } + if img := el.Find("img").First(); img.Length() > 0 { + m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", img.AttrOr("data-src", ""))) + } + if m.URL != "" && m.Title != "" { + mangas = append(mangas, m) + } + }) + hasNext := doc.Find(".pagination .next, li.next a, a[rel=next]").Length() > 0 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext} +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { + doc, err := s.get(context.Background(), fmt.Sprintf("%s/popular-manga?page=%d", s.base(), page)) + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { + doc, err := s.get(context.Background(), fmt.Sprintf("%s/latest-manga?page=%d", s.base(), page)) + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + doc, err := s.get(context.Background(), fmt.Sprintf("%s/search?q=%s&page=%d", s.base(), query, page)) + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return manga, err + } + result := source.SManga{URL: manga.URL} + result.Title = strings.TrimSpace(doc.Find("div.manga-detail h1").First().Text()) + if result.Title == "" { + result.Title = manga.Title + } + if img := doc.Find("div.manga-detail img").First(); img.Length() > 0 { + result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", "")) + } + + // Parse metadata paragraphs: "Label: value" lines inside div.media-body p + doc.Find("div.manga-detail div.media-body p, div.manga-detail .info p").Each(func(_ int, el *goquery.Selection) { + text := strings.TrimSpace(el.Text()) + if strings.HasPrefix(strings.ToLower(text), "author") { + result.Author = strings.TrimSpace(strings.SplitN(text, ":", 2)[len(strings.SplitN(text, ":", 2))-1]) + } else if strings.HasPrefix(strings.ToLower(text), "artist") { + result.Artist = strings.TrimSpace(strings.SplitN(text, ":", 2)[len(strings.SplitN(text, ":", 2))-1]) + } else if strings.HasPrefix(strings.ToLower(text), "genre") || strings.HasPrefix(strings.ToLower(text), "categ") { + result.Genre = strings.TrimSpace(strings.SplitN(text, ":", 2)[len(strings.SplitN(text, ":", 2))-1]) + } else if strings.HasPrefix(strings.ToLower(text), "status") { + result.Status = util.StatusFromString(strings.TrimSpace(strings.SplitN(text, ":", 2)[len(strings.SplitN(text, ":", 2))-1])) + } + }) + + result.Description = strings.TrimSpace(doc.Find("div.manga-detail .description, div.manga-detail .synopsis").First().Text()) + return result, nil +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return nil, err + } + var chapters []source.SChapter + mangaTitle := strings.TrimSpace(doc.Find("div.manga-detail h1").First().Text()) + doc.Find("div.total-chapter:has(h2) li").Each(func(_ int, el *goquery.Selection) { + a := el.Find("a").First() + href := a.AttrOr("href", "") + if href == "" { + return + } + name := strings.TrimSpace(a.Text()) + // Strip manga title prefix if present + name = strings.TrimPrefix(name, mangaTitle) + name = strings.TrimSpace(name) + if name == "" { + name = strings.TrimSpace(a.Text()) + } + var ts int64 + if span := el.Find("span.date, small").First(); span.Length() > 0 { + ts = util.ParseRelativeDate(strings.TrimSpace(span.Text())) + } + chapters = append(chapters, source.SChapter{ + URL: href, + Name: name, + DateUpload: ts, + }) + }) + return chapters, nil +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) + if err != nil { + return nil, err + } + // Pages stored in hidden #arraydata element as comma-separated URLs + raw := strings.TrimSpace(doc.Find("#arraydata").Text()) + if raw == "" { + // Fallback: direct images + var pages []source.Page + doc.Find("div.reading-content img, div.reader-area img").Each(func(i int, img *goquery.Selection) { + u := img.AttrOr("src", img.AttrOr("data-src", "")) + if u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)}) + } + }) + return pages, nil + } + + var pages []source.Page + for i, u := range strings.Split(raw, ",") { + u = strings.TrimSpace(u) + if u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: u}) + } + } + return pages, nil +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil } diff --git a/sources/base/peachscan/peachscan.go b/sources/base/peachscan/peachscan.go new file mode 100644 index 0000000..cfa7370 --- /dev/null +++ b/sources/base/peachscan/peachscan.go @@ -0,0 +1,282 @@ +// Package peachscan implements the PeachScan manga base. +// Brazilian scan site; pages extracted from inline JS const urls = [...] array. +package peachscan + +import ( + "context" + "fmt" + "io" + "net/http" + "regexp" + "strings" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +var urlsRe = regexp.MustCompile(`(?s)const\s+urls\s*=\s*\[(.*?)\]\s*;`) + +type Config struct { + Name string + BaseURL string + Lang string +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("peachscan: HTTP %d", resp.StatusCode) + } + return goquery.NewDocumentFromReader(resp.Body) +} + +func (s *Source) getRaw(ctx context.Context, rawURL string) (string, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return "", err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + return string(body), nil +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { + if page > 1 { + return source.MangasPage{}, nil + } + doc, err := s.get(context.Background(), s.base()+"/todas-as-obras/") + if err != nil { + return source.MangasPage{}, err + } + var mangas []source.SManga + doc.Find(".comics__all__box").Each(func(_ int, el *goquery.Selection) { + a := el.Find("a").First() + if a.Length() == 0 { + return + } + m := source.SManga{ + URL: a.AttrOr("href", ""), + Title: strings.TrimSpace(el.Find(".comics__all__title, h3, h2").First().Text()), + } + if m.Title == "" { + m.Title = strings.TrimSpace(a.AttrOr("title", "")) + } + if img := el.Find("img").First(); img.Length() > 0 { + m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", img.AttrOr("data-src", ""))) + } + if m.URL != "" && m.Title != "" { + mangas = append(mangas, m) + } + }) + return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil +} + +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { + var u string + if page == 1 { + u = s.base() + } else { + u = fmt.Sprintf("%s/page/%d/", s.base(), page) + } + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + var mangas []source.SManga + doc.Find("div.comic:not(:has(a.box-image > p))").Each(func(_ int, el *goquery.Selection) { + // Exclude novels: skip if any p under .box-image contains "Novel" + if el.Find("a.box-image > p").FilterFunction(func(_ int, p *goquery.Selection) bool { + return strings.Contains(p.Text(), "Novel") + }).Length() > 0 { + return + } + a := el.Find("a").First() + if a.Length() == 0 { + return + } + m := source.SManga{ + URL: a.AttrOr("href", ""), + Title: strings.TrimSpace(el.Find(".comic-title, h3, h2").First().Text()), + } + if m.Title == "" { + m.Title = strings.TrimSpace(a.AttrOr("title", "")) + } + if img := el.Find("img").First(); img.Length() > 0 { + m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", img.AttrOr("data-src", ""))) + } + if m.URL != "" && m.Title != "" { + mangas = append(mangas, m) + } + }) + hasNext := doc.Find(".pagination .next, a[rel=next]").Length() > 0 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil +} + +func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + u := fmt.Sprintf("%s/?s=%s&paged=%d", s.base(), query, page) + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + var mangas []source.SManga + doc.Find(".comics__all__box, div.comic").Each(func(_ int, el *goquery.Selection) { + a := el.Find("a").First() + if a.Length() == 0 { + return + } + m := source.SManga{ + URL: a.AttrOr("href", ""), + Title: strings.TrimSpace(el.Find(".comics__all__title, .comic-title, h3, h2").First().Text()), + } + if m.Title == "" { + m.Title = strings.TrimSpace(a.AttrOr("title", "")) + } + if img := el.Find("img").First(); img.Length() > 0 { + m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", img.AttrOr("data-src", ""))) + } + if m.URL != "" && m.Title != "" { + mangas = append(mangas, m) + } + }) + hasNext := doc.Find(".pagination .next, a[rel=next]").Length() > 0 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return manga, err + } + result := source.SManga{URL: manga.URL} + result.Title = strings.TrimSpace(doc.Find("h1.manga-title, h1.comic-title, h1").First().Text()) + if result.Title == "" { + result.Title = manga.Title + } + if img := doc.Find("div.manga-cover img, div.comic-cover img, img.cover").First(); img.Length() > 0 { + result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", "")) + } + result.Description = strings.TrimSpace(doc.Find(".manga-description, .comic-description, .sinopse").First().Text()) + + doc.Find(".manga-info p, .comic-info p, .meta p").Each(func(_ int, el *goquery.Selection) { + text := strings.TrimSpace(el.Text()) + lower := strings.ToLower(text) + val := func() string { + parts := strings.SplitN(text, ":", 2) + if len(parts) < 2 { + return "" + } + return strings.TrimSpace(parts[1]) + } + if strings.HasPrefix(lower, "autor") || strings.HasPrefix(lower, "author") { + result.Author = val() + } else if strings.HasPrefix(lower, "artista") || strings.HasPrefix(lower, "artist") { + result.Artist = val() + } else if strings.HasPrefix(lower, "gênero") || strings.HasPrefix(lower, "genre") { + result.Genre = val() + } else if strings.HasPrefix(lower, "status") { + result.Status = util.StatusFromString(val()) + } + }) + return result, nil +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return nil, err + } + var chapters []source.SChapter + doc.Find(".link__capitulos, ul.chapters li a, .chapter-list a").Each(func(_ int, el *goquery.Selection) { + href := el.AttrOr("href", "") + if href == "" { + return + } + name := strings.TrimSpace(el.Text()) + if name == "" { + name = "Chapter" + } + chapters = append(chapters, source.SChapter{ + URL: href, + Name: name, + }) + }) + return chapters, nil +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + html, err := s.getRaw(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) + if err != nil { + return nil, err + } + + m := urlsRe.FindStringSubmatch(html) + if m != nil { + inner := m[1] + var pages []source.Page + for i, part := range strings.Split(inner, ",") { + part = strings.TrimSpace(part) + part = strings.Trim(part, `"'`) + part = strings.TrimSpace(part) + if part == "" { + continue + } + imgURL := util.AbsURL(s.cfg.BaseURL, part) + "#page" + pages = append(pages, source.Page{Index: i, ImageURL: imgURL}) + } + if len(pages) > 0 { + return pages, nil + } + } + + // Fallback: #imageContainer img + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) + if err != nil { + return nil, err + } + var pages []source.Page + doc.Find("#imageContainer img").Each(func(i int, img *goquery.Selection) { + u := img.AttrOr("src", img.AttrOr("data-src", "")) + if u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)}) + } + }) + return pages, nil +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil } diff --git a/sources/base/raijinscans/raijinscans.go b/sources/base/raijinscans/raijinscans.go new file mode 100644 index 0000000..f7fe873 --- /dev/null +++ b/sources/base/raijinscans/raijinscans.go @@ -0,0 +1,266 @@ +// Package raijinscans implements the RaijinScans manga base. +// French scan site; CF-protected; chapter page URLs are Base64-encoded in data-src attributes. +package raijinscans + +import ( + "context" + "encoding/base64" + "fmt" + "net/http" + "strings" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type Config struct { + Name string + BaseURL string + Lang string +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("raijinscans: HTTP %d", resp.StatusCode) + } + return goquery.NewDocumentFromReader(resp.Body) +} + +func (s *Source) parseMangaFromUnit(el *goquery.Selection) source.SManga { + a := el.Find("a").First() + m := source.SManga{ + URL: a.AttrOr("href", ""), + Title: strings.TrimSpace(el.Find(".title, h2, h3, span.name").First().Text()), + } + if m.Title == "" { + m.Title = strings.TrimSpace(a.AttrOr("title", "")) + } + if img := el.Find("img").First(); img.Length() > 0 { + m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", img.AttrOr("data-src", ""))) + } + return m +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { + var u string + if page == 1 { + u = s.base() + } else { + u = fmt.Sprintf("%s/page/%d/", s.base(), page) + } + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + var mangas []source.SManga + doc.Find("section#most-viewed div.swiper-slide.unit").Each(func(_ int, el *goquery.Selection) { + m := s.parseMangaFromUnit(el) + if m.URL != "" && m.Title != "" { + mangas = append(mangas, m) + } + }) + hasNext := doc.Find(".pagination .next, a[rel=next]").Length() > 0 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil +} + +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { + var u string + if page == 1 { + u = s.base() + } else { + u = fmt.Sprintf("%s/page/%d/", s.base(), page) + } + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + var mangas []source.SManga + doc.Find("div.original.card-lg div.unit, div.latest div.unit").Each(func(_ int, el *goquery.Selection) { + m := s.parseMangaFromUnit(el) + if m.URL != "" && m.Title != "" { + mangas = append(mangas, m) + } + }) + hasNext := doc.Find(".pagination .next, a[rel=next]").Length() > 0 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil +} + +func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + u := fmt.Sprintf("%s/?s=%s&post_type=wp-manga&paged=%d", s.base(), query, page) + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + var mangas []source.SManga + doc.Find("div.original.card-lg div.unit, div.search-result div.unit").Each(func(_ int, el *goquery.Selection) { + m := s.parseMangaFromUnit(el) + if m.URL != "" && m.Title != "" { + mangas = append(mangas, m) + } + }) + hasNext := doc.Find(".pagination .next, a[rel=next]").Length() > 0 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return manga, err + } + result := source.SManga{URL: manga.URL} + result.Title = strings.TrimSpace(doc.Find("h1.entry-title, h1.manga-title, h1").First().Text()) + if result.Title == "" { + result.Title = manga.Title + } + if img := doc.Find("img.cover, div.cover img, div.manga-cover img").First(); img.Length() > 0 { + result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", img.AttrOr("data-src", ""))) + } + result.Description = strings.TrimSpace(doc.Find(".summary__content, .manga-summary, .description").First().Text()) + + var genres []string + doc.Find("div.genre-list div.genre-link, a.genre-tag").Each(func(_ int, a *goquery.Selection) { + if t := strings.TrimSpace(a.Text()); t != "" { + genres = append(genres, t) + } + }) + result.Genre = strings.Join(genres, ", ") + + // Status from stat-item containing "État du titre" or "Status" + doc.Find("div.stat-item").Each(func(_ int, el *goquery.Selection) { + label := strings.ToLower(el.Find("span").First().Text()) + if strings.Contains(label, "état") || strings.Contains(label, "status") { + val := strings.TrimSpace(el.Find("span.manga, span.value, span:last-child").Last().Text()) + result.Status = util.StatusFromString(val) + } + }) + return result, nil +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return nil, err + } + var chapters []source.SChapter + doc.Find("ul.scroll-sm li.item, ul.chapter-list li, li.wp-manga-chapter").Each(func(_ int, el *goquery.Selection) { + a := el.Find("a").First() + href := a.AttrOr("href", "") + if href == "" { + return + } + name := strings.TrimSpace(a.Text()) + if name == "" { + name = "Chapter" + } + var ts int64 + if span := el.Find("span.date, span.chapter-release-date, time").First(); span.Length() > 0 { + ts = parseFrenchDate(strings.TrimSpace(span.Text())) + } + chapters = append(chapters, source.SChapter{ + URL: href, + Name: name, + DateUpload: ts, + }) + }) + return chapters, nil +} + +func parseFrenchDate(s string) int64 { + lower := strings.ToLower(strings.TrimSpace(s)) + // "aujourd'hui" = today, "hier" = yesterday + if strings.HasPrefix(lower, "aujourd") { + return util.ParseRelativeDate("0 days ago") + } + if lower == "hier" { + return util.ParseRelativeDate("1 day ago") + } + // "{n} jour(s)" / "{n} heure(s)" / "{n} semaine(s)" + lower = strings.ReplaceAll(lower, "jour(s)", "days") + lower = strings.ReplaceAll(lower, "jours", "days") + lower = strings.ReplaceAll(lower, "jour", "day") + lower = strings.ReplaceAll(lower, "heure(s)", "hours") + lower = strings.ReplaceAll(lower, "heures", "hours") + lower = strings.ReplaceAll(lower, "heure", "hour") + lower = strings.ReplaceAll(lower, "semaine(s)", "weeks") + lower = strings.ReplaceAll(lower, "semaines", "weeks") + lower = strings.ReplaceAll(lower, "semaine", "week") + lower = strings.ReplaceAll(lower, "mois", "months") + lower = strings.ReplaceAll(lower, "an(s)", "years") + lower = strings.ReplaceAll(lower, "ans", "years") + lower = lower + " ago" + if ts := util.ParseRelativeDate(lower); ts != 0 { + return ts + } + return util.ParseAbsoluteDate(s, "2 January 2006") +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) + if err != nil { + return nil, err + } + var pages []source.Page + doc.Find("div.protected-image-data").Each(func(i int, el *goquery.Selection) { + encoded := strings.TrimSpace(el.AttrOr("data-src", "")) + if encoded == "" { + return + } + decoded, err := base64.StdEncoding.DecodeString(encoded) + if err != nil { + // Try RawStdEncoding (no padding) + decoded, err = base64.RawStdEncoding.DecodeString(encoded) + if err != nil { + return + } + } + imgURL := strings.TrimSpace(string(decoded)) + if imgURL != "" { + pages = append(pages, source.Page{Index: i, ImageURL: imgURL}) + } + }) + + // Fallback: regular img tags + if len(pages) == 0 { + doc.Find("div.reading-content img, div.reader-area img").Each(func(i int, img *goquery.Selection) { + u := img.AttrOr("src", img.AttrOr("data-src", "")) + if u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)}) + } + }) + } + return pages, nil +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil }