phase3: implement fansubscat, fuzzydoodle, galleryadults, gattsu, goda

This commit is contained in:
achmad
2026-05-11 06:45:48 +07:00
parent 1e6d72b046
commit 224266ffe3
6 changed files with 1178 additions and 5 deletions
+5 -5
View File
@@ -16,15 +16,15 @@ Detailed implementation notes for complex bases are in the **Notes** section at
- [x] `base/comiciviewer`
- [x] `base/eromuse`
- [x] `base/ezmanhwa`
- [ ] `base/fansubscat`
- [x] `base/fansubscat`
- [x] `base/fmreader` ⚠️ see notes
- [x] `base/foolslide` ⚠️ see notes
- [ ] `base/fuzzydoodle`
- [ ] `base/galleryadults`
- [ ] `base/gattsu`
- [x] `base/fuzzydoodle`
- [x] `base/galleryadults`
- [x] `base/gattsu`
- [x] `base/gigaviewer` ⚠️ see notes
- [x] `base/gmanga` ⚠️ see notes
- [ ] `base/goda`
- [x] `base/goda`
- [ ] `base/gravureblogger`
- [ ] `base/greenshit`
- [x] `base/grouple` ⚠️ see notes
+198
View File
@@ -0,0 +1,198 @@
// Package fansubscat implements the FansubsCat manga base.
// JSON REST API: GET {api}/manga/popular/{page}, /recent/{page}, /search/{page}
package fansubscat
import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"

	"goyomi/internal/httpclient"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
// Config describes one site served by the FansubsCat base.
type Config struct {
	Name         string // source name; passed to source.GenerateSourceID in New
	BaseURL      string // site root; sent (with a trailing "/") as the Referer header
	APIURL       string // API root; New substitutes BaseURL when this is empty
	Lang         string // language code; passed to source.GenerateSourceID in New
	IsHentaiSite bool   // not read by any code visible in this file
}

// Source is a FansubsCat source bound to one Config.
type Source struct {
	cfg    Config             // configuration captured by New
	client *httpclient.Client // client used for every API request
	id     int64              // identifier computed once in New
}
// New builds a Source for cfg. When cfg.APIURL is empty the API root falls
// back to cfg.BaseURL. The source ID is derived from cfg.Name and cfg.Lang.
func New(cfg Config) *Source {
	if cfg.APIURL == "" {
		cfg.APIURL = cfg.BaseURL
	}
	src := &Source{
		cfg:    cfg,
		client: httpclient.NewClient(httpclient.WithRateLimit(1, 2)),
	}
	src.id = source.GenerateSourceID(cfg.Name, cfg.Lang)
	return src
}
// ID returns the identifier computed in New.
func (s *Source) ID() int64 {
	return s.id
}

// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}

// Lang returns the configured language code.
func (s *Source) Lang() string {
	return s.cfg.Lang
}

// SupportsLatest reports that GetLatestUpdates is implemented.
func (s *Source) SupportsLatest() bool {
	return true
}

// api returns the configured API root with trailing slashes removed.
func (s *Source) api() string {
	root := s.cfg.APIURL
	return strings.TrimRight(root, "/")
}
// getJSON issues a GET for rawURL and decodes the JSON response body into out.
//
// The request carries an "Accept: application/json" header and a Referer of
// the configured base URL. Any status other than 200 is reported as an error,
// as are failures to read or decode the body. Underlying errors are wrapped
// with %w so callers can still inspect them with errors.Is / errors.As.
func (s *Source) getJSON(ctx context.Context, rawURL string, out any) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return fmt.Errorf("fansubscat: building request: %w", err)
	}
	req.Header.Set("Accept", "application/json")
	req.Header.Set("Referer", s.cfg.BaseURL+"/")

	resp, err := s.client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("fansubscat: HTTP %d", resp.StatusCode)
	}

	// A read failure must not be swallowed: decoding a truncated body would
	// otherwise surface as a misleading JSON syntax error.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("fansubscat: reading response: %w", err)
	}
	if err := json.Unmarshal(body, out); err != nil {
		return fmt.Errorf("fansubscat: decoding response: %w", err)
	}
	return nil
}
// resultWrapper is the common API envelope. The payload under "result" is
// kept raw so each caller can decode it into the shape it expects.
type resultWrapper struct {
	Result json.RawMessage `json:"result"`
}

// mangaDTO is one manga entry as decoded from the API; toSManga converts it
// into a source.SManga.
type mangaDTO struct {
	Slug         string `json:"slug"`
	Name         string `json:"name"`
	ThumbnailURL string `json:"thumbnail_url"`
	Synopsis     string `json:"synopsis"`
	Status       string `json:"status"`
	Genres       string `json:"genres"`
	Author       string `json:"author"`
}

// chapterDTO is one chapter entry as decoded from the API.
type chapterDTO struct {
	Slug string `json:"slug"`
	Name string `json:"name"`
	Date int64  `json:"date"` // multiplied by 1000 in GetChapterList; presumably Unix seconds — TODO confirm
}

// pageDTO is one page entry as decoded from the API.
type pageDTO struct {
	URL string `json:"url"`
}
// toSManga maps an API manga entry onto source.SManga. The slug becomes the
// manga URL and the status string is translated by util.StatusFromString.
func toSManga(m mangaDTO) source.SManga {
	var out source.SManga
	out.URL = m.Slug
	out.Title = m.Name
	out.Author = m.Author
	out.Description = m.Synopsis
	out.Genre = m.Genres
	out.Status = util.StatusFromString(m.Status)
	out.ThumbnailURL = m.ThumbnailURL
	return out
}
// fetchList downloads endpoint, decodes the enveloped array of manga entries
// and converts each one. HasNextPage is set when at least 20 entries came
// back.
func (s *Source) fetchList(ctx context.Context, endpoint string) (source.MangasPage, error) {
	var envelope resultWrapper
	err := s.getJSON(ctx, endpoint, &envelope)
	if err != nil {
		return source.MangasPage{}, err
	}

	var entries []mangaDTO
	err = json.Unmarshal(envelope.Result, &entries)
	if err != nil {
		return source.MangasPage{}, err
	}

	list := make([]source.SManga, 0, len(entries))
	for _, entry := range entries {
		list = append(list, toSManga(entry))
	}

	page := source.MangasPage{Mangas: list}
	page.HasNextPage = len(entries) >= 20
	return page, nil
}
// GetPopularManga lists the given page of {api}/manga/popular/{page}.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	endpoint := fmt.Sprintf("%s/manga/popular/%d", s.api(), page)
	return s.fetchList(context.Background(), endpoint)
}
// GetLatestUpdates lists the given page of {api}/manga/recent/{page}.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	endpoint := fmt.Sprintf("%s/manga/recent/%d", s.api(), page)
	return s.fetchList(context.Background(), endpoint)
}
// GetSearchManga lists the given page of {api}/manga/search/{page}?type=all,
// adding a "query" parameter when query is non-empty. filters is not used.
//
// The query is percent-encoded so that spaces, '&', '#' and non-ASCII text
// reach the server as part of the search term instead of corrupting the URL.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	u := fmt.Sprintf("%s/manga/search/%d?type=all", s.api(), page)
	if query != "" {
		u += "&query=" + url.QueryEscape(query)
	}
	return s.fetchList(context.Background(), u)
}
// GetMangaDetails fetches {api}/manga/details/{slug}, where slug is whatever
// follows the last "/" in manga.URL (or all of it when there is no "/").
// The returned manga keeps the caller's URL. On failure the input manga is
// returned together with the error.
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	// LastIndex yields -1 when no slash exists, so the slice starts at 0.
	slug := manga.URL[strings.LastIndex(manga.URL, "/")+1:]
	endpoint := fmt.Sprintf("%s/manga/details/%s", s.api(), slug)

	var envelope resultWrapper
	if err := s.getJSON(context.Background(), endpoint, &envelope); err != nil {
		return manga, err
	}

	var details mangaDTO
	if err := json.Unmarshal(envelope.Result, &details); err != nil {
		return manga, err
	}

	result := toSManga(details)
	result.URL = manga.URL
	return result, nil
}
// GetChapterList fetches {api}/manga/chapters/{slug}, where slug is whatever
// follows the last "/" in manga.URL, and converts each entry in the order
// the API returned it. The API date is multiplied by 1000 for DateUpload.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	// LastIndex yields -1 when no slash exists, so the slice starts at 0.
	slug := manga.URL[strings.LastIndex(manga.URL, "/")+1:]
	endpoint := fmt.Sprintf("%s/manga/chapters/%s", s.api(), slug)

	var envelope resultWrapper
	if err := s.getJSON(context.Background(), endpoint, &envelope); err != nil {
		return nil, err
	}

	var entries []chapterDTO
	if err := json.Unmarshal(envelope.Result, &entries); err != nil {
		return nil, err
	}

	list := make([]source.SChapter, 0, len(entries))
	for _, entry := range entries {
		var ch source.SChapter
		ch.URL = entry.Slug
		ch.Name = entry.Name
		ch.DateUpload = entry.Date * 1000
		list = append(list, ch)
	}
	return list, nil
}
// GetPageList fetches {api}/manga/pages/{slug}, where slug is whatever
// follows the last "/" in chapter.URL, and returns one page per entry with
// Index set to the entry's position.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	// LastIndex yields -1 when no slash exists, so the slice starts at 0.
	slug := chapter.URL[strings.LastIndex(chapter.URL, "/")+1:]
	endpoint := fmt.Sprintf("%s/manga/pages/%s", s.api(), slug)

	var envelope resultWrapper
	if err := s.getJSON(context.Background(), endpoint, &envelope); err != nil {
		return nil, err
	}

	var entries []pageDTO
	if err := json.Unmarshal(envelope.Result, &entries); err != nil {
		return nil, err
	}

	list := make([]source.Page, 0, len(entries))
	for idx := range entries {
		list = append(list, source.Page{Index: idx, ImageURL: entries[idx].URL})
	}
	return list, nil
}
// GetImageURL returns the image URL already stored on page.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}

// GetFilterList returns nil: this base exposes no filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
+259
View File
@@ -0,0 +1,259 @@
// Package fuzzydoodle implements the FuzzyDoodle manga base.
// https://github.com/jhin1m/fuzzy-doodle — HTML scraping, CF-protected.
package fuzzydoodle
import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"goyomi/internal/httpclient"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
// Config describes one site served by the FuzzyDoodle base.
type Config struct {
	Name    string // source name; passed to source.GenerateSourceID in New
	BaseURL string // site root; used to build request URLs and the Referer header
	Lang    string // language code; passed to source.GenerateSourceID in New
}

// Source is a FuzzyDoodle source bound to one Config.
type Source struct {
	cfg    Config             // configuration captured by New
	client *httpclient.Client // client used for every page request
	id     int64              // identifier computed once in New
}
// New builds a Source for cfg with its own rate-limited HTTP client. The
// source ID is derived from cfg.Name and cfg.Lang.
func New(cfg Config) *Source {
	src := &Source{
		cfg:    cfg,
		client: httpclient.NewClient(httpclient.WithRateLimit(1, 2)),
	}
	src.id = source.GenerateSourceID(cfg.Name, cfg.Lang)
	return src
}
// ID returns the identifier computed in New.
func (s *Source) ID() int64 {
	return s.id
}

// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}

// Lang returns the configured language code.
func (s *Source) Lang() string {
	return s.cfg.Lang
}

// SupportsLatest reports that GetLatestUpdates is implemented.
func (s *Source) SupportsLatest() bool {
	return true
}

// base returns the configured base URL with trailing slashes removed.
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
// get downloads rawURL, sending the configured base URL as Referer, and
// parses the body as HTML. Any status other than 200 is returned as an error.
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("Referer", s.cfg.BaseURL+"/")

	response, err := s.client.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("fuzzydoodle: HTTP %d", code)
	}
	return goquery.NewDocumentFromReader(response.Body)
}
// imgAttr returns the absolute image URL for img, resolved against baseURL.
//
// The lazy-loading attributes are tried before "src"; empty values and inline
// "data:" URIs are skipped. If none of them yields a URL, the first candidate
// of "srcset" is used. An empty string is returned when nothing usable is
// found.
func imgAttr(img *goquery.Selection, baseURL string) string {
	for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "src"} {
		if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") {
			return util.AbsURL(baseURL, v)
		}
	}

	// srcset is a comma-separated list of "URL descriptor" candidates. Take
	// the first whitespace-delimited token and drop a separator comma glued
	// to it. A whitespace-only attribute yields no fields and must not be
	// indexed.
	if v, ok := img.Attr("srcset"); ok {
		if fields := strings.Fields(v); len(fields) > 0 {
			if first := strings.TrimRight(fields[0], ","); first != "" {
				return util.AbsURL(baseURL, first)
			}
		}
	}
	return ""
}
// mangaFromElement reads one listing card: the href of its first link, the
// text of "h2.text-sm" (falling back to the first h2/h3) as the title, and
// the first image as the thumbnail.
func mangaFromElement(el *goquery.Selection, baseURL string) source.SManga {
	var entry source.SManga

	entry.URL = el.Find("a").First().AttrOr("href", "")

	title := strings.TrimSpace(el.Find("h2.text-sm").Text())
	if title == "" {
		title = strings.TrimSpace(el.Find("h2, h3").First().Text())
	}
	entry.Title = title

	if cover := el.Find("img").First(); cover.Length() > 0 {
		entry.ThumbnailURL = imgAttr(cover, baseURL)
	}
	return entry
}
// hasNextPage reports whether the last pagination item exists and is not
// marked with the "pagination-disabled" class.
func hasNextPage(doc *goquery.Document) bool {
	const nextSelector = "ul.pagination > li:last-child:not(.pagination-disabled)"
	return doc.Find(nextSelector).Length() != 0
}
// parsePage collects every "div#card-real" card that has a link and pairs the
// list with the pagination state of doc.
func (s *Source) parsePage(doc *goquery.Document) source.MangasPage {
	var found []source.SManga
	doc.Find("div#card-real").Each(func(_ int, card *goquery.Selection) {
		entry := mangaFromElement(card, s.cfg.BaseURL)
		if entry.URL == "" {
			return
		}
		found = append(found, entry)
	})

	page := source.MangasPage{Mangas: found}
	page.HasNextPage = hasNextPage(doc)
	return page
}
// GetPopularManga parses {base}/manga?page={page}.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	listURL := fmt.Sprintf("%s/manga?page=%d", s.base(), page)
	doc, err := s.get(context.Background(), listURL)
	if err == nil {
		return s.parsePage(doc), nil
	}
	return source.MangasPage{}, err
}
// GetLatestUpdates parses {base}/latest?page={page}.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	listURL := fmt.Sprintf("%s/latest?page=%d", s.base(), page)
	doc, err := s.get(context.Background(), listURL)
	if err == nil {
		return s.parsePage(doc), nil
	}
	return source.MangasPage{}, err
}
// GetSearchManga parses {base}/manga?title={query}&page={page}. filters is
// not used.
//
// The query is percent-encoded so that spaces, '&', '#' and non-ASCII text
// reach the server as the title parameter instead of corrupting the URL.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	u := fmt.Sprintf("%s/manga?title=%s&page=%d", s.base(), url.QueryEscape(query), page)
	doc, err := s.get(context.Background(), u)
	if err != nil {
		return source.MangasPage{}, err
	}
	return s.parsePage(doc), nil
}
// getInfo scans every <p> under doc; when the text of a paragraph's first
// <span> contains label, the trimmed text of its "span.capitalize" becomes
// the result. If several paragraphs match, the last one wins. An empty string
// means no paragraph matched.
func getInfo(doc *goquery.Selection, label string) string {
	value := ""
	doc.Find("p").Each(func(_ int, para *goquery.Selection) {
		heading := para.Find("span").First().Text()
		if !strings.Contains(heading, label) {
			return
		}
		value = strings.TrimSpace(para.Find("span.capitalize").Text())
	})
	return value
}
// parseStatus maps a status label, compared case-insensitively and ignoring
// surrounding whitespace, to a source status constant. Unrecognized labels
// map to source.StatusUnknown.
func parseStatus(s string) int {
	label := strings.ToLower(strings.TrimSpace(s))
	switch {
	case label == "ongoing" || label == "en cours":
		return source.StatusOngoing
	case label == "completed" || label == "completed (season)" || label == "terminé":
		return source.StatusCompleted
	case label == "hiatus" || label == "on hiatus":
		return source.StatusHiatus
	}
	return source.StatusUnknown
}
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
if err != nil {
return manga, err
}
result := source.SManga{URL: manga.URL}
result.Title = strings.TrimSpace(doc.Find("h1").First().Text())
if result.Title == "" {
result.Title = manga.Title
}
result.ThumbnailURL = imgAttr(doc.Find("div.relative img").First(), s.cfg.BaseURL)
var genres []string
doc.Find("div.flex > a.inline-block").Each(func(_ int, el *goquery.Selection) {
if t := strings.TrimSpace(el.Text()); t != "" {
genres = append(genres, t)
}
})
// description
var descParts []string
doc.Find("div:has(> p#description)").First().Each(func(_ int, el *goquery.Selection) {
descParts = append(descParts, strings.TrimSpace(el.Text()))
})
result.Description = strings.Join(descParts, "\n\n")
if t := getInfo(doc.Selection, "Status"); t != "" || getInfo(doc.Selection, "Statut") != "" {
st := t
if st == "" {
st = getInfo(doc.Selection, "Statut")
}
result.Status = parseStatus(st)
}
if a := getInfo(doc.Selection, "Artist"); a != "" {
result.Artist = a
} else if a := getInfo(doc.Selection, "Artiste"); a != "" {
result.Artist = a
}
if a := getInfo(doc.Selection, "Author"); a != "" {
result.Author = a
} else if a := getInfo(doc.Selection, "Auteur"); a != "" {
result.Author = a
}
if t := getInfo(doc.Selection, "Type"); t != "" {
genres = append([]string{t}, genres...)
}
result.Genre = strings.Join(genres, ", ")
return result, nil
}
// GetChapterList walks the paginated chapter list on the manga page and
// returns the chapters in page order.
//
// Page 1 is the manga URL itself; later pages append "?page=N". A failure on
// page 1 is returned to the caller. A failure on a later page ends the walk
// and returns the chapters gathered so far. The walk also ends when
// hasNextPage reports no further page.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	mangaURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
	var chapters []source.SChapter

	for page := 1; ; page++ {
		u := mangaURL
		if page > 1 {
			u = fmt.Sprintf("%s?page=%d", mangaURL, page)
		}
		doc, err := s.get(context.Background(), u)
		if err != nil {
			if page == 1 {
				return nil, err
			}
			// Best effort: keep what earlier pages yielded.
			break
		}

		doc.Find("div#chapters-list > a[href]").Each(func(_ int, a *goquery.Selection) {
			ch := source.SChapter{}
			ch.URL, _ = a.Attr("href")
			ch.Name = strings.TrimSpace(a.Find("#item-title, span").First().Text())
			if ch.Name == "" {
				ch.Name = strings.TrimSpace(a.Text())
			}
			// Try each non-empty <p>/<span> text as a date until one parses
			// to a non-zero value.
			a.Find("p, span").Each(func(_ int, el *goquery.Selection) {
				if t := strings.TrimSpace(el.Text()); t != "" && ch.DateUpload == 0 {
					ch.DateUpload = util.ParseRelativeDate(t)
				}
			})
			if ch.URL != "" {
				chapters = append(chapters, ch)
			}
		})

		// Same pagination rule as the listing pages.
		if !hasNextPage(doc) {
			break
		}
	}
	return chapters, nil
}
// GetPageList downloads the chapter page and returns one page per direct
// <img> child of "div#chapter-container" that has a usable URL. Index is the
// image's position among all matched images, including skipped ones.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	chapterURL := util.AbsURL(s.cfg.BaseURL, chapter.URL)
	doc, err := s.get(context.Background(), chapterURL)
	if err != nil {
		return nil, err
	}

	var list []source.Page
	doc.Find("div#chapter-container > img").Each(func(pos int, img *goquery.Selection) {
		src := imgAttr(img, s.cfg.BaseURL)
		if src == "" {
			return
		}
		list = append(list, source.Page{Index: pos, ImageURL: src})
	})
	return list, nil
}
// GetImageURL returns the image URL already stored on page.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}

// GetFilterList returns nil: this base exposes no filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
+210
View File
@@ -0,0 +1,210 @@
// Package galleryadults implements the GalleryAdults adult gallery base.
// HTML scraping; popular: GET {base}/language/{lang}/popular/?page={n}.
package galleryadults
import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"regexp"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"goyomi/internal/httpclient"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
// Config describes one site served by the GalleryAdults base.
type Config struct {
	Name      string // source name; passed to source.GenerateSourceID in New
	BaseURL   string // site root; used to build request URLs and the Referer header
	Lang      string // language code; passed to source.GenerateSourceID in New
	MangaLang string // language path segment, e.g. "english"; empty = all
}

// Source is a GalleryAdults source bound to one Config.
type Source struct {
	cfg    Config             // configuration captured by New
	client *httpclient.Client // client used for every page request
	id     int64              // identifier computed once in New
}
// New builds a Source for cfg with its own rate-limited HTTP client. The
// source ID is derived from cfg.Name and cfg.Lang.
func New(cfg Config) *Source {
	src := &Source{
		cfg:    cfg,
		client: httpclient.NewClient(httpclient.WithRateLimit(1, 2)),
	}
	src.id = source.GenerateSourceID(cfg.Name, cfg.Lang)
	return src
}
// ID returns the identifier computed in New.
func (s *Source) ID() int64 {
	return s.id
}

// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}

// Lang returns the configured language code.
func (s *Source) Lang() string {
	return s.cfg.Lang
}

// SupportsLatest reports that GetLatestUpdates is implemented.
func (s *Source) SupportsLatest() bool {
	return true
}

// base returns the configured base URL with trailing slashes removed.
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
// get downloads rawURL, sending the configured base URL as Referer, and
// parses the body as HTML. Any status other than 200 is returned as an error.
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("Referer", s.cfg.BaseURL+"/")

	response, err := s.client.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("galleryadults: HTTP %d", code)
	}
	return goquery.NewDocumentFromReader(response.Body)
}
// shortenTitleRe matches a "[...]" segment, or a segment opened by "(" or "{"
// and closed by ")" or "}"; mangaTitle deletes every match from a title.
var shortenTitleRe = regexp.MustCompile(`(\[[^\]]*\]|[({][^)}]*[)}])`)

// tagCountRe matches a trailing "(digits)" with surrounding whitespace;
// getInfo strips it from tag link text.
var tagCountRe = regexp.MustCompile(`\s*\(\d+\)\s*$`)
// mangaTitle reads the text under selector inside el, deletes every segment
// matched by shortenTitleRe and trims surrounding whitespace.
func (s *Source) mangaTitle(el *goquery.Selection, selector string) string {
	text := el.Find(selector).Text()
	text = strings.TrimSpace(text)
	text = shortenTitleRe.ReplaceAllString(text, "")
	return strings.TrimSpace(text)
}
// imgAttr returns the absolute image URL for img, resolved against baseURL.
// Lazy-loading attributes are tried before "src"; missing or empty values and
// inline "data:" URIs are skipped. An empty string means nothing was usable.
func imgAttr(img *goquery.Selection, baseURL string) string {
	candidates := [...]string{"data-lazy-src", "data-src", "data-cfsrc", "src"}
	for _, name := range candidates {
		value, present := img.Attr(name)
		if !present || value == "" {
			continue
		}
		if strings.HasPrefix(value, "data:") {
			continue
		}
		return util.AbsURL(baseURL, value)
	}
	return ""
}
// thumbnailToFull converts a thumbnail URL to the full image URL by removing
// the "t" that sits directly before the final file extension
// (e.g. "1t.jpg" → "1.jpg").
//
// The URL is returned unchanged when it has no extension after its last "/",
// or when the character before the extension is not "t". Only the final path
// element is rewritten; an earlier "t.ext" elsewhere in the URL is left alone.
func thumbnailToFull(u string) string {
	dot := strings.LastIndex(u, ".")
	// dot < 1 covers "no dot at all" and "dot is the first byte". A "/"
	// after the dot means the dot belongs to the host or a directory, not
	// to a file extension.
	if dot < 1 || strings.Contains(u[dot:], "/") || u[dot-1] != 't' {
		return u
	}
	return u[:dot-1] + u[dot:]
}
// mangaFromElement reads one gallery card: the href of the first link under
// ".inner_thumb", the shortened ".caption" text as the title, and the first
// image under ".inner_thumb" as the thumbnail.
func (s *Source) mangaFromElement(el *goquery.Selection) source.SManga {
	var entry source.SManga

	entry.URL = el.Find(".inner_thumb a").First().AttrOr("href", "")
	entry.Title = s.mangaTitle(el, ".caption")

	if cover := el.Find(".inner_thumb img").First(); cover.Length() > 0 {
		entry.ThumbnailURL = imgAttr(cover, s.cfg.BaseURL)
	}
	return entry
}
// parsePage collects every "div.thumb" card that has both a link and a title.
// HasNextPage is set when the document contains a next-page link.
func (s *Source) parsePage(doc *goquery.Document) source.MangasPage {
	var found []source.SManga
	doc.Find("div.thumb").Each(func(_ int, card *goquery.Selection) {
		entry := s.mangaFromElement(card)
		if entry.URL == "" || entry.Title == "" {
			return
		}
		found = append(found, entry)
	})

	page := source.MangasPage{Mangas: found}
	page.HasNextPage = doc.Find(".next.page-numbers, a[aria-label=Next]").Length() > 0
	return page
}
// langPath returns "language/{MangaLang}/" when a gallery language is
// configured and an empty string otherwise.
func (s *Source) langPath() string {
	lang := s.cfg.MangaLang
	if lang == "" {
		return ""
	}
	return "language/" + lang + "/"
}
// GetPopularManga parses {base}/{langPath}popular/?page={page}.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	listURL := fmt.Sprintf("%s/%spopular/?page=%d", s.base(), s.langPath(), page)
	doc, err := s.get(context.Background(), listURL)
	if err == nil {
		return s.parsePage(doc), nil
	}
	return source.MangasPage{}, err
}
// GetLatestUpdates parses {base}/{langPath}?page={page}.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	listURL := fmt.Sprintf("%s/%s?page=%d", s.base(), s.langPath(), page)
	doc, err := s.get(context.Background(), listURL)
	if err == nil {
		return s.parsePage(doc), nil
	}
	return source.MangasPage{}, err
}
// GetSearchManga parses {base}/search/?q={query}&page={page}. filters is not
// used.
//
// The query is percent-encoded so that spaces, '&', '#' and non-ASCII text
// reach the server as the q parameter instead of corrupting the URL.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	u := fmt.Sprintf("%s/search/?q=%s&page=%d", s.base(), url.QueryEscape(query), page)
	doc, err := s.get(context.Background(), u)
	if err != nil {
		return source.MangasPage{}, err
	}
	return s.parsePage(doc), nil
}
// getInfo gathers the "a.tag_btn" link texts from every ".tags" element under
// el whose text contains "{tag}:". A trailing "(count)" is removed from each
// text, empty results are dropped, and the rest are joined with ", ".
func getInfo(el *goquery.Selection, tag string) string {
	marker := tag + ":"
	var names []string
	el.Find(".tags").Each(func(_ int, group *goquery.Selection) {
		if !strings.Contains(group.Text(), marker) {
			return
		}
		group.Find("a.tag_btn").Each(func(_ int, link *goquery.Selection) {
			name := strings.TrimSpace(link.Text())
			name = tagCountRe.ReplaceAllString(name, "")
			if name == "" {
				return
			}
			names = append(names, name)
		})
	})
	return strings.Join(names, ", ")
}
// GetMangaDetails downloads the gallery page and reads title, cover, tags and
// artists from its ".gallery_top" section. The status is always
// source.StatusCompleted and the title falls back to manga.Title when the
// page yields none. On a request failure the input manga is returned together
// with the error.
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	pageURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
	doc, err := s.get(context.Background(), pageURL)
	if err != nil {
		return manga, err
	}

	header := doc.Find(".gallery_top")

	var details source.SManga
	details.URL = manga.URL
	details.Status = source.StatusCompleted

	if title := s.mangaTitle(header, "h1"); title != "" {
		details.Title = title
	} else {
		details.Title = manga.Title
	}

	if cover := header.Find(".cover img").First(); cover.Length() > 0 {
		details.ThumbnailURL = imgAttr(cover, s.cfg.BaseURL)
	}

	details.Genre = getInfo(header, "Tags")
	details.Author = getInfo(header, "Artists")
	return details, nil
}
// GetChapterList returns a single chapter named "Chapter" that points at the
// gallery URL itself; no request is made.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	only := source.SChapter{URL: manga.URL, Name: "Chapter"}
	return []source.SChapter{only}, nil
}
// GetPageList downloads the gallery page and builds the page list from the
// thumbnails under ".gallery_thumb a", converting each thumbnail URL with
// thumbnailToFull. When no thumbnail image yields a URL, the link targets
// themselves are used instead.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	galleryURL := util.AbsURL(s.cfg.BaseURL, chapter.URL)
	doc, err := s.get(context.Background(), galleryURL)
	if err != nil {
		return nil, err
	}

	var list []source.Page
	doc.Find(".gallery_thumb a img").Each(func(pos int, thumb *goquery.Selection) {
		src := imgAttr(thumb, s.cfg.BaseURL)
		if src == "" {
			return
		}
		list = append(list, source.Page{Index: pos, ImageURL: thumbnailToFull(src)})
	})
	if len(list) > 0 {
		return list, nil
	}

	// Fallback: use the hrefs of the thumbnail links directly.
	doc.Find(".gallery_thumb a").Each(func(pos int, link *goquery.Selection) {
		href, present := link.Attr("href")
		if !present || href == "" {
			return
		}
		list = append(list, source.Page{Index: pos, ImageURL: util.AbsURL(s.cfg.BaseURL, href)})
	})
	return list, nil
}
// GetImageURL returns the image URL already stored on page.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}

// GetFilterList returns nil: this base exposes no filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
+192
View File
@@ -0,0 +1,192 @@
// Package gattsu implements the Gattsu Brazilian adult manga base.
// Popular = Latest: GET {base}/page/{n}; no separate popular endpoint.
package gattsu
import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"regexp"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"goyomi/internal/httpclient"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
// Config describes one site served by the Gattsu base.
type Config struct {
	Name    string // source name; passed to source.GenerateSourceID in New
	BaseURL string // site root; used to build request URLs and the Referer header
	Lang    string // language code; passed to source.GenerateSourceID in New
}

// Source is a Gattsu source bound to one Config.
type Source struct {
	cfg    Config             // configuration captured by New
	client *httpclient.Client // client used for every page request
	id     int64              // identifier computed once in New
}
// New builds a Source for cfg with its own rate-limited HTTP client. The
// source ID is derived from cfg.Name and cfg.Lang.
func New(cfg Config) *Source {
	src := &Source{
		cfg:    cfg,
		client: httpclient.NewClient(httpclient.WithRateLimit(1, 2)),
	}
	src.id = source.GenerateSourceID(cfg.Name, cfg.Lang)
	return src
}
// ID returns the identifier computed in New.
func (s *Source) ID() int64 {
	return s.id
}

// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}

// Lang returns the configured language code.
func (s *Source) Lang() string {
	return s.cfg.Lang
}

// SupportsLatest reports that GetLatestUpdates is implemented.
func (s *Source) SupportsLatest() bool {
	return true
}

// base returns the configured base URL with trailing slashes removed.
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
// get downloads rawURL, sending the configured base URL as Referer, and
// parses the body as HTML. Any status other than 200 is returned as an error.
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("Referer", s.cfg.BaseURL+"/")

	response, err := s.client.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("gattsu: HTTP %d", code)
	}
	return goquery.NewDocumentFromReader(response.Body)
}
// thumbSizeRe matches a "-{width}x{height}." size suffix such as "-150x150.".
var thumbSizeRe = regexp.MustCompile(`-\d+x\d+\.`)

// withoutSize replaces every size suffix matched by thumbSizeRe in u with a
// single ".", e.g. "cover-150x150.jpg" becomes "cover.jpg".
func withoutSize(u string) string {
	const dot = "."
	return thumbSizeRe.ReplaceAllLiteralString(u, dot)
}
// imgAttr returns the absolute image URL for img, resolved against baseURL.
// Lazy-loading attributes are tried before "src"; missing or empty values and
// inline "data:" URIs are skipped. An empty string means nothing was usable.
func imgAttr(img *goquery.Selection, baseURL string) string {
	candidates := [...]string{"data-lazy-src", "data-src", "data-cfsrc", "src"}
	for _, name := range candidates {
		value, present := img.Attr(name)
		if !present || value == "" {
			continue
		}
		if strings.HasPrefix(value, "data:") {
			continue
		}
		return util.AbsURL(baseURL, value)
	}
	return ""
}
// mangaFromElement reads one listing link: its href, the text of
// "span.thumb-titulo" as the title, and the "src" of the first post image
// (with its size suffix removed) as the thumbnail.
func (s *Source) mangaFromElement(el *goquery.Selection) source.SManga {
	var entry source.SManga
	entry.URL = el.AttrOr("href", "")
	entry.Title = strings.TrimSpace(el.Find("span.thumb-titulo").Text())

	cover := el.Find("span.thumb-imagem img.wp-post-image").First()
	if src, present := cover.Attr("src"); present {
		entry.ThumbnailURL = withoutSize(util.AbsURL(s.cfg.BaseURL, src))
	}
	return entry
}
// parseList collects every listing link whose href starts with the site's
// base URL and that carries a non-empty href. HasNextPage is set when the
// document contains a next-page link.
func (s *Source) parseList(doc *goquery.Document) source.MangasPage {
	var mangas []source.SManga

	// The attribute value must be quoted: an unquoted CSS attribute value has
	// to be a plain identifier, and a URL (":" and "/") is not one, so the
	// unquoted form is an invalid selector and cannot match any link.
	sel := fmt.Sprintf("div.meio div.lista ul li a[href^=%q]", s.base())
	doc.Find(sel).Each(func(_ int, el *goquery.Selection) {
		m := s.mangaFromElement(el)
		if m.URL != "" {
			mangas = append(mangas, m)
		}
	})

	hasNext := doc.Find(".next.page-numbers, a.next").Length() > 0
	return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
}
// fetchPage downloads and parses one listing page: "{base}/" for page 1 and
// "{base}/page/{n}" for every other page number.
func (s *Source) fetchPage(page int) (source.MangasPage, error) {
	listURL := s.base() + "/"
	if page != 1 {
		listURL = fmt.Sprintf("%s/page/%d", s.base(), page)
	}

	doc, err := s.get(context.Background(), listURL)
	if err == nil {
		return s.parseList(doc), nil
	}
	return source.MangasPage{}, err
}
// GetPopularManga returns the plain listing page; it is served by the same
// fetchPage call as GetLatestUpdates.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	return s.fetchPage(page)
}

// GetLatestUpdates returns the plain listing page.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	return s.fetchPage(page)
}
// GetSearchManga parses the search results for query: "{base}/?s=..." for
// page 1 and "{base}/page/{n}/?s=..." for later pages. filters is not used.
//
// The query is percent-encoded so that spaces, '&', '#' and non-ASCII text
// reach the server as the s parameter instead of corrupting the URL.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	escaped := url.QueryEscape(query)
	u := fmt.Sprintf("%s/?s=%s&post_type=post", s.base(), escaped)
	if page > 1 {
		u = fmt.Sprintf("%s/page/%d/?s=%s&post_type=post", s.base(), page, escaped)
	}
	doc, err := s.get(context.Background(), u)
	if err != nil {
		return source.MangasPage{}, err
	}
	return s.parseList(doc), nil
}
// GetMangaDetails downloads the post page and reads title, artist, tags,
// description and cover from its first "div.meio div.post-box". The status is
// always source.StatusCompleted and the title falls back to manga.Title when
// the page yields none. On a request failure the input manga is returned
// together with the error.
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	pageURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
	doc, err := s.get(context.Background(), pageURL)
	if err != nil {
		return manga, err
	}

	box := doc.Find("div.meio div.post-box").First()

	var details source.SManga
	details.URL = manga.URL
	details.Status = source.StatusCompleted

	details.Title = strings.TrimSpace(box.Find("h1.post-titulo").Text())
	if details.Title == "" {
		details.Title = manga.Title
	}

	artist := box.Find("ul.post-itens li:contains(Artista) a").First().Text()
	details.Author = strings.TrimSpace(artist)

	var tags []string
	box.Find("ul.post-itens li:contains(Tags) a").Each(func(_ int, link *goquery.Selection) {
		tag := strings.TrimSpace(link.Text())
		if tag == "" {
			return
		}
		tags = append(tags, tag)
	})
	details.Genre = strings.Join(tags, ", ")

	// Each non-empty paragraph becomes one block of the description, with a
	// leading "Sinopse :" label removed.
	var paragraphs []string
	box.Find("div.post-texto p").Each(func(_ int, para *goquery.Selection) {
		text := strings.TrimSpace(para.Text())
		text = strings.TrimSpace(strings.TrimPrefix(text, "Sinopse :"))
		if text == "" {
			return
		}
		paragraphs = append(paragraphs, text)
	})
	details.Description = strings.Join(paragraphs, "\n\n")

	cover := box.Find("div.post-capa > img.wp-post-image").First()
	if src, present := cover.Attr("src"); present {
		details.ThumbnailURL = withoutSize(util.AbsURL(s.cfg.BaseURL, src))
	}
	return details, nil
}
// GetChapterList returns a single chapter that points at the manga URL itself
// and carries the manga title as its name; no request is made.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	only := source.SChapter{URL: manga.URL, Name: manga.Title}
	return []source.SChapter{only}, nil
}
// GetPageList downloads the post page and returns one page per gallery image
// that has a usable URL, with the size suffix removed from each URL. Index is
// the image's position among all matched images, including skipped ones.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	postURL := util.AbsURL(s.cfg.BaseURL, chapter.URL)
	doc, err := s.get(context.Background(), postURL)
	if err != nil {
		return nil, err
	}

	const imageSelector = "div.meio div.post-box ul.post-fotos li a > img, " +
		"div.meio div.post-box.listaImagens div.galeriaHtml img"

	var list []source.Page
	doc.Find(imageSelector).Each(func(pos int, img *goquery.Selection) {
		src := imgAttr(img, s.cfg.BaseURL)
		if src == "" {
			return
		}
		list = append(list, source.Page{Index: pos, ImageURL: withoutSize(src)})
	})
	return list, nil
}
// GetImageURL returns the image URL already stored on page.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}

// GetFilterList returns nil: this base exposes no filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
+314
View File
@@ -0,0 +1,314 @@
// Package goda implements the GoDa manga base.
// Popular: GET {base}/hots/page/{n}; Chapter list via {base}/manga/get?mid={id}&mode=all.
package goda
import (
"context"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/PuerkitoBio/goquery"
"goyomi/internal/httpclient"
"goyomi/internal/source"
)
// Config describes one site served by the GoDa base.
type Config struct {
	Name    string // source name; passed to source.GenerateSourceID in New
	BaseURL string // site root; used to build request URLs and the Referer header
	Lang    string // language code; "zh" switches the next-page label in parseMangaList
}

// Source is a GoDa source bound to one Config.
type Source struct {
	cfg    Config             // configuration captured by New
	client *httpclient.Client // client used for every request
	id     int64              // identifier computed once in New
}
// New builds a Source for cfg with its own rate-limited HTTP client. The
// source ID is derived from cfg.Name and cfg.Lang.
func New(cfg Config) *Source {
	src := &Source{
		cfg:    cfg,
		client: httpclient.NewClient(httpclient.WithRateLimit(1, 2)),
	}
	src.id = source.GenerateSourceID(cfg.Name, cfg.Lang)
	return src
}
// ID returns the identifier computed in New.
func (s *Source) ID() int64 {
	return s.id
}

// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}

// Lang returns the configured language code.
func (s *Source) Lang() string {
	return s.cfg.Lang
}

// SupportsLatest reports that GetLatestUpdates is implemented.
func (s *Source) SupportsLatest() bool {
	return true
}

// base returns the configured base URL with trailing slashes removed.
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
// get downloads rawURL, sending the configured base URL as Referer, and
// parses the body as HTML. Any status other than 200 is returned as an error.
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("Referer", s.cfg.BaseURL+"/")

	response, err := s.client.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("goda: HTTP %d", code)
	}
	return goquery.NewDocumentFromReader(response.Body)
}
// getRaw downloads rawURL, sending the configured base URL as Referer, and
// returns the response body bytes.
//
// Like get, it treats any status other than 200 as an error, so an error page
// is never handed to callers as if it were the requested content.
func (s *Source) getRaw(ctx context.Context, rawURL string) ([]byte, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Referer", s.cfg.BaseURL+"/")

	resp, err := s.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("goda: HTTP %d", resp.StatusCode)
	}
	return io.ReadAll(resp.Body)
}
// getKey derives the manga key from href: the URL path, cut down to whatever
// follows the first "/manga/" when that segment is present, with one trailing
// "/" removed. When href cannot be parsed as a URL it is returned unchanged.
func getKey(href string) string {
	parsed, err := url.Parse(href)
	if err != nil {
		return href
	}
	key := parsed.Path
	if _, rest, found := strings.Cut(key, "/manga/"); found {
		key = rest
	}
	return strings.TrimSuffix(key, "/")
}
// getMangaID reads the "data-mid" attribute of the #mangachapters element and
// returns an empty string when the element or the attribute is missing.
func getMangaID(doc *goquery.Document) string {
	id, _ := doc.Find("#mangachapters").Attr("data-mid")
	return id
}
// parseMangaList collects every listing card link that yields both a key and
// a title. A thumbnail served as "...?url=<real>" is replaced by its "url"
// query value. HasNextPage is set when a next-page link wrapping a <button>
// exists; its aria-label is "下一頁" for Lang "zh" and "NEXT" otherwise.
func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
	var found []source.SManga
	doc.Find(".container > .cardlist .pb-2 a").Each(func(_ int, card *goquery.Selection) {
		var entry source.SManga
		entry.URL = getKey(card.AttrOr("href", ""))
		entry.Title = strings.TrimSpace(card.Find("h3").Text())

		if cover := card.Find("img").First(); cover.Length() > 0 {
			thumb := cover.AttrOr("src", "")
			// Some sites proxy images as ?url=...
			if strings.Contains(thumb, "url=") {
				if parsed, err := url.Parse(thumb); err == nil {
					if inner := parsed.Query().Get("url"); inner != "" {
						thumb = inner
					}
				}
			}
			entry.ThumbnailURL = thumb
		}

		if entry.URL == "" || entry.Title == "" {
			return
		}
		found = append(found, entry)
	})

	label := "NEXT"
	if s.cfg.Lang == "zh" {
		label = "下一頁"
	}
	nextSelector := fmt.Sprintf("a[aria-label=%s] button", label)

	page := source.MangasPage{Mangas: found}
	page.HasNextPage = doc.Find(nextSelector).Length() > 0
	return page
}
// GetPopularManga parses {base}/hots/page/{page}.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	listURL := fmt.Sprintf("%s/hots/page/%d", s.base(), page)
	doc, err := s.get(context.Background(), listURL)
	if err == nil {
		return s.parseMangaList(doc), nil
	}
	return source.MangasPage{}, err
}
// GetLatestUpdates parses {base}/newss/page/{page}.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	listURL := fmt.Sprintf("%s/newss/page/%d", s.base(), page)
	doc, err := s.get(context.Background(), listURL)
	if err == nil {
		return s.parseMangaList(doc), nil
	}
	return source.MangasPage{}, err
}
// GetSearchManga returns the popular listing for page.
//
// NOTE(review): query and filters are ignored, so every search yields the
// same results as GetPopularManga. This looks like a placeholder; the site's
// real search endpoint is not visible in this file — confirm and implement.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	return s.GetPopularManga(page)
}
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
doc, err := s.get(context.Background(), fmt.Sprintf("%s/manga/%s", s.base(), manga.URL))
if err != nil {
return manga, err
}
result := source.SManga{URL: manga.URL}
main := doc.Find("main").First()
titleEl := main.Find("h1").First()
result.Title = titleEl.Text()
if span := titleEl.Find("span").First(); span.Length() > 0 {
result.Title = strings.TrimSpace(strings.Replace(result.Title, span.Text(), "", 1))
}
if result.Title == "" {
result.Title = manga.Title
}
// Status badge is first child of h1
statusText := strings.TrimSpace(titleEl.Children().First().Text())
switch statusText {
case "連載中", "Ongoing":
result.Status = source.StatusOngoing
case "完結", "Completed":
result.Status = source.StatusCompleted
case "停止更新", "Cancelled":
result.Status = source.StatusCancelled
case "休刊", "On Hiatus":
result.Status = source.StatusHiatus
default:
result.Status = source.StatusUnknown
}
// Siblings structure: h1, then elements for author, genre1, tags, description
parent := titleEl.Parent()
children := parent.Children()
if children.Length() >= 2 {
var authorParts []string
children.Eq(1).Children().Each(func(i int, el *goquery.Selection) {
if i == 0 {
return // skip label
}
t := strings.TrimSuffix(strings.TrimSpace(el.Text()), " ,")
if t != "" {
authorParts = append(authorParts, t)
}
})
result.Author = strings.Join(authorParts, ", ")
}
var genres []string
if children.Length() >= 3 {
children.Eq(2).Children().Each(func(i int, el *goquery.Selection) {
if i == 0 {
return // skip label
}
t := strings.TrimSuffix(strings.TrimSpace(el.Text()), " ,")
if t != "" {
genres = append(genres, t)
}
})
}
if children.Length() >= 4 {
children.Eq(3).Children().Each(func(_ int, el *goquery.Selection) {
t := strings.TrimPrefix(strings.TrimSpace(el.Text()), "#")
if t != "" {
genres = append(genres, t)
}
})
}
result.Genre = strings.Join(genres, ", ")
mangaID := getMangaID(doc)
var descParts []string
if children.Length() >= 5 {
descParts = append(descParts, strings.TrimSpace(children.Eq(4).Text()))
}
if mangaID != "" {
descParts = append(descParts, "ID: "+mangaID)
}
result.Description = strings.Join(descParts, "\n\n")
main.Find("img.object-cover").First().Each(func(_ int, img *goquery.Selection) {
result.ThumbnailURL, _ = img.Attr("src")
})
return result, nil
}
// GetChapterList resolves the manga ID and loads the chapter list from
// {base}/manga/get?mid={id}&mode=all.
//
// The ID is taken from the text after the last "ID: " in manga.Description;
// if that yields nothing, the manga page is fetched and the ID read from it.
// Each ".chapteritem" with a non-empty key becomes a chapter whose URL is
// "{key}#{mangaID}/{data-cs}" and whose name is the link's data-ct attribute.
// The list is returned in the reverse of the order the endpoint served it.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	ctx := context.Background()

	var mangaID string
	if at := strings.LastIndex(manga.Description, "ID: "); at >= 0 {
		mangaID = strings.TrimSpace(manga.Description[at+4:])
	}
	if mangaID == "" {
		page, err := s.get(ctx, fmt.Sprintf("%s/manga/%s", s.base(), manga.URL))
		if err != nil {
			return nil, err
		}
		mangaID = getMangaID(page)
	}
	if mangaID == "" {
		return nil, fmt.Errorf("goda: could not find manga ID for %s", manga.URL)
	}

	listURL := fmt.Sprintf("%s/manga/get?mid=%s&mode=all", s.base(), mangaID)
	raw, err := s.getRaw(ctx, listURL)
	if err != nil {
		return nil, err
	}
	listing, err := goquery.NewDocumentFromReader(strings.NewReader(string(raw)))
	if err != nil {
		return nil, err
	}

	var list []source.SChapter
	listing.Find(".chapteritem").Each(func(_ int, item *goquery.Selection) {
		link := item.Find("a").First()
		key := getKey(link.AttrOr("href", ""))
		chapterKey := link.AttrOr("data-cs", "")
		title := link.AttrOr("data-ct", "")
		if key == "" {
			return
		}
		var ch source.SChapter
		ch.URL = key + "#" + mangaID + "/" + chapterKey
		ch.Name = title
		list = append(list, ch)
	})

	// Reverse in place.
	for front := 0; front < len(list)/2; front++ {
		back := len(list) - 1 - front
		list[front], list[back] = list[back], list[front]
	}
	return list, nil
}
// GetPageList loads the images of one chapter from
// {base}/chapter/getcontent?m={mangaID}&c={chapterID}.
//
// Both IDs come from the part of chapter.URL after its last "#", which must
// have the form "{mangaID}/{chapterID}"; otherwise an error is returned. Each
// image contributes its data-src, or its src when data-src is empty; images
// with neither are skipped.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	// URL format: {key}#{mangaId}/{chapterId}
	fragment := ""
	if hash := strings.LastIndex(chapter.URL, "#"); hash >= 0 {
		fragment = chapter.URL[hash+1:]
	}
	mangaID, chapterID, _ := strings.Cut(fragment, "/")
	if mangaID == "" || chapterID == "" {
		return nil, fmt.Errorf("goda: invalid chapter URL: %s", chapter.URL)
	}

	contentURL := fmt.Sprintf("%s/chapter/getcontent?m=%s&c=%s", s.base(), mangaID, chapterID)
	doc, err := s.get(context.Background(), contentURL)
	if err != nil {
		return nil, err
	}

	var list []source.Page
	doc.Find("#chapcontent > div > img").Each(func(pos int, img *goquery.Selection) {
		src := img.AttrOr("data-src", "")
		if src == "" {
			src = img.AttrOr("src", "")
		}
		if src == "" {
			return
		}
		list = append(list, source.Page{Index: pos, ImageURL: src})
	})
	return list, nil
}
// GetImageURL returns the image URL already stored on page.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}

// GetFilterList returns nil: this base exposes no filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}