diff --git a/docs/phase3-bases.md b/docs/phase3-bases.md index f24c5a1..04220f0 100644 --- a/docs/phase3-bases.md +++ b/docs/phase3-bases.md @@ -16,15 +16,15 @@ Detailed implementation notes for complex bases are in the **Notes** section at - [x] `base/comiciviewer` - [x] `base/eromuse` - [x] `base/ezmanhwa` -- [ ] `base/fansubscat` +- [x] `base/fansubscat` - [x] `base/fmreader` ⚠️ see notes - [x] `base/foolslide` ⚠️ see notes -- [ ] `base/fuzzydoodle` -- [ ] `base/galleryadults` -- [ ] `base/gattsu` +- [x] `base/fuzzydoodle` +- [x] `base/galleryadults` +- [x] `base/gattsu` - [x] `base/gigaviewer` ⚠️ see notes - [x] `base/gmanga` ⚠️ see notes -- [ ] `base/goda` +- [x] `base/goda` - [ ] `base/gravureblogger` - [ ] `base/greenshit` - [x] `base/grouple` ⚠️ see notes diff --git a/sources/base/fansubscat/fansubscat.go b/sources/base/fansubscat/fansubscat.go new file mode 100644 index 0000000..cc81f3e --- /dev/null +++ b/sources/base/fansubscat/fansubscat.go @@ -0,0 +1,198 @@ +// Package fansubscat implements the FansubsCat manga base. 
+// JSON REST API: GET {api}/manga/popular/{page}, /recent/{page}, /search/{page} +package fansubscat + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type Config struct { + Name string + BaseURL string + APIURL string + Lang string + IsHentaiSite bool +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + if cfg.APIURL == "" { + cfg.APIURL = cfg.BaseURL + } + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) api() string { return strings.TrimRight(s.cfg.APIURL, "/") } + +func (s *Source) getJSON(ctx context.Context, rawURL string, out any) error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return err + } + req.Header.Set("Accept", "application/json") + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("fansubscat: HTTP %d", resp.StatusCode) + } + body, _ := io.ReadAll(resp.Body) + return json.Unmarshal(body, out) +} + +// resultWrapper is the common API envelope. 
+type resultWrapper struct { + Result json.RawMessage `json:"result"` +} + +type mangaDTO struct { + Slug string `json:"slug"` + Name string `json:"name"` + ThumbnailURL string `json:"thumbnail_url"` + Synopsis string `json:"synopsis"` + Status string `json:"status"` + Genres string `json:"genres"` + Author string `json:"author"` +} + +type chapterDTO struct { + Slug string `json:"slug"` + Name string `json:"name"` + Date int64 `json:"date"` +} + +type pageDTO struct { + URL string `json:"url"` +} + +func toSManga(m mangaDTO) source.SManga { + return source.SManga{ + URL: m.Slug, + Title: m.Name, + Author: m.Author, + Description: m.Synopsis, + Genre: m.Genres, + Status: util.StatusFromString(m.Status), + ThumbnailURL: m.ThumbnailURL, + } +} + +func (s *Source) fetchList(ctx context.Context, endpoint string) (source.MangasPage, error) { + var wrap resultWrapper + if err := s.getJSON(ctx, endpoint, &wrap); err != nil { + return source.MangasPage{}, err + } + var dtos []mangaDTO + if err := json.Unmarshal(wrap.Result, &dtos); err != nil { + return source.MangasPage{}, err + } + mangas := make([]source.SManga, len(dtos)) + for i, m := range dtos { + mangas[i] = toSManga(m) + } + return source.MangasPage{Mangas: mangas, HasNextPage: len(dtos) >= 20}, nil +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { + return s.fetchList(context.Background(), fmt.Sprintf("%s/manga/popular/%d", s.api(), page)) +} + +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { + return s.fetchList(context.Background(), fmt.Sprintf("%s/manga/recent/%d", s.api(), page)) +} + +func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + u := fmt.Sprintf("%s/manga/search/%d?type=all", s.api(), page) + if query != "" { + u += "&query=" + query + } + return s.fetchList(context.Background(), u) +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + var wrap resultWrapper 
+ slug := manga.URL + if idx := strings.LastIndex(slug, "/"); idx >= 0 { + slug = slug[idx+1:] + } + if err := s.getJSON(context.Background(), fmt.Sprintf("%s/manga/details/%s", s.api(), slug), &wrap); err != nil { + return manga, err + } + var dto mangaDTO + if err := json.Unmarshal(wrap.Result, &dto); err != nil { + return manga, err + } + out := toSManga(dto) + out.URL = manga.URL + return out, nil +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + slug := manga.URL + if idx := strings.LastIndex(slug, "/"); idx >= 0 { + slug = slug[idx+1:] + } + var wrap resultWrapper + if err := s.getJSON(context.Background(), fmt.Sprintf("%s/manga/chapters/%s", s.api(), slug), &wrap); err != nil { + return nil, err + } + var dtos []chapterDTO + if err := json.Unmarshal(wrap.Result, &dtos); err != nil { + return nil, err + } + chapters := make([]source.SChapter, len(dtos)) + for i, ch := range dtos { + chapters[i] = source.SChapter{ + URL: ch.Slug, + Name: ch.Name, + DateUpload: ch.Date * 1000, + } + } + return chapters, nil +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + slug := chapter.URL + if idx := strings.LastIndex(slug, "/"); idx >= 0 { + slug = slug[idx+1:] + } + var wrap resultWrapper + if err := s.getJSON(context.Background(), fmt.Sprintf("%s/manga/pages/%s", s.api(), slug), &wrap); err != nil { + return nil, err + } + var dtos []pageDTO + if err := json.Unmarshal(wrap.Result, &dtos); err != nil { + return nil, err + } + pages := make([]source.Page, len(dtos)) + for i, p := range dtos { + pages[i] = source.Page{Index: i, ImageURL: p.URL} + } + return pages, nil +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil } diff --git a/sources/base/fuzzydoodle/fuzzydoodle.go b/sources/base/fuzzydoodle/fuzzydoodle.go new file mode 100644 index 0000000..4761762 --- /dev/null +++ 
b/sources/base/fuzzydoodle/fuzzydoodle.go @@ -0,0 +1,259 @@ +// Package fuzzydoodle implements the FuzzyDoodle manga base. +// https://github.com/jhin1m/fuzzy-doodle — HTML scraping, CF-protected. +package fuzzydoodle + +import ( + "context" + "fmt" + "net/http" + "strings" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type Config struct { + Name string + BaseURL string + Lang string +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("fuzzydoodle: HTTP %d", resp.StatusCode) + } + return goquery.NewDocumentFromReader(resp.Body) +} + +func imgAttr(img *goquery.Selection, baseURL string) string { + for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "src"} { + if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") { + return util.AbsURL(baseURL, v) + } + } + // srcset: take first entry + if v, ok := img.Attr("srcset"); ok && v != "" { + return util.AbsURL(baseURL, strings.Fields(v)[0]) + } + return "" +} + +func mangaFromElement(el 
*goquery.Selection, baseURL string) source.SManga { + m := source.SManga{} + el.Find("a").First().Each(func(_ int, a *goquery.Selection) { + m.URL, _ = a.Attr("href") + }) + m.Title = strings.TrimSpace(el.Find("h2.text-sm").Text()) + if m.Title == "" { + m.Title = strings.TrimSpace(el.Find("h2, h3").First().Text()) + } + el.Find("img").First().Each(func(_ int, img *goquery.Selection) { + m.ThumbnailURL = imgAttr(img, baseURL) + }) + return m +} + +func hasNextPage(doc *goquery.Document) bool { + return doc.Find("ul.pagination > li:last-child:not(.pagination-disabled)").Length() > 0 +} + +func (s *Source) parsePage(doc *goquery.Document) source.MangasPage { + var mangas []source.SManga + doc.Find("div#card-real").Each(func(_ int, el *goquery.Selection) { + m := mangaFromElement(el, s.cfg.BaseURL) + if m.URL != "" { + mangas = append(mangas, m) + } + }) + return source.MangasPage{Mangas: mangas, HasNextPage: hasNextPage(doc)} +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { + u := fmt.Sprintf("%s/manga?page=%d", s.base(), page) + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + return s.parsePage(doc), nil +} + +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { + u := fmt.Sprintf("%s/latest?page=%d", s.base(), page) + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + return s.parsePage(doc), nil +} + +func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + u := fmt.Sprintf("%s/manga?title=%s&page=%d", s.base(), query, page) + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + return s.parsePage(doc), nil +} + +func getInfo(doc *goquery.Selection, label string) string { + // p:has(span:containsOwn({label})) span.capitalize + var result string + doc.Find("p").Each(func(_ int, p *goquery.Selection) { + if 
strings.Contains(p.Find("span").First().Text(), label) { + result = strings.TrimSpace(p.Find("span.capitalize").Text()) + } + }) + return result +} + +func parseStatus(s string) int { + switch strings.ToLower(strings.TrimSpace(s)) { + case "ongoing", "en cours": + return source.StatusOngoing + case "completed", "completed (season)", "terminé": + return source.StatusCompleted + case "hiatus", "on hiatus": + return source.StatusHiatus + default: + return source.StatusUnknown + } +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return manga, err + } + result := source.SManga{URL: manga.URL} + result.Title = strings.TrimSpace(doc.Find("h1").First().Text()) + if result.Title == "" { + result.Title = manga.Title + } + result.ThumbnailURL = imgAttr(doc.Find("div.relative img").First(), s.cfg.BaseURL) + + var genres []string + doc.Find("div.flex > a.inline-block").Each(func(_ int, el *goquery.Selection) { + if t := strings.TrimSpace(el.Text()); t != "" { + genres = append(genres, t) + } + }) + + // description + var descParts []string + doc.Find("div:has(> p#description)").First().Each(func(_ int, el *goquery.Selection) { + descParts = append(descParts, strings.TrimSpace(el.Text())) + }) + result.Description = strings.Join(descParts, "\n\n") + + if t := getInfo(doc.Selection, "Status"); t != "" || getInfo(doc.Selection, "Statut") != "" { + st := t + if st == "" { + st = getInfo(doc.Selection, "Statut") + } + result.Status = parseStatus(st) + } + if a := getInfo(doc.Selection, "Artist"); a != "" { + result.Artist = a + } else if a := getInfo(doc.Selection, "Artiste"); a != "" { + result.Artist = a + } + if a := getInfo(doc.Selection, "Author"); a != "" { + result.Author = a + } else if a := getInfo(doc.Selection, "Auteur"); a != "" { + result.Author = a + } + if t := getInfo(doc.Selection, "Type"); t != "" { + genres = append([]string{t}, 
genres...) + } + result.Genre = strings.Join(genres, ", ") + return result, nil +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + mangaURL := util.AbsURL(s.cfg.BaseURL, manga.URL) + var chapters []source.SChapter + page := 1 + for { + u := mangaURL + if page > 1 { + u = fmt.Sprintf("%s?page=%d", mangaURL, page) + } + doc, err := s.get(context.Background(), u) + if err != nil { + if page == 1 { + return nil, err + } + break + } + doc.Find("div#chapters-list > a[href]").Each(func(_ int, a *goquery.Selection) { + ch := source.SChapter{} + ch.URL, _ = a.Attr("href") + ch.Name = strings.TrimSpace(a.Find("#item-title, span").First().Text()) + if ch.Name == "" { + ch.Name = strings.TrimSpace(a.Text()) + } + // date + a.Find("p, span").Each(func(_ int, el *goquery.Selection) { + if t := strings.TrimSpace(el.Text()); t != "" && ch.DateUpload == 0 { + ch.DateUpload = util.ParseRelativeDate(t) + } + }) + if ch.URL != "" { + chapters = append(chapters, ch) + } + }) + if doc.Find("ul.pagination > li:last-child:not(.pagination-disabled)").Length() == 0 { + break + } + page++ + } + return chapters, nil +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) + if err != nil { + return nil, err + } + var pages []source.Page + doc.Find("div#chapter-container > img").Each(func(i int, img *goquery.Selection) { + if u := imgAttr(img, s.cfg.BaseURL); u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: u}) + } + }) + return pages, nil +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil } diff --git a/sources/base/galleryadults/galleryadults.go b/sources/base/galleryadults/galleryadults.go new file mode 100644 index 0000000..4f4c3f1 --- /dev/null +++ b/sources/base/galleryadults/galleryadults.go @@ -0,0 +1,210 @@ +// 
Package galleryadults implements the GalleryAdults adult gallery base. +// HTML scraping; popular: GET {base}/language/{lang}/popular/?page={n}. +package galleryadults + +import ( + "context" + "fmt" + "net/http" + "regexp" + "strings" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type Config struct { + Name string + BaseURL string + Lang string + MangaLang string // language path segment, e.g. "english"; empty = all +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("galleryadults: HTTP %d", resp.StatusCode) + } + return goquery.NewDocumentFromReader(resp.Body) +} + +var shortenTitleRe = regexp.MustCompile(`(\[[^\]]*\]|[({][^)}]*[)}])`) +var tagCountRe = regexp.MustCompile(`\s*\(\d+\)\s*$`) + +func (s *Source) mangaTitle(el *goquery.Selection, selector string) string { + raw := strings.TrimSpace(el.Find(selector).Text()) + return strings.TrimSpace(shortenTitleRe.ReplaceAllString(raw, "")) +} + +func imgAttr(img *goquery.Selection, baseURL string) string { + for _, attr := 
// thumbnailToFull converts a thumbnail URL to the full image URL by removing
// the "t" immediately before the file extension (e.g. "1t.jpg" → "1.jpg").
//
// The URL is returned unchanged when its last path segment has no extension or
// when the character before the extension is not "t". Only the final segment
// is inspected, so a dot in the host name or in an earlier path segment is
// never rewritten.
func thumbnailToFull(u string) string {
	dot := strings.LastIndex(u, ".")
	// dot <= 0: no extension, or nothing in front of it to strip.
	// dot < last slash: the last dot belongs to the host or a directory.
	if dot <= 0 || dot < strings.LastIndex(u, "/") || u[dot-1] != 't' {
		return u
	}
	return u[:dot-1] + u[dot:]
}
*Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + u := fmt.Sprintf("%s/search/?q=%s&page=%d", s.base(), query, page) + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + return s.parsePage(doc), nil +} + +// getInfo extracts tag links from elements matching ".tags" that contain "{tag}:". +func getInfo(el *goquery.Selection, tag string) string { + var items []string + el.Find(".tags").Each(func(_ int, tags *goquery.Selection) { + if strings.Contains(tags.Text(), tag+":") { + tags.Find("a.tag_btn").Each(func(_ int, a *goquery.Selection) { + t := tagCountRe.ReplaceAllString(strings.TrimSpace(a.Text()), "") + if t != "" { + items = append(items, t) + } + }) + } + }) + return strings.Join(items, ", ") +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return manga, err + } + result := source.SManga{URL: manga.URL, Status: source.StatusCompleted} + top := doc.Find(".gallery_top") + result.Title = s.mangaTitle(top, "h1") + if result.Title == "" { + result.Title = manga.Title + } + top.Find(".cover img").First().Each(func(_ int, img *goquery.Selection) { + result.ThumbnailURL = imgAttr(img, s.cfg.BaseURL) + }) + result.Genre = getInfo(top, "Tags") + result.Author = getInfo(top, "Artists") + return result, nil +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + // Galleries have a single chapter: the gallery itself. + return []source.SChapter{{ + URL: manga.URL, + Name: "Chapter", + }}, nil +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) + if err != nil { + return nil, err + } + var pages []source.Page + // Try thumbnail → full conversion first. 
+ doc.Find(".gallery_thumb a img").Each(func(i int, img *goquery.Selection) { + if u := imgAttr(img, s.cfg.BaseURL); u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: thumbnailToFull(u)}) + } + }) + if len(pages) == 0 { + // Fallback: linked images directly. + doc.Find(".gallery_thumb a").Each(func(i int, a *goquery.Selection) { + if u, ok := a.Attr("href"); ok && u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)}) + } + }) + } + return pages, nil +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil } diff --git a/sources/base/gattsu/gattsu.go b/sources/base/gattsu/gattsu.go new file mode 100644 index 0000000..132e29e --- /dev/null +++ b/sources/base/gattsu/gattsu.go @@ -0,0 +1,192 @@ +// Package gattsu implements the Gattsu Brazilian adult manga base. +// Popular = Latest: GET {base}/page/{n}; no separate popular endpoint. 
+package gattsu + +import ( + "context" + "fmt" + "net/http" + "regexp" + "strings" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type Config struct { + Name string + BaseURL string + Lang string +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("gattsu: HTTP %d", resp.StatusCode) + } + return goquery.NewDocumentFromReader(resp.Body) +} + +// thumbSizeRe matches WordPress size suffix like "-150x150." and replaces with ".". 
+var thumbSizeRe = regexp.MustCompile(`-\d+x\d+\.`) + +func withoutSize(u string) string { + return thumbSizeRe.ReplaceAllString(u, ".") +} + +func imgAttr(img *goquery.Selection, baseURL string) string { + for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "src"} { + if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") { + return util.AbsURL(baseURL, v) + } + } + return "" +} + +func (s *Source) mangaFromElement(el *goquery.Selection) source.SManga { + m := source.SManga{} + m.URL, _ = el.Attr("href") + m.Title = strings.TrimSpace(el.Find("span.thumb-titulo").Text()) + el.Find("span.thumb-imagem img.wp-post-image").First().Each(func(_ int, img *goquery.Selection) { + if src, ok := img.Attr("src"); ok { + m.ThumbnailURL = withoutSize(util.AbsURL(s.cfg.BaseURL, src)) + } + }) + return m +} + +func (s *Source) parseList(doc *goquery.Document) source.MangasPage { + var mangas []source.SManga + prefix := s.base() + sel := fmt.Sprintf("div.meio div.lista ul li a[href^=%s]", prefix) + doc.Find(sel).Each(func(_ int, el *goquery.Selection) { + m := s.mangaFromElement(el) + if m.URL != "" { + mangas = append(mangas, m) + } + }) + hasNext := doc.Find(".next.page-numbers, a.next").Length() > 0 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext} +} + +func (s *Source) fetchPage(page int) (source.MangasPage, error) { + var u string + if page == 1 { + u = s.base() + "/" + } else { + u = fmt.Sprintf("%s/page/%d", s.base(), page) + } + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + return s.parseList(doc), nil +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { return s.fetchPage(page) } +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { return s.fetchPage(page) } + +func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + u := fmt.Sprintf("%s/?s=%s&post_type=post", 
s.base(), query) + if page > 1 { + u = fmt.Sprintf("%s/page/%d/?s=%s&post_type=post", s.base(), page, query) + } + doc, err := s.get(context.Background(), u) + if err != nil { + return source.MangasPage{}, err + } + return s.parseList(doc), nil +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return manga, err + } + result := source.SManga{URL: manga.URL, Status: source.StatusCompleted} + postBox := doc.Find("div.meio div.post-box").First() + result.Title = strings.TrimSpace(postBox.Find("h1.post-titulo").Text()) + if result.Title == "" { + result.Title = manga.Title + } + result.Author = strings.TrimSpace(postBox.Find("ul.post-itens li:contains(Artista) a").First().Text()) + var genres []string + postBox.Find("ul.post-itens li:contains(Tags) a").Each(func(_ int, a *goquery.Selection) { + if t := strings.TrimSpace(a.Text()); t != "" { + genres = append(genres, t) + } + }) + result.Genre = strings.Join(genres, ", ") + var descParts []string + postBox.Find("div.post-texto p").Each(func(_ int, p *goquery.Selection) { + t := strings.TrimSpace(p.Text()) + t = strings.TrimPrefix(t, "Sinopse :") + if t = strings.TrimSpace(t); t != "" { + descParts = append(descParts, t) + } + }) + result.Description = strings.Join(descParts, "\n\n") + postBox.Find("div.post-capa > img.wp-post-image").First().Each(func(_ int, img *goquery.Selection) { + if src, ok := img.Attr("src"); ok { + result.ThumbnailURL = withoutSize(util.AbsURL(s.cfg.BaseURL, src)) + } + }) + return result, nil +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + // Gattsu pages are single-chapter galleries; the manga page is the chapter. 
+ return []source.SChapter{{ + URL: manga.URL, + Name: manga.Title, + }}, nil +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) + if err != nil { + return nil, err + } + sel := "div.meio div.post-box ul.post-fotos li a > img, " + + "div.meio div.post-box.listaImagens div.galeriaHtml img" + var pages []source.Page + doc.Find(sel).Each(func(i int, img *goquery.Selection) { + if u := imgAttr(img, s.cfg.BaseURL); u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: withoutSize(u)}) + } + }) + return pages, nil +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil } diff --git a/sources/base/goda/goda.go b/sources/base/goda/goda.go new file mode 100644 index 0000000..0dd3dc0 --- /dev/null +++ b/sources/base/goda/goda.go @@ -0,0 +1,314 @@ +// Package goda implements the GoDa manga base. +// Popular: GET {base}/hots/page/{n}; Chapter list via {base}/manga/get?mid={id}&mode=all. 
+package goda + +import ( + "context" + "fmt" + "io" + "net/http" + "net/url" + "strings" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" +) + +type Config struct { + Name string + BaseURL string + Lang string +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("goda: HTTP %d", resp.StatusCode) + } + return goquery.NewDocumentFromReader(resp.Body) +} + +func (s *Source) getRaw(ctx context.Context, rawURL string) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + return io.ReadAll(resp.Body) +} + +// getKey extracts the manga key from a URL: strip /manga/ prefix and trailing /. 
// getKey derives the manga key from a link: the URL path after the first
// "/manga/" segment (the whole path when that segment is absent), without one
// trailing "/". Query and fragment are ignored. A link that cannot be parsed
// as a URL is returned as given.
func getKey(href string) string {
	parsed, err := url.Parse(href)
	if err != nil {
		return href
	}
	key := parsed.Path
	if _, rest, found := strings.Cut(key, "/manga/"); found {
		key = rest
	}
	return strings.TrimSuffix(key, "/")
}
+ return s.GetPopularManga(page) +} +

// textsAfterLabel returns the trimmed, non-empty texts of every child of row
// except the first one, which the detail page uses as the row's label. A
// trailing " ," separator is removed from each entry.
func textsAfterLabel(row *goquery.Selection) []string {
	var out []string
	row.Children().Each(func(i int, el *goquery.Selection) {
		if i == 0 {
			return // skip label
		}
		t := strings.TrimSuffix(strings.TrimSpace(el.Text()), " ,")
		if t != "" {
			out = append(out, t)
		}
	})
	return out
}

// GetMangaDetails fetches {base}/manga/{manga.URL} and builds an SManga from
// the page's <main> element: title and status badge from the first <h1>, and
// author, genres, tags and description from the <h1>'s siblings.
//
// The manga ID reported by getMangaID is appended to the description as
// "ID: <id>" so that GetChapterList can recover it without refetching.
// On a fetch error the input manga is returned unchanged alongside the error.
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	doc, err := s.get(context.Background(), fmt.Sprintf("%s/manga/%s", s.base(), manga.URL))
	if err != nil {
		return manga, fmt.Errorf("goda: fetching details for %s: %w", manga.URL, err)
	}

	result := source.SManga{URL: manga.URL}
	mainEl := doc.Find("main").First()
	titleEl := mainEl.Find("h1").First()

	// The <h1> text includes the text of its first <span>; strip it once, then
	// trim unconditionally so a whitespace-only heading falls back to the
	// caller-supplied title.
	title := titleEl.Text()
	if span := titleEl.Find("span").First(); span.Length() > 0 {
		title = strings.Replace(title, span.Text(), "", 1)
	}
	if title = strings.TrimSpace(title); title == "" {
		title = manga.Title
	}
	result.Title = title

	// Status badge is first child of h1
	switch strings.TrimSpace(titleEl.Children().First().Text()) {
	case "連載中", "Ongoing":
		result.Status = source.StatusOngoing
	case "完結", "Completed":
		result.Status = source.StatusCompleted
	case "停止更新", "Cancelled":
		result.Status = source.StatusCancelled
	case "休刊", "On Hiatus":
		result.Status = source.StatusHiatus
	default:
		result.Status = source.StatusUnknown
	}

	// Siblings structure: h1, then elements for author, genre, tags, description
	children := titleEl.Parent().Children()
	count := children.Length()

	if count >= 2 {
		result.Author = strings.Join(textsAfterLabel(children.Eq(1)), ", ")
	}

	var genres []string
	if count >= 3 {
		genres = textsAfterLabel(children.Eq(2))
	}
	if count >= 4 {
		// Tag row: every child is a tag (no label), written with a leading '#'.
		children.Eq(3).Children().Each(func(_ int, el *goquery.Selection) {
			if t := strings.TrimPrefix(strings.TrimSpace(el.Text()), "#"); t != "" {
				genres = append(genres, t)
			}
		})
	}
	result.Genre = strings.Join(genres, ", ")

	var descParts []string
	if count >= 5 {
		// Skip an empty description so the result does not start with "\n\n".
		if d := strings.TrimSpace(children.Eq(4).Text()); d != "" {
			descParts = append(descParts, d)
		}
	}
	if mangaID := getMangaID(doc); mangaID != "" {
		// Must stay in sync with the "ID: " marker parsed by GetChapterList.
		descParts = append(descParts, "ID: "+mangaID)
	}
	result.Description = strings.Join(descParts, "\n\n")

	// Empty when no matching <img> exists or it has no src attribute.
	result.ThumbnailURL = mainEl.Find("img.object-cover").First().AttrOr("src", "")
	return result, nil
}

// GetChapterList returns the chapters of manga in reversed listing order.
//
// The manga ID is taken from the trailing "ID: <id>" marker that
// GetMangaDetails writes into the description; when that marker is absent the
// manga page is fetched again and getMangaID is applied to it. Each chapter
// URL is encoded as "{key}#{mangaID}/{chapterID}", the format GetPageList
// parses.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	const idMarker = "ID: "

	mangaID := ""
	if idx := strings.LastIndex(manga.Description, idMarker); idx >= 0 {
		mangaID = strings.TrimSpace(manga.Description[idx+len(idMarker):])
	}
	if mangaID == "" {
		doc, err := s.get(context.Background(), fmt.Sprintf("%s/manga/%s", s.base(), manga.URL))
		if err != nil {
			return nil, fmt.Errorf("goda: fetching manga page for %s: %w", manga.URL, err)
		}
		mangaID = getMangaID(doc)
	}
	if mangaID == "" {
		return nil, fmt.Errorf("goda: could not find manga ID for %s", manga.URL)
	}

	body, err := s.getRaw(context.Background(), fmt.Sprintf("%s/manga/get?mid=%s&mode=all", s.base(), mangaID))
	if err != nil {
		return nil, fmt.Errorf("goda: fetching chapter list for %s: %w", manga.URL, err)
	}
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
	if err != nil {
		return nil, fmt.Errorf("goda: parsing chapter list for %s: %w", manga.URL, err)
	}

	var chapters []source.SChapter
	doc.Find(".chapteritem").Each(func(_ int, el *goquery.Selection) {
		a := el.Find("a").First()
		href, _ := a.Attr("href") // a missing href yields "", handled by the key check below
		key := getKey(href)
		if key == "" {
			return
		}
		chapters = append(chapters, source.SChapter{
			URL:  key + "#" + mangaID + "/" + a.AttrOr("data-cs", ""),
			Name: a.AttrOr("data-ct", ""),
		})
	})

	// reverse: chapters come latest-first from API
	for i, j := 0, len(chapters)-1; i < j; i, j = i+1, j-1 {
		chapters[i], chapters[j] = chapters[j], chapters[i]
	}
	return chapters, nil
}

// GetPageList fetches the chapter content fragment and returns one Page per
// image found under "#chapcontent > div > img", preferring the data-src
// attribute and falling back to src.
//
// chapter.URL must have the form "{key}#{mangaID}/{chapterID}"; anything else
// is rejected with an error. Images without a URL are skipped, and page
// indices are assigned contiguously over the pages actually returned.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	// URL format: {key}#{mangaId}/{chapterId}
	id := ""
	if idx := strings.LastIndex(chapter.URL, "#"); idx >= 0 {
		id = chapter.URL[idx+1:]
	}
	// Without a '/', Cut leaves chapterID empty, which the check below rejects.
	mangaID, chapterID, _ := strings.Cut(id, "/")
	if mangaID == "" || chapterID == "" {
		return nil, fmt.Errorf("goda: invalid chapter URL: %s", chapter.URL)
	}

	doc, err := s.get(context.Background(),
		fmt.Sprintf("%s/chapter/getcontent?m=%s&c=%s", s.base(), mangaID, chapterID))
	if err != nil {
		return nil, fmt.Errorf("goda: fetching pages for %s: %w", chapter.URL, err)
	}

	var pages []source.Page
	doc.Find("#chapcontent > div > img").Each(func(_ int, img *goquery.Selection) {
		u := img.AttrOr("data-src", "")
		if u == "" {
			u = img.AttrOr("src", "")
		}
		if u == "" {
			return
		}
		// Number by output position so skipped images leave no gaps.
		pages = append(pages, source.Page{Index: len(pages), ImageURL: u})
	})
	return pages, nil
}

// GetImageURL returns the page's ImageURL unchanged.
func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil }

// GetFilterList returns nil.
func (s *Source) GetFilterList() []source.Filter { return nil }