diff --git a/docs/phase3-bases.md b/docs/phase3-bases.md index 049de1d..a970db7 100644 --- a/docs/phase3-bases.md +++ b/docs/phase3-bases.md @@ -41,15 +41,15 @@ Detailed implementation notes for complex bases are in the **Notes** section at - [x] `base/liliana` ⚠️ see notes - [x] `base/madara` ⚠️ see notes - [x] `base/madtheme` ⚠️ see notes -- [ ] `base/manga18` -- [ ] `base/mangabox` -- [ ] `base/mangacatalog` +- [x] `base/manga18` +- [x] `base/mangabox` +- [x] `base/mangacatalog` - [x] `base/mangadventure` ⚠️ see notes - [x] `base/mangahub` ⚠️ see notes - [x] `base/mangareader` ⚠️ see notes -- [ ] `base/mangataro` +- [x] `base/mangataro` - [x] `base/mangathemesia` ⚠️ see notes -- [ ] `base/mangawork` +- [x] `base/mangawork` - [x] `base/mangaworld` ⚠️ see notes - [x] `base/mangotheme` ⚠️ see notes - [ ] `base/manhwaz` diff --git a/sources/base/manga18/manga18.go b/sources/base/manga18/manga18.go new file mode 100644 index 0000000..dcc3949 --- /dev/null +++ b/sources/base/manga18/manga18.go @@ -0,0 +1,245 @@ +// Package manga18 implements the Manga18 manga base. +// HTML scraping; popular: GET {base}/list-manga/{page}?order_by=views; pages via Base64-encoded URLs in inline JS. 
+package manga18
+
+import (
+	"context"
+	"encoding/base64"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+
+	"goyomi/internal/httpclient"
+	"goyomi/internal/source"
+	"goyomi/sources/base/util"
+)
+
+// Config holds the per-site settings of a Manga18-based source.
+type Config struct {
+	Name    string
+	BaseURL string
+	Lang    string
+}
+
+// Source scrapes a single Manga18-style site described by its Config.
+type Source struct {
+	cfg    Config
+	client *httpclient.Client
+	id     int64
+}
+
+// New returns a Source for cfg. The source ID is derived from cfg.Name and
+// cfg.Lang, and every Source gets its own rate-limited HTTP client.
+func New(cfg Config) *Source {
+	c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
+	return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
+}
+
+// ID returns the generated source identifier.
+func (s *Source) ID() int64 { return s.id }
+
+// Name returns the configured source name.
+func (s *Source) Name() string { return s.cfg.Name }
+
+// Lang returns the configured language code.
+func (s *Source) Lang() string { return s.cfg.Lang }
+
+// SupportsLatest reports that GetLatestUpdates is implemented.
+func (s *Source) SupportsLatest() bool { return true }
+
+// base returns the configured base URL without trailing slashes.
+func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") }
+
+// get fetches rawURL and parses the response body as an HTML document.
+// Any status other than 200 is reported as an error.
+func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	// Build the Referer from the trimmed base so a BaseURL that already ends
+	// in "/" does not produce a double slash.
+	req.Header.Set("Referer", s.base()+"/")
+	resp, err := s.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("manga18: HTTP %d", resp.StatusCode)
+	}
+	return goquery.NewDocumentFromReader(resp.Body)
+}
+
+// mangaFromElement builds an SManga from one div.story_item list entry:
+// URL from the first link, title from the name link, thumbnail from the
+// first image (made absolute against the base URL).
+func (s *Source) mangaFromElement(el *goquery.Selection) source.SManga {
+	m := source.SManga{}
+	el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
+		m.URL = a.AttrOr("href", "")
+	})
+	m.Title = strings.TrimSpace(el.Find("div.mg_info > div.mg_name a").Text())
+	if thumb := el.Find("img").First().AttrOr("src", ""); thumb != "" {
+		m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, thumb)
+	}
+	return m
+}
+
+// parseMangaList collects every list entry that has both a URL and a title.
+// A next page is assumed when the last pagination item is not the active one.
+func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
+	var mangas []source.SManga
+	doc.Find("div.story_item").Each(func(_ int, el *goquery.Selection) {
+		m := s.mangaFromElement(el)
+		if m.URL != "" && m.Title != "" {
+			mangas = append(mangas, m)
+		}
+	})
+	hasNext := doc.Find(".pagination > li:last-child:not(.active)").Length() > 0
+	return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
+}
+
+// GetPopularManga fetches {base}/list-manga/{page}?order_by=views.
+func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), fmt.Sprintf("%s/list-manga/%d?order_by=views", s.base(), page))
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc), nil
+}
+
+// GetLatestUpdates fetches {base}/list-manga/{page} with the site's default ordering.
+func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), fmt.Sprintf("%s/list-manga/%d", s.base(), page))
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc), nil
+}
+
+// GetSearchManga fetches {base}/list-manga/{page}?search={query}.
+// filters is accepted for interface compatibility and is not used.
+func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
+	// Escape the query so spaces, '&', '#', and non-ASCII text cannot
+	// truncate or corrupt the request URL.
+	u := fmt.Sprintf("%s/list-manga/%d?search=%s", s.base(), page, url.QueryEscape(query))
+	doc, err := s.get(context.Background(), u)
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc), nil
+}
+
+// infoValueAfter returns the trimmed text of the div.info_value elements inside
+// info whose preceding sibling's lower-cased text contains any of labels.
+func infoValueAfter(info *goquery.Selection, labels ...string) string {
+	matched := info.Find("div.info_value").FilterFunction(func(_ int, el *goquery.Selection) bool {
+		prev := strings.ToLower(el.Prev().Text())
+		for _, label := range labels {
+			if strings.Contains(prev, label) {
+				return true
+			}
+		}
+		return false
+	})
+	return strings.TrimSpace(matched.Text())
+}
+
+// GetMangaDetails scrapes the detail page of manga. On a fetch error the
+// input manga is returned unchanged together with the error.
+func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
+	if err != nil {
+		return manga, err
+	}
+	result := source.SManga{URL: manga.URL}
+	result.Title = strings.TrimSpace(doc.Find("div.detail_name > h1").Text())
+	if result.Title == "" {
+		// Fall back to the title already known from the list page.
+		result.Title = manga.Title
+	}
+	if thumb := doc.Find("div.detail_avatar > img").AttrOr("src", ""); thumb != "" {
+		result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, thumb)
+	}
+	result.Description = strings.TrimSpace(doc.Find("div.detail_reviewContent").Text())
+
+	info := doc.Find("div.detail_listInfo")
+	statusText := strings.TrimSpace(info.Find("div.item div.info_value").FilterFunction(func(_ int, el *goquery.Selection) bool {
+		return strings.Contains(el.Parent().Text(), "Status")
+	}).Text())
+	switch {
+	case strings.Contains(statusText, "Ongoing"):
+		result.Status = source.StatusOngoing
+	case strings.Contains(statusText, "Completed"):
+		result.Status = source.StatusCompleted
+	default:
+		result.Status = source.StatusUnknown
+	}
+
+	// "Updating" is treated as a placeholder and left out of the result.
+	if author := infoValueAfter(info, "author", "autor"); author != "Updating" {
+		result.Author = author
+	}
+	if artist := infoValueAfter(info, "artist"); artist != "Updating" {
+		result.Artist = artist
+	}
+
+	var genres []string
+	info.Find("div.info_value > a[href*=/manga-list/]").Each(func(_ int, a *goquery.Selection) {
+		if t := strings.TrimSpace(a.Text()); t != "" {
+			genres = append(genres, t)
+		}
+	})
+	result.Genre = strings.Join(genres, ", ")
+	return result, nil
+}
+
+// GetChapterList scrapes the chapter entries from the manga's detail page,
+// in document order. Entries without a link are skipped.
+func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
+	if err != nil {
+		return nil, err
+	}
+	var chapters []source.SChapter
+	doc.Find("div.chapter_box .item").Each(func(_ int, el *goquery.Selection) {
+		ch := source.SChapter{}
+		el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
+			ch.URL = a.AttrOr("href", "")
+			ch.Name = strings.TrimSpace(a.Text())
+		})
+		if dateEl := el.Find("p").First(); dateEl.Length() > 0 {
+			ch.DateUpload = parseDate(strings.TrimSpace(dateEl.Text()))
+		}
+		if ch.URL != "" {
+			chapters = append(chapters, ch)
+		}
+	})
+	return chapters, nil
+}
+
+// parseDate parses a DD-MM-YYYY date and returns Unix milliseconds,
+// or 0 when s is not in that layout.
+func parseDate(s string) int64 {
+	t, err := time.Parse("02-01-2006", s)
+	if err != nil {
+		return 0
+	}
+	return t.UnixMilli()
+}
+
+// GetPageList extracts the page image URLs of a chapter. The chapter page
+// carries them as a Base64-encoded array inside the inline script that
+// mentions slides_p_path.
+func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
+	if err != nil {
+		return nil, err
+	}
+
+	// Use the first script that mentions slides_p_path.
+	var scriptData string
+	doc.Find("script").EachWithBreak(func(_ int, el *goquery.Selection) bool {
+		// A script that fails to render simply cannot match; try the next one.
+		html, _ := el.Html()
+		if strings.Contains(html, "slides_p_path") {
+			scriptData = html
+			return false
+		}
+		return true
+	})
+	if scriptData == "" {
+		return nil, fmt.Errorf("manga18: slides_p_path script not found")
+	}
+
+	start := strings.Index(scriptData, "[")
+	end := strings.LastIndex(scriptData, "]")
+	if start < 0 || end <= start {
+		return nil, fmt.Errorf("manga18: image array not found")
+	}
+	// Trim trailing comma before closing bracket
+	inner := strings.TrimRight(strings.TrimSpace(scriptData[start+1:end]), ",")
+
+	var pages []source.Page
+	for _, part := range strings.Split(inner, ",") {
+		// Accept both double- and single-quoted JS string literals.
+		encoded := strings.Trim(strings.TrimSpace(part), `"'`)
+		if encoded == "" {
+			continue
+		}
+		decoded, err := base64.StdEncoding.DecodeString(encoded)
+		if err != nil {
+			// Some entries use the URL-safe alphabet; skip anything that is neither.
+			decoded, err = base64.URLEncoding.DecodeString(encoded)
+			if err != nil {
+				continue
+			}
+		}
+		imgURL := string(decoded)
+		if strings.HasPrefix(imgURL, "/") {
+			imgURL = s.base() + imgURL
+		}
+		// Number pages by position in the result so skipped entries leave no gaps.
+		pages = append(pages, source.Page{Index: len(pages), ImageURL: imgURL})
+	}
+	return pages, nil
+}
+
+// GetImageURL returns the image URL already stored on page.
+func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil }
+
+// GetFilterList returns nil; this base defines no filters.
+func (s *Source) GetFilterList() []source.Filter { return nil }
diff --git a/sources/base/mangabox/mangabox.go b/sources/base/mangabox/mangabox.go
new file mode 100644
index 0000000..692f452
--- /dev/null
+++ b/sources/base/mangabox/mangabox.go
@@ -0,0 +1,372 @@
+// Package mangabox implements the MangaBox manga base.
+// HTML scraping for lists; JSON API for chapter list; chapter pages via HTML + JS array extraction.
+package mangabox
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+
+	"goyomi/internal/httpclient"
+	"goyomi/internal/source"
+	"goyomi/sources/base/util"
+)
+
+// Config holds the per-site settings of a MangaBox-based source.
+// Empty path fields are replaced by their defaults in New.
+type Config struct {
+	Name            string
+	BaseURL         string
+	Lang            string
+	PopularURLPath  string // default: "manga-list/hot-manga"
+	LatestURLPath   string // default: "manga-list/latest-manga"
+	SimpleQueryPath string // default: "search/story"
+}
+
+// Source scrapes a single MangaBox-style site described by its Config.
+type Source struct {
+	cfg    Config
+	client *httpclient.Client
+	id     int64
+}
+
+// New returns a Source for cfg after filling in default URL paths. The source
+// ID is derived from cfg.Name and cfg.Lang.
+func New(cfg Config) *Source {
+	if cfg.PopularURLPath == "" {
+		cfg.PopularURLPath = "manga-list/hot-manga"
+	}
+	if cfg.LatestURLPath == "" {
+		cfg.LatestURLPath = "manga-list/latest-manga"
+	}
+	if cfg.SimpleQueryPath == "" {
+		cfg.SimpleQueryPath = "search/story"
+	}
+	c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
+	return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
+}
+
+// ID returns the generated source identifier.
+func (s *Source) ID() int64 { return s.id }
+
+// Name returns the configured source name.
+func (s *Source) Name() string { return s.cfg.Name }
+
+// Lang returns the configured language code.
+func (s *Source) Lang() string { return s.cfg.Lang }
+
+// SupportsLatest reports that GetLatestUpdates is implemented.
+func (s *Source) SupportsLatest() bool { return true }
+
+// base returns the configured base URL without trailing slashes.
+func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") }
+
+// get fetches rawURL and parses the response body as an HTML document.
+// Any status other than 200 is reported as an error.
+func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	// Build the Referer from the trimmed base so a BaseURL that already ends
+	// in "/" does not produce a double slash.
+	req.Header.Set("Referer", s.base()+"/")
+	resp, err := s.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("mangabox: HTTP %d", resp.StatusCode)
+	}
+	return goquery.NewDocumentFromReader(resp.Body)
+}
+
+// getJSON fetches rawURL and decodes the JSON response body into out.
+// Any status other than 200, a failed read, or invalid JSON is an error.
+func (s *Source) getJSON(ctx context.Context, rawURL string, out any) error {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Referer", s.base()+"/")
+	req.Header.Set("Accept", "application/json")
+	resp, err := s.client.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("mangabox: HTTP %d", resp.StatusCode)
+	}
+	// Surface a truncated read instead of handing a partial body to the
+	// decoder, where it would show up as a confusing syntax error.
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return fmt.Errorf("mangabox: reading response body: %w", err)
+	}
+	return json.Unmarshal(body, out)
+}
+
+// mangaFromElement builds an SManga from one list entry. The link is taken
+// from the heading (h3/h2) when present, otherwise from the first anchor.
+func mangaFromElement(el *goquery.Selection, baseURL string) source.SManga {
+	m := source.SManga{}
+	a := el.Find("h3 a, h2 a").First()
+	if a.Length() == 0 {
+		a = el.Find("a").First()
+	}
+	m.URL = a.AttrOr("href", "")
+	m.Title = strings.TrimSpace(a.Text())
+	if thumb := el.Find("img").First().AttrOr("src", ""); thumb != "" {
+		m.ThumbnailURL = util.AbsURL(baseURL, thumb)
+	}
+	return m
+}
+
+// parseMangaList collects every browse-list entry that has both a URL and a
+// title, and detects a next page from the pagination links.
+func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
+	var mangas []source.SManga
+	sel := "div.truyen-list > div.list-truyen-item-wrap, div.comic-list > .list-comic-item-wrap"
+	doc.Find(sel).Each(func(_ int, el *goquery.Selection) {
+		m := mangaFromElement(el, s.cfg.BaseURL)
+		if m.URL != "" && m.Title != "" {
+			mangas = append(mangas, m)
+		}
+	})
+	hasNext := doc.Find("div.group_page a:not([href]) + a:not(:contains(Last)), a.page_select + a:not(.page_last), a.page-select + a:not(.page-last)").Length() > 0
+	return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
+}
+
+// GetPopularManga fetches {base}/{PopularURLPath}?page={page}.
+func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), fmt.Sprintf("%s/%s?page=%d", s.base(), s.cfg.PopularURLPath, page))
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc), nil
+}
+
+// GetLatestUpdates fetches {base}/{LatestURLPath}?page={page}.
+func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), fmt.Sprintf("%s/%s?page=%d", s.base(), s.cfg.LatestURLPath, page))
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc), nil
+}
+
+// GetSearchManga fetches {base}/{SimpleQueryPath}/{slug}?page={page}, where
+// slug is query passed through normalizeSearchQuery. filters is accepted for
+// interface compatibility and is not used.
+func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
+	slug := normalizeSearchQuery(query)
+	u := fmt.Sprintf("%s/%s/%s?page=%d", s.base(), s.cfg.SimpleQueryPath, slug, page)
+	doc, err := s.get(context.Background(), u)
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	var mangas []source.SManga
+	doc.Find(".panel_story_list .story_item, div.list-truyen-item-wrap, div.list-comic-item-wrap").Each(func(_ int, el *goquery.Selection) {
+		m := mangaFromElement(el, s.cfg.BaseURL)
+		if m.URL != "" && m.Title != "" {
+			mangas = append(mangas, m)
+		}
+	})
+	hasNext := doc.Find("a.page_select + a:not(.page_last), a.page-select + a:not(.page-last)").Length() > 0
+	return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil
+}
+
+// GetMangaDetails scrapes the detail page of manga. Two page layouts are
+// handled (div.manga-info-top and div.panel-story-info). On a fetch error the
+// input manga is returned unchanged together with the error.
+func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
+	if err != nil {
+		return manga, err
+	}
+	result := source.SManga{URL: manga.URL}
+	main := doc.Find("div.manga-info-top, div.panel-story-info").First()
+
+	result.Title = strings.TrimSpace(main.Find("h1").Text())
+	if result.Title == "" {
+		// Fall back to the title already known from the list page.
+		result.Title = manga.Title
+	}
+	if thumb := doc.Find("div.manga-info-pic img, span.info-image img").First().AttrOr("src", ""); thumb != "" {
+		result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, thumb)
+	}
+	result.Description = strings.TrimSpace(doc.Find("div#noidungm, div#panel-story-info-description, div#contentBox").First().Text())
+
+	result.Author = strings.TrimSpace(main.Find("li:contains(author) a, td:contains(author) + td a").First().Text())
+
+	statusText := strings.TrimSpace(main.Find("li:contains(status), td:contains(status) + td").First().Text())
+	switch {
+	case strings.Contains(statusText, "Ongoing"):
+		result.Status = source.StatusOngoing
+	case strings.Contains(statusText, "Completed"):
+		result.Status = source.StatusCompleted
+	default:
+		result.Status = source.StatusUnknown
+	}
+
+	// Genres from Kakalot style or Nelo style
+	var genres []string
+	main.Find("div.manga-info-top li:contains(genres) a").Each(func(_ int, a *goquery.Selection) {
+		if t := strings.TrimSpace(a.Text()); t != "" {
+			genres = append(genres, t)
+		}
+	})
+	if len(genres) == 0 {
+		main.Find("td:contains(genres) + td a").Each(func(_ int, a *goquery.Selection) {
+			if t := strings.TrimSpace(a.Text()); t != "" {
+				genres = append(genres, t)
+			}
+		})
+	}
+	result.Genre = strings.Join(genres, ", ")
+
+	// Alt name appended to description
+	if altEl := doc.Find(".story-alternative, tr:has(.info-alternative) h2").First(); altEl.Length() > 0 {
+		alt := strings.TrimSpace(altEl.Text())
+		if alt != "" {
+			if result.Description == "" {
+				result.Description = "Alternative Name: " + alt
+			} else {
+				result.Description += "\n\nAlternative Name: " + alt
+			}
+		}
+	}
+	return result, nil
+}
+
+// JSON DTOs for chapter list API
+
+// apiResponse is the envelope returned by the chapter list endpoint.
+type apiResponse struct {
+	Data apiDataResponse `json:"data"`
+}
+
+// apiDataResponse carries one page of chapters plus pagination state.
+type apiDataResponse struct {
+	Chapters   []apiChapter  `json:"chapters"`
+	Pagination apiPagination `json:"pagination"`
+}
+
+// apiChapter is a single chapter entry of the chapter list endpoint.
+type apiChapter struct {
+	ChapterName string  `json:"chapter_name"`
+	ChapterSlug string  `json:"chapter_slug"`
+	ChapterNum  float32 `json:"chapter_num"`
+	UpdatedAt   string  `json:"updated_at"`
+}
+
+// apiPagination reports whether further pages can be requested.
+type apiPagination struct {
+	HasMore bool `json:"has_more"`
+}
+
+// dateFormats lists the layouts tried, in order, by parseChapterDate.
+var dateFormats = []string{
+	"2006-01-02T15:04:05.000000Z",
+	"2006-01-02T15:04:05Z",
+	"2006-01-02",
+}
+
+// parseChapterDate returns s as Unix milliseconds using the first layout in
+// dateFormats that parses, or 0 when none does.
+func parseChapterDate(s string) int64 {
+	for _, f := range dateFormats {
+		if t, err := time.Parse(f, s); err == nil {
+			return t.UnixMilli()
+		}
+	}
+	return 0
+}
+
+// GetChapterList pages through {base}/api/manga/{slug}/chapters, 500 entries
+// at a time, until the API reports no more results, and returns the chapters
+// in API order.
+func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
+	// Extract slug from manga URL: last path segment
+	slug := util.SlugFromURL(manga.URL)
+	if slug == "" {
+		// Fallback: use last non-empty path part
+		parts := strings.Split(strings.TrimRight(manga.URL, "/"), "/")
+		slug = parts[len(parts)-1]
+	}
+	if slug == "" {
+		// Without a slug the request would hit /api/manga//chapters.
+		return nil, fmt.Errorf("mangabox: cannot derive slug from manga URL %q", manga.URL)
+	}
+
+	offset := 0
+	const limit = 500
+	var chapters []source.SChapter
+
+	for {
+		u := fmt.Sprintf("%s/api/manga/%s/chapters?limit=%d&offset=%d", s.base(), slug, limit, offset)
+		var apiResp apiResponse
+		if err := s.getJSON(context.Background(), u, &apiResp); err != nil {
+			return nil, err
+		}
+		for _, ch := range apiResp.Data.Chapters {
+			chURL := fmt.Sprintf("%s/manga/%s/%s", s.base(), slug, ch.ChapterSlug)
+			chapters = append(chapters, source.SChapter{
+				URL:        chURL,
+				Name:       ch.ChapterName,
+				DateUpload: parseChapterDate(ch.UpdatedAt),
+			})
+		}
+		// Stop on an empty page as well: an API that keeps reporting
+		// has_more without returning chapters would otherwise loop forever.
+		if !apiResp.Data.Pagination.HasMore || len(apiResp.Data.Chapters) == 0 {
+			break
+		}
+		offset += limit
+	}
+	return chapters, nil
+}
+
+// arrayRe matches any `name = [ ... ]` assignment.
+// NOTE(review): not referenced anywhere in this file; extractJSArray builds a
+// name-specific pattern instead. Confirm whether it can be removed.
+var arrayRe = regexp.MustCompile(`(?s)(\w+)\s*=\s*\[([^\]]+)\]`)
+
+// extractJSArray returns the elements of the first `name = [ ... ]` array
+// literal found in content. Each element has surrounding quotes removed,
+// escaped slashes (\/) unescaped, and trailing slashes trimmed; empty
+// elements are dropped. It returns nil when no such array is found.
+func extractJSArray(content, name string) []string {
+	re := regexp.MustCompile(`(?s)` + regexp.QuoteMeta(name) + `\s*=\s*\[([^\]]+)\]`)
+	m := re.FindStringSubmatch(content)
+	if len(m) < 2 {
+		return nil
+	}
+	var result []string
+	for _, part := range strings.Split(m[1], ",") {
+		val := strings.TrimSpace(part)
+		val = strings.Trim(val, `"'`)
+		val = strings.ReplaceAll(val, `\/`, "/")
+		val = strings.TrimRight(val, "/")
+		if val != "" {
+			result = append(result, val)
+		}
+	}
+	return result
+}
+
+// GetPageList returns the page images of a chapter. It first reads the cdns
+// (or backupImage) and chapterImages arrays from the inline scripts; when
+// those are missing it falls back to the img tags of the reader container.
+func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
+	if err != nil {
+		return nil, err
+	}
+
+	// Try extracting from script with cdns and chapterImages
+	var scriptContent string
+	doc.Find("script").Each(func(_ int, el *goquery.Selection) {
+		// A script that fails to render simply cannot match.
+		html, _ := el.Html()
+		if strings.Contains(html, "cdns") && strings.Contains(html, "chapterImages") {
+			scriptContent += html + "\n"
+		}
+	})
+
+	if scriptContent != "" {
+		cdns := extractJSArray(scriptContent, "cdns")
+		if len(cdns) == 0 {
+			cdns = extractJSArray(scriptContent, "backupImage")
+		}
+		chapterImages := extractJSArray(scriptContent, "chapterImages")
+
+		if len(cdns) > 0 && len(chapterImages) > 0 {
+			pages := make([]source.Page, len(chapterImages))
+			for i, img := range chapterImages {
+				// Relative image paths are spread across the CDN hosts round-robin.
+				cdn := cdns[i%len(cdns)]
+				var imageURL string
+				if strings.HasPrefix(img, "http") {
+					imageURL = img
+				} else {
+					imageURL = strings.TrimRight(cdn, "/") + "/" + strings.TrimLeft(img, "/")
+				}
+				pages[i] = source.Page{Index: i, ImageURL: imageURL}
+			}
+			return pages, nil
+		}
+	}
+
+	// Fallback: div.container-chapter-reader > img
+	var pages []source.Page
+	doc.Find("div.container-chapter-reader > img").Each(func(_ int, img *goquery.Selection) {
+		u := img.AttrOr("src", "")
+		if u != "" {
+			// Number pages by position in the result so images without a
+			// src do not leave gaps in the index sequence.
+			pages = append(pages, source.Page{Index: len(pages), ImageURL: util.AbsURL(s.cfg.BaseURL, u)})
+		}
+	})
+	return pages, nil
+}
+
+// normalizeSearchQuery mimics the change_alias JS function from Mangakakalot.
+// The query is lower-cased; ASCII letters and digits are kept, spaces become
+// underscores, and every other character is dropped.
+func normalizeSearchQuery(query string) string {
+	q := strings.ToLower(query)
+	var b strings.Builder
+	for _, r := range q {
+		switch {
+		case (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9'):
+			b.WriteRune(r)
+		case r == ' ':
+			b.WriteByte('_')
+		}
+	}
+	return b.String()
+}
+
+// GetImageURL returns the image URL already stored on page.
+func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil }
+
+// GetFilterList returns nil; this base defines no filters.
+func (s *Source) GetFilterList() []source.Filter { return nil }
diff --git a/sources/base/mangacatalog/mangacatalog.go b/sources/base/mangacatalog/mangacatalog.go
new file mode 100644
index 0000000..70ef4cc
--- /dev/null
+++ b/sources/base/mangacatalog/mangacatalog.go
@@ -0,0 +1,142 @@
+// Package mangacatalog implements the MangaCatalog manga base.
+// Single-franchise site network: popular list = static sourceList; chapters via HTML scraping.
+package mangacatalog + +import ( + "context" + "fmt" + "net/http" + "strings" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type SourceEntry struct { + Name string + URL string +} + +type Config struct { + Name string + BaseURL string + Lang string + SourceList []SourceEntry +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + if len(cfg.SourceList) == 0 { + cfg.SourceList = []SourceEntry{{Name: cfg.Name, URL: cfg.BaseURL}} + } + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return false } + +func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("mangacatalog: HTTP %d", resp.StatusCode) + } + return goquery.NewDocumentFromReader(resp.Body) +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { + mangas := make([]source.SManga, len(s.cfg.SourceList)) + for i, entry := range s.cfg.SourceList { + mangas[i] = source.SManga{Title: entry.Name, URL: entry.URL} + } + return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil +} + +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { + return source.MangasPage{}, nil +} + +func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + 
var mangas []source.SManga + for _, entry := range s.cfg.SourceList { + if query == "" || strings.Contains(strings.ToLower(entry.Name), strings.ToLower(query)) { + mangas = append(mangas, source.SManga{Title: entry.Name, URL: entry.URL}) + } + } + return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return manga, err + } + result := source.SManga{URL: manga.URL, Title: manga.Title} + if thumb := doc.Find("img[itemprop=image], [itemprop=image] img").First().AttrOr("src", ""); thumb != "" { + result.ThumbnailURL = util.AbsURL(manga.URL, thumb) + } + result.Description = strings.TrimSpace(doc.Find("div.description, div#description").Text()) + return result, nil +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return nil, err + } + var chapters []source.SChapter + // Selector: div.w-full > div.bg-bg-secondary > div.grid + doc.Find("div.w-full > div.bg-bg-secondary > div.grid").Each(func(_ int, el *goquery.Selection) { + ch := source.SChapter{} + link := el.Find(".col-span-4 > a").First() + ch.URL = link.AttrOr("href", "") + name1 := strings.TrimSpace(link.Text()) + name2 := strings.TrimSpace(el.Find(".text-xs:not(a)").Text()) + if name2 == "" { + ch.Name = name1 + } else { + ch.Name = name1 + " - " + name2 + } + if ch.URL != "" { + chapters = append(chapters, ch) + } + }) + return chapters, nil +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) + if err != nil { + return nil, err + } + var pages []source.Page + doc.Find("img[data-src]").Each(func(i int, img *goquery.Selection) { + u := img.AttrOr("data-src", "") + if u != 
"" { + pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(chapter.URL, u)}) + } + }) + return pages, nil +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil } diff --git a/sources/base/mangataro/mangataro.go b/sources/base/mangataro/mangataro.go new file mode 100644 index 0000000..e73b0eb --- /dev/null +++ b/sources/base/mangataro/mangataro.go @@ -0,0 +1,417 @@ +// Package mangataro implements the MangaTaro manga base. +// WP JSON API (browse/details) + custom auth endpoints with MD5 token (chapters/pages); CF-protected. +package mangataro + +import ( + "bytes" + "context" + "crypto/md5" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type Config struct { + Name string + BaseURL string + Lang string +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +func (s *Source) doGet(ctx context.Context, rawURL string, out any) error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + req.Header.Set("Accept", "application/json") + resp, err := s.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("mangataro: HTTP %d for %s", 
resp.StatusCode, rawURL) + } + body, _ := io.ReadAll(resp.Body) + return json.Unmarshal(body, out) +} + +func (s *Source) doPost(ctx context.Context, rawURL string, payload any, out any) error { + body, err := json.Marshal(payload) + if err != nil { + return err + } + req, err := http.NewRequestWithContext(ctx, http.MethodPost, rawURL, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + resp, err := s.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("mangataro: HTTP %d for %s", resp.StatusCode, rawURL) + } + respBody, _ := io.ReadAll(resp.Body) + return json.Unmarshal(respBody, out) +} + +// mangaURLDTO is stored as the manga.URL field (JSON-encoded). +type mangaURLDTO struct { + ID string `json:"id"` + Slug string `json:"slug"` +} + +func encodeMangaURL(id, slug string) string { + b, _ := json.Marshal(mangaURLDTO{ID: id, Slug: slug}) + return string(b) +} + +func decodeMangaURL(raw string) mangaURLDTO { + var dto mangaURLDTO + _ = json.Unmarshal([]byte(raw), &dto) + return dto +} + +// Search/browse DTOs + +type searchPayload struct { + Page int `json:"page"` + Search string `json:"search"` + Years string `json:"years"` + Genres string `json:"genres"` + Types string `json:"types"` + Statuses string `json:"statuses"` + Sort string `json:"sort"` + GenreMatchMode string `json:"genreMatchMode"` +} + +type browseManga struct { + ID string `json:"id"` + URL string `json:"url"` // slug + Title string `json:"title"` + Cover string `json:"cover"` + Type string `json:"type"` + Description string `json:"description"` + Status string `json:"status"` +} + +func (s *Source) browse(ctx context.Context, page int, search, sort string) (source.MangasPage, error) { + payload := searchPayload{ + Page: page, + Search: search, + Years: "[]", + 
Genres: "[]", + Types: "[]", + Statuses: "[]", + Sort: sort, + GenreMatchMode: "and", + } + var items []browseManga + if err := s.doPost(ctx, s.base()+"/wp-json/manga/v1/load", payload, &items); err != nil { + return source.MangasPage{}, err + } + var mangas []source.SManga + for _, item := range items { + if item.Type == "Novel" || item.URL == "" { + continue + } + mangas = append(mangas, source.SManga{ + URL: encodeMangaURL(item.ID, item.URL), + Title: unescapeHTML(item.Title), + ThumbnailURL: item.Cover, + Description: unescapeHTML(item.Description), + Status: parseStatus(item.Status), + }) + } + // hasNextPage: Kotlin checks data.size == 24 + hasNext := len(items) == 24 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { + return s.browse(context.Background(), page, "", "popular_desc") +} + +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { + return s.browse(context.Background(), page, "", "post_desc") +} + +func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { + return s.browse(context.Background(), page, query, "popular_desc") +} + +// Manga details DTOs (WP JSON API) + +type mangaDetails struct { + ID int `json:"id"` + Slug string `json:"slug"` + Title rendered `json:"title"` + Content rendered `json:"content"` + Type string `json:"type"` + Embedded embedded `json:"_embedded"` +} + +type rendered struct { + Rendered string `json:"rendered"` +} + +type embedded struct { + FeaturedMedia []thumbnail `json:"wp:featuredmedia"` + Terms [][]term `json:"wp:term"` +} + +func (e embedded) getTerms(taxonomy string) []string { + for _, group := range e.Terms { + if len(group) > 0 && group[0].Taxonomy == taxonomy { + names := make([]string, len(group)) + for i, t := range group { + names[i] = t.Name + } + return names + } + } + return nil +} + +type thumbnail struct { + URL string 
`json:"source_url"` +} + +type term struct { + Name string `json:"name"` + Taxonomy string `json:"taxonomy"` +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + dto := decodeMangaURL(manga.URL) + if dto.ID == "" { + return manga, fmt.Errorf("mangataro: cannot decode manga URL: %s", manga.URL) + } + u := fmt.Sprintf("%s/wp-json/wp/v2/manga/%s?_embed", s.base(), dto.ID) + var data mangaDetails + if err := s.doGet(context.Background(), u, &data); err != nil { + return manga, err + } + + result := source.SManga{URL: manga.URL} + result.URL = encodeMangaURL(fmt.Sprint(data.ID), data.Slug) + result.Title = unescapeHTML(data.Title.Rendered) + result.Description = plainText(data.Content.Rendered) + + tags := data.Embedded.getTerms("post_tag") + genreSet := make(map[string]bool) + for _, t := range tags { + genreSet[t] = true + } + knownTypes := []string{"Manhwa", "Manhua", "Manga"} + hasKnown := false + for _, kt := range knownTypes { + if genreSet[kt] { + hasKnown = true + break + } + } + if !hasKnown && data.Type != "" { + genreSet[data.Type] = true + } + var genres []string + for g := range genreSet { + genres = append(genres, g) + } + result.Genre = strings.Join(genres, ", ") + result.Author = strings.Join(data.Embedded.getTerms("manga_author"), ", ") + + if len(data.Embedded.FeaturedMedia) > 0 { + result.ThumbnailURL = data.Embedded.FeaturedMedia[0].URL + } + result.Status = manga.Status // preserved from browse + return result, nil +} + +// Chapter list DTOs + +type chapterList struct { + Chapters []chapter `json:"chapters"` +} + +type chapter struct { + URL string `json:"url"` + Chapter string `json:"chapter"` + Title *string `json:"title"` + Date string `json:"date"` + GroupName *string `json:"group_name"` + Language string `json:"language"` +} + +func md5Token(timestamp int64) string { + date := time.Unix(timestamp, 0).UTC().Format("2006-01-02") + input := fmt.Sprintf("%dmng_ch_%s", timestamp, date) + sum := 
md5.Sum([]byte(input)) + return fmt.Sprintf("%x", sum)[:16] +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + dto := decodeMangaURL(manga.URL) + if dto.ID == "" { + return nil, fmt.Errorf("mangataro: cannot decode manga URL: %s", manga.URL) + } + + ts := time.Now().Unix() + token := md5Token(ts) + + u, _ := url.Parse(s.base() + "/auth/manga-chapters") + q := u.Query() + q.Set("manga_id", dto.ID) + q.Set("offset", "0") + q.Set("limit", "9999") + q.Set("order", "DESC") + q.Set("_t", token) + u.RawQuery = q.Encode() + + var data chapterList + if err := s.doGet(context.Background(), u.String(), &data); err != nil { + return nil, err + } + + placeholders := map[string]bool{"": true, "N/A": true, "—": true} + var chapters []source.SChapter + for _, ch := range data.Chapters { + if !strings.EqualFold(ch.Language, s.cfg.Lang) { + continue + } + name := "Chapter " + ch.Chapter + if ch.Title != nil && !placeholders[*ch.Title] { + name += ": " + unescapeHTML(*ch.Title) + } + chURL := ch.URL + if !strings.HasPrefix(chURL, "http") { + chURL = s.base() + chURL + } + chapters = append(chapters, source.SChapter{ + URL: chURL, + Name: name, + DateUpload: util.ParseRelativeDate(ch.Date), + }) + } + return chapters, nil +} + +// Pages DTO + +type pagesDTO struct { + Images []string `json:"images"` +} + +func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { + // chapterId = last path segment of chapter URL, after last "-" + chapterURL := chapter.URL + if !strings.HasPrefix(chapterURL, "http") { + chapterURL = s.base() + chapterURL + } + parsed, err := url.Parse(chapterURL) + if err != nil { + return nil, err + } + segs := strings.Split(strings.TrimRight(parsed.Path, "/"), "/") + lastSeg := segs[len(segs)-1] + chapterID := lastSeg + if idx := strings.LastIndex(lastSeg, "-"); idx >= 0 { + chapterID = lastSeg[idx+1:] + } + + u := fmt.Sprintf("%s/auth/chapter-content?chapter_id=%s", s.base(), chapterID) + var data pagesDTO + 
if err := s.doGet(context.Background(), u, &data); err != nil { + return nil, err + } + + pages := make([]source.Page, len(data.Images)) + for i, img := range data.Images { + pages[i] = source.Page{Index: i, ImageURL: img} + } + return pages, nil +} + +func parseStatus(s string) int { + switch strings.ToLower(s) { + case "ongoing": + return source.StatusOngoing + case "completed", "complete": + return source.StatusCompleted + case "hiatus", "on hold", "on-hold": + return source.StatusHiatus + case "cancelled", "canceled": + return source.StatusCancelled + } + return source.StatusUnknown +} + +func unescapeHTML(s string) string { + // Basic HTML entity unescaping + r := strings.NewReplacer( + "&", "&", + "<", "<", + ">", ">", + """, `"`, + "'", "'", + "'", "'", + ) + prev := "" + for prev != s { + prev = s + s = r.Replace(s) + } + return s +} + +// plainText strips HTML tags from a string. +func plainText(html string) string { + // Quick approximation: remove tags + var b strings.Builder + inTag := false + for _, r := range html { + switch { + case r == '<': + inTag = true + case r == '>': + inTag = false + case !inTag: + b.WriteRune(r) + } + } + return strings.TrimSpace(b.String()) +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil } diff --git a/sources/base/mangawork/mangawork.go b/sources/base/mangawork/mangawork.go new file mode 100644 index 0000000..e854693 --- /dev/null +++ b/sources/base/mangawork/mangawork.go @@ -0,0 +1,402 @@ +// Package mangawork implements the MangaWork manga base. +// HTML scraping for browse/details; multipart POST to wp-admin/admin-ajax.php for chapter list; CF-protected. 
+package mangawork + +import ( + "bytes" + "context" + "fmt" + "io" + "mime/multipart" + "net/http" + "net/url" + "regexp" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + + "goyomi/internal/httpclient" + "goyomi/internal/source" + "goyomi/sources/base/util" +) + +type Config struct { + Name string + BaseURL string + Lang string + SeriesPath string // default: "series" + MangaPath string // default: "manga" + ChapterDateFmt string // default: "02/01/2006" + AuthorLabel string // default: "Autor(es)" + PopularOrder string // default: "popular" + LatestOrder string // default: "update" +} + +type Source struct { + cfg Config + client *httpclient.Client + id int64 +} + +func New(cfg Config) *Source { + if cfg.SeriesPath == "" { + cfg.SeriesPath = "series" + } + if cfg.MangaPath == "" { + cfg.MangaPath = "manga" + } + if cfg.ChapterDateFmt == "" { + cfg.ChapterDateFmt = "02/01/2006" + } + if cfg.AuthorLabel == "" { + cfg.AuthorLabel = "Autor(es)" + } + if cfg.PopularOrder == "" { + cfg.PopularOrder = "popular" + } + if cfg.LatestOrder == "" { + cfg.LatestOrder = "update" + } + c := httpclient.NewClient(httpclient.WithRateLimit(2, 3)) + return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} +} + +func (s *Source) ID() int64 { return s.id } +func (s *Source) Name() string { return s.cfg.Name } +func (s *Source) Lang() string { return s.cfg.Lang } +func (s *Source) SupportsLatest() bool { return true } + +func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } + +func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("Referer", s.cfg.BaseURL+"/") + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("mangawork: HTTP %d", resp.StatusCode) + } 
+ return goquery.NewDocumentFromReader(resp.Body) +} + +func (s *Source) buildSeriesURL(page int, query, order, status, typ string) string { + u, _ := url.Parse(s.base() + "/" + s.cfg.SeriesPath + "/") + q := u.Query() + q.Set("title", query) + q.Set("order", order) + if status != "" { + q.Set("status", status) + } + if typ != "" { + q.Set("type", typ) + } + q.Set("page", fmt.Sprint(page)) + u.RawQuery = q.Encode() + return u.String() +} + +func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage { + mangaSelector := fmt.Sprintf("div.w-full.h-full:has(a[href*='/%s/'])", s.cfg.MangaPath) + anchorSelector := fmt.Sprintf("a[href*='/%s/']", s.cfg.MangaPath) + var mangas []source.SManga + doc.Find(mangaSelector).Each(func(_ int, el *goquery.Selection) { + anchor := el.Find(anchorSelector).First() + u := anchor.AttrOr("href", "") + if u == "" { + return + } + m := source.SManga{URL: u} + m.Title = strings.TrimSpace(anchor.Find("h1").Text()) + if m.Title == "" { + m.Title = strings.TrimSpace(anchor.AttrOr("title", "")) + } + if thumb := anchor.Find("img").First().AttrOr("src", ""); thumb != "" { + m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, thumb) + } + if m.URL != "" && m.Title != "" { + mangas = append(mangas, m) + } + }) + hasNext := doc.Find(".pagination .page-numbers.current + a[href]").Length() > 0 + return source.MangasPage{Mangas: mangas, HasNextPage: hasNext} +} + +func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { + doc, err := s.get(context.Background(), s.buildSeriesURL(page, "", s.cfg.PopularOrder, "", "")) + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { + doc, err := s.get(context.Background(), s.buildSeriesURL(page, "", s.cfg.LatestOrder, "", "")) + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +func (s *Source) GetSearchManga(page int, query string, 
filters []source.Filter) (source.MangasPage, error) { + doc, err := s.get(context.Background(), s.buildSeriesURL(page, query, "title", "", "")) + if err != nil { + return source.MangasPage{}, err + } + return s.parseMangaList(doc), nil +} + +func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return manga, err + } + result := source.SManga{URL: manga.URL} + result.Title = strings.TrimSpace(doc.Find("h1.text-4xl.font-bold.mb-2").Text()) + if result.Title == "" { + result.Title = manga.Title + } + if thumb := doc.Find("img[itemprop=image], [itemprop=image] img").First().AttrOr("src", ""); thumb != "" { + result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, thumb) + } + result.Description = strings.TrimSpace(doc.Find("div.text-base.leading-relaxed.mb-6.text-muted-foreground").Text()) + + genres := doc.Find("[itemprop=genre]") + var genreNames []string + genres.Each(func(_ int, el *goquery.Selection) { + if t := strings.TrimSpace(el.Text()); t != "" { + genreNames = append(genreNames, t) + } + }) + result.Genre = strings.Join(genreNames, ", ") + + // Status from the element before the first genre + if first := genres.First(); first.Length() > 0 { + statusLabel := strings.TrimSpace(first.Prev().Text()) + result.Status = parseStatus(statusLabel) + } + + // Author via info item search + result.Author = findInfoValue(doc, s.cfg.AuthorLabel) + return result, nil +} + +// findInfoValue finds the value of an info item with the given label. +// Info items: "div.grid.grid-cols-2.gap-4.text-sm.text-gray-600.mb-6 > div" +// Each item has a label and

value. +func findInfoValue(doc *goquery.Document, label string) string { + const infoItemSelector = "div.grid.grid-cols-2.gap-4.text-sm.text-gray-600.mb-6 > div" + var result string + doc.Find(infoItemSelector).Each(func(_ int, el *goquery.Selection) { + if result != "" { + return + } + if strings.TrimSpace(el.Find("strong").Text()) == label { + result = strings.TrimSpace(el.Find("p").Text()) + } + }) + return result +} + +func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) + if err != nil { + return nil, err + } + + container := doc.Find("#chapter_list.chapter_list_container").First() + var chapters []source.SChapter + + // Collect chapters already present on the page + container.Find("li").Each(func(_ int, el *goquery.Selection) { + if ch, ok := s.chapterFromElement(el); ok { + chapters = append(chapters, ch) + } + }) + + postID := strings.TrimSpace(container.AttrOr("data-post-id", "")) + if postID == "" { + return chapters, nil + } + count := strings.TrimSpace(container.AttrOr("data-count", "1000")) + + // Paginate via admin-ajax if there are more chapters + currentPage := 1 + for { + nextBtn := doc.Find("button.load-chapters[data-paged]").First() + if nextBtn.Length() == 0 { + break + } + nextPage := strings.TrimSpace(nextBtn.AttrOr("data-paged", "")) + if nextPage == "" || nextPage == fmt.Sprint(currentPage) { + break + } + order := strings.TrimSpace(nextBtn.AttrOr("data-order", "DESC")) + + ajaxDoc, err := s.postChapterListPage(manga.URL, postID, count, nextPage, order) + if err != nil { + break + } + doc = ajaxDoc + ajaxDoc.Find("li").Each(func(_ int, el *goquery.Selection) { + if ch, ok := s.chapterFromElement(el); ok { + chapters = append(chapters, ch) + } + }) + if p, _ := fmt.Sscan(nextPage, ¤tPage); p == 0 { + break + } + } + + // Deduplicate + seen := map[string]bool{} + unique := chapters[:0] + for _, ch := range chapters { + if !seen[ch.URL] 
{ + seen[ch.URL] = true + unique = append(unique, ch) + } + } + return unique, nil +} + +var chapterNumRe = regexp.MustCompile(`(\d+(?:[.,]\d+)?)`) + +func (s *Source) chapterFromElement(el *goquery.Selection) (source.SChapter, bool) { + anchor := el.Find("a[href]").First() + if anchor.Length() == 0 { + return source.SChapter{}, false + } + u := anchor.AttrOr("href", "") + if u == "" { + return source.SChapter{}, false + } + + nameEl := el.Find("span.m-0, span.line-clamp-1").First() + name := strings.TrimSpace(nameEl.Text()) + if name == "" { + name = strings.TrimSpace(anchor.Text()) + } + + // Date from last span + var dateStr string + el.Find("span").Each(func(_ int, sp *goquery.Selection) { + t := strings.TrimSpace(sp.Text()) + if t != "" && t != name { + dateStr = t + } + }) + + return source.SChapter{ + URL: u, + Name: name, + DateUpload: parseChapterDate(dateStr, s.cfg.ChapterDateFmt), + }, true +} + +func (s *Source) postChapterListPage(referer, postID, count, page, order string) (*goquery.Document, error) { + var buf bytes.Buffer + w := multipart.NewWriter(&buf) + _ = w.WriteField("action", "load_chapters") + _ = w.WriteField("post_id", postID) + _ = w.WriteField("count", count) + _ = w.WriteField("paged", page) + _ = w.WriteField("order", order) + w.Close() + + ajaxURL := s.base() + "/wp-admin/admin-ajax.php" + req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, ajaxURL, &buf) + if err != nil { + return nil, err + } + req.Header.Set("Content-Type", w.FormDataContentType()) + req.Header.Set("Referer", util.AbsURL(s.cfg.BaseURL, referer)) + req.Header.Set("Origin", s.cfg.BaseURL) + req.Header.Set("Accept", "*/*") + + resp, err := s.client.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + return goquery.NewDocumentFromReader(bytes.NewReader(body)) +} + +var pageImageRe = regexp.MustCompile(`"image"\s*:\s*"([^"]+)"`) + +func (s *Source) GetPageList(chapter source.SChapter) 
([]source.Page, error) { + doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) + if err != nil { + return nil, err + } + + // Primary: div.reader-area img + var pages []source.Page + doc.Find("div.reader-area img#imagech, div.reader-area img[src*='/manga_auto_capitulos/']").Each(func(i int, img *goquery.Selection) { + u := imgAttr(img) + if u != "" { + pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)}) + } + }) + if len(pages) > 0 { + return pages, nil + } + + // Fallback: extract "image": "url" from inline script + var scriptData string + doc.Find("script").Each(func(_ int, el *goquery.Selection) { + html, _ := el.Html() + if strings.Contains(html, `"image"`) { + scriptData += html + } + }) + for i, m := range pageImageRe.FindAllStringSubmatch(scriptData, -1) { + imgURL := strings.ReplaceAll(m[1], `\/`, "/") + pages = append(pages, source.Page{Index: i, ImageURL: imgURL}) + } + return pages, nil +} + +func imgAttr(img *goquery.Selection) string { + for _, attr := range []string{"data-lazy-src", "data-src", "src"} { + if v, ok := img.Attr(attr); ok && v != "" { + return v + } + } + return "" +} + +func parseChapterDate(s, format string) int64 { + if s == "" { + return 0 + } + t, err := time.Parse(format, s) + if err != nil { + return 0 + } + return t.UnixMilli() +} + +func parseStatus(s string) int { + switch strings.ToLower(s) { + case "publishing", "ongoing", "em andamento": + return source.StatusOngoing + case "finished", "completed", "concluído", "concluido", "finalizado": + return source.StatusCompleted + case "on hold", "on-hold", "hiatus", "em hiato": + return source.StatusHiatus + case "cancelled", "canceled", "cancelado": + return source.StatusCancelled + } + return source.StatusUnknown +} + +func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } +func (s *Source) GetFilterList() []source.Filter { return nil }