// Package mangabox implements the MangaBox manga base. // HTML scraping for lists; JSON API for chapter list; chapter pages via HTML + JS array extraction. package mangabox import ( "context" "encoding/json" "fmt" "io" "net/http" "regexp" "strings" "time" "github.com/PuerkitoBio/goquery" "goyomi/internal/httpclient" "goyomi/internal/source" "goyomi/sources/base/util" ) type Config struct { Name string BaseURL string Lang string PopularURLPath string // default: "manga-list/hot-manga" LatestURLPath string // default: "manga-list/latest-manga" SimpleQueryPath string // default: "search/story" } type Source struct { cfg Config client *httpclient.Client id int64 } func New(cfg Config) *Source { if cfg.PopularURLPath == "" { cfg.PopularURLPath = "manga-list/hot-manga" } if cfg.LatestURLPath == "" { cfg.LatestURLPath = "manga-list/latest-manga" } if cfg.SimpleQueryPath == "" { cfg.SimpleQueryPath = "search/story" } c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} } func (s *Source) ID() int64 { return s.id } func (s *Source) Name() string { return s.cfg.Name } func (s *Source) Lang() string { return s.cfg.Lang } func (s *Source) SupportsLatest() bool { return true } func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) if err != nil { return nil, err } req.Header.Set("Referer", s.cfg.BaseURL+"/") resp, err := s.client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("mangabox: HTTP %d", resp.StatusCode) } return goquery.NewDocumentFromReader(resp.Body) } func (s *Source) getJSON(ctx context.Context, rawURL string, out any) error { req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) if err != nil { return err } req.Header.Set("Referer", s.cfg.BaseURL+"/") req.Header.Set("Accept", "application/json") resp, err := s.client.Do(req) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return fmt.Errorf("mangabox: HTTP %d", resp.StatusCode) } body, _ := io.ReadAll(resp.Body) return json.Unmarshal(body, out) } func mangaFromElement(el *goquery.Selection, baseURL string) source.SManga { m := source.SManga{} a := el.Find("h3 a, h2 a").First() if a.Length() == 0 { a = el.Find("a").First() } m.URL = a.AttrOr("href", "") m.Title = strings.TrimSpace(a.Text()) if thumb := el.Find("img").First().AttrOr("src", ""); thumb != "" { m.ThumbnailURL = util.AbsURL(baseURL, thumb) } return m } func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage { var mangas []source.SManga sel := "div.truyen-list > div.list-truyen-item-wrap, div.comic-list > .list-comic-item-wrap" doc.Find(sel).Each(func(_ int, el *goquery.Selection) { m := mangaFromElement(el, s.cfg.BaseURL) if m.URL != "" && m.Title != "" { mangas = append(mangas, m) } }) hasNext := doc.Find("div.group_page a:not([href]) + a:not(:contains(Last)), a.page_select + a:not(.page_last), a.page-select + a:not(.page-last)").Length() > 0 return source.MangasPage{Mangas: mangas, HasNextPage: hasNext} } func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { doc, err := s.get(context.Background(), fmt.Sprintf("%s/%s?page=%d", s.base(), s.cfg.PopularURLPath, page)) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { doc, err := s.get(context.Background(), fmt.Sprintf("%s/%s?page=%d", s.base(), s.cfg.LatestURLPath, page)) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { slug := normalizeSearchQuery(query) u := fmt.Sprintf("%s/%s/%s?page=%d", s.base(), s.cfg.SimpleQueryPath, slug, page) doc, err := s.get(context.Background(), u) if err != nil { return source.MangasPage{}, err } var mangas []source.SManga doc.Find(".panel_story_list .story_item, div.list-truyen-item-wrap, div.list-comic-item-wrap").Each(func(_ int, el *goquery.Selection) { m := mangaFromElement(el, s.cfg.BaseURL) if m.URL != "" && m.Title != "" { mangas = append(mangas, m) } }) hasNext := doc.Find("a.page_select + a:not(.page_last), a.page-select + a:not(.page-last)").Length() > 0 return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil } func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) if err != nil { return manga, err } result := source.SManga{URL: manga.URL} main := doc.Find("div.manga-info-top, div.panel-story-info").First() result.Title = strings.TrimSpace(main.Find("h1").Text()) if result.Title == "" { result.Title = manga.Title } if thumb := doc.Find("div.manga-info-pic img, span.info-image img").First().AttrOr("src", ""); thumb != "" { result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, thumb) } result.Description = strings.TrimSpace(doc.Find("div#noidungm, div#panel-story-info-description, div#contentBox").First().Text()) result.Author = strings.TrimSpace(main.Find("li:contains(author) a, td:contains(author) + td a").First().Text()) statusText := strings.TrimSpace(main.Find("li:contains(status), td:contains(status) + td").First().Text()) switch { case strings.Contains(statusText, "Ongoing"): result.Status = source.StatusOngoing case strings.Contains(statusText, "Completed"): result.Status = source.StatusCompleted default: result.Status = source.StatusUnknown } // Genres from Kakalot style or Nelo style var genres []string main.Find("div.manga-info-top li:contains(genres) a").Each(func(_ int, a *goquery.Selection) { if t := strings.TrimSpace(a.Text()); t != "" { genres = append(genres, t) } }) if len(genres) == 0 { main.Find("td:contains(genres) + td a").Each(func(_ int, a *goquery.Selection) { if t := strings.TrimSpace(a.Text()); t != "" { genres = append(genres, t) } }) } result.Genre = strings.Join(genres, ", ") // Alt name appended to description if altEl := doc.Find(".story-alternative, tr:has(.info-alternative) h2").First(); altEl.Length() > 0 { alt := strings.TrimSpace(altEl.Text()) if alt != "" { if result.Description == "" { result.Description = "Alternative Name: " + alt } else { result.Description += "\n\nAlternative Name: " + alt } } } return result, nil } // JSON DTOs for chapter list API type apiResponse struct { Data apiDataResponse `json:"data"` } type apiDataResponse struct { Chapters []apiChapter `json:"chapters"` Pagination apiPagination `json:"pagination"` } type apiChapter struct { ChapterName string `json:"chapter_name"` ChapterSlug string `json:"chapter_slug"` ChapterNum float32 `json:"chapter_num"` UpdatedAt string `json:"updated_at"` } type apiPagination struct { HasMore bool `json:"has_more"` } var dateFormats = []string{ "2006-01-02T15:04:05.000000Z", "2006-01-02T15:04:05Z", "2006-01-02", } func parseChapterDate(s string) int64 { for _, f := range dateFormats { if t, err := time.Parse(f, s); err == nil { return t.UnixMilli() } } return 0 } func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { // Extract slug from manga URL: last path segment slug := util.SlugFromURL(manga.URL) if slug == "" { // Fallback: use last non-empty path part parts := strings.Split(strings.TrimRight(manga.URL, "/"), "/") slug = parts[len(parts)-1] } offset := 0 const limit = 500 var chapters []source.SChapter for { u := fmt.Sprintf("%s/api/manga/%s/chapters?limit=%d&offset=%d", s.base(), slug, limit, offset) var apiResp apiResponse if err := s.getJSON(context.Background(), u, &apiResp); err != nil { return nil, err } for _, ch := range apiResp.Data.Chapters { chURL := fmt.Sprintf("%s/manga/%s/%s", s.base(), slug, ch.ChapterSlug) chapters = append(chapters, source.SChapter{ URL: chURL, Name: ch.ChapterName, DateUpload: parseChapterDate(ch.UpdatedAt), }) } if !apiResp.Data.Pagination.HasMore { break } offset += limit } return chapters, nil } var arrayRe = regexp.MustCompile(`(?s)(\w+)\s*=\s*\[([^\]]+)\]`) func extractJSArray(content, name string) []string { re := regexp.MustCompile(`(?s)` + regexp.QuoteMeta(name) + `\s*=\s*\[([^\]]+)\]`) m := re.FindStringSubmatch(content) if len(m) < 2 { return nil } var result []string for _, part := range strings.Split(m[1], ",") { val := strings.TrimSpace(part) val = strings.Trim(val, `"'`) val = strings.ReplaceAll(val, `\/`, "/") val = strings.TrimRight(val, "/") if val != "" { result = append(result, val) } } return result } func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) if err != nil { return nil, err } // Try extracting from script with cdns and chapterImages var scriptContent string doc.Find("script").Each(func(_ int, el *goquery.Selection) { html, _ := el.Html() if strings.Contains(html, "cdns") && strings.Contains(html, "chapterImages") { scriptContent += html + "\n" } }) if scriptContent != "" { cdns := extractJSArray(scriptContent, "cdns") if len(cdns) == 0 { cdns = extractJSArray(scriptContent, "backupImage") } chapterImages := extractJSArray(scriptContent, "chapterImages") if len(cdns) > 0 && len(chapterImages) > 0 { pages := make([]source.Page, len(chapterImages)) for i, img := range chapterImages { cdn := cdns[i%len(cdns)] var imageURL string if strings.HasPrefix(img, "http") { imageURL = img } else { imageURL = strings.TrimRight(cdn, "/") + "/" + strings.TrimLeft(img, "/") } pages[i] = source.Page{Index: i, ImageURL: imageURL} } return pages, nil } } // Fallback: div.container-chapter-reader > img var pages []source.Page doc.Find("div.container-chapter-reader > img").Each(func(i int, img *goquery.Selection) { u := img.AttrOr("src", "") if u != "" { pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)}) } }) return pages, nil } // normalizeSearchQuery mimics the change_alias JS function from Mangakakalot. func normalizeSearchQuery(query string) string { q := strings.ToLower(query) var b strings.Builder for _, r := range q { switch { case (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9'): b.WriteRune(r) case r == ' ': b.WriteByte('_') } } return b.String() } func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } func (s *Source) GetFilterList() []source.Filter { return nil }