// Package mangawork implements the MangaWork manga base. // HTML scraping for browse/details; multipart POST to wp-admin/admin-ajax.php for chapter list; CF-protected. package mangawork import ( "bytes" "context" "fmt" "io" "mime/multipart" "net/http" "net/url" "regexp" "strings" "time" "github.com/PuerkitoBio/goquery" "goyomi/internal/httpclient" "goyomi/internal/source" "goyomi/sources/base/util" ) type Config struct { Name string BaseURL string Lang string SeriesPath string // default: "series" MangaPath string // default: "manga" ChapterDateFmt string // default: "02/01/2006" AuthorLabel string // default: "Autor(es)" PopularOrder string // default: "popular" LatestOrder string // default: "update" } type Source struct { cfg Config client *httpclient.Client id int64 } func New(cfg Config) *Source { if cfg.SeriesPath == "" { cfg.SeriesPath = "series" } if cfg.MangaPath == "" { cfg.MangaPath = "manga" } if cfg.ChapterDateFmt == "" { cfg.ChapterDateFmt = "02/01/2006" } if cfg.AuthorLabel == "" { cfg.AuthorLabel = "Autor(es)" } if cfg.PopularOrder == "" { cfg.PopularOrder = "popular" } if cfg.LatestOrder == "" { cfg.LatestOrder = "update" } c := httpclient.NewClient(httpclient.WithRateLimit(2, 3)) return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} } func (s *Source) ID() int64 { return s.id } func (s *Source) Name() string { return s.cfg.Name } func (s *Source) Lang() string { return s.cfg.Lang } func (s *Source) SupportsLatest() bool { return true } func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) if err != nil { return nil, err } req.Header.Set("Referer", s.cfg.BaseURL+"/") resp, err := s.client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("mangawork: HTTP %d", resp.StatusCode) } return goquery.NewDocumentFromReader(resp.Body) } func (s *Source) buildSeriesURL(page int, query, order, status, typ string) string { u, _ := url.Parse(s.base() + "/" + s.cfg.SeriesPath + "/") q := u.Query() q.Set("title", query) q.Set("order", order) if status != "" { q.Set("status", status) } if typ != "" { q.Set("type", typ) } q.Set("page", fmt.Sprint(page)) u.RawQuery = q.Encode() return u.String() } func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage { mangaSelector := fmt.Sprintf("div.w-full.h-full:has(a[href*='/%s/'])", s.cfg.MangaPath) anchorSelector := fmt.Sprintf("a[href*='/%s/']", s.cfg.MangaPath) var mangas []source.SManga doc.Find(mangaSelector).Each(func(_ int, el *goquery.Selection) { anchor := el.Find(anchorSelector).First() u := anchor.AttrOr("href", "") if u == "" { return } m := source.SManga{URL: u} m.Title = strings.TrimSpace(anchor.Find("h1").Text()) if m.Title == "" { m.Title = strings.TrimSpace(anchor.AttrOr("title", "")) } if thumb := anchor.Find("img").First().AttrOr("src", ""); thumb != "" { m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, thumb) } if m.URL != "" && m.Title != "" { mangas = append(mangas, m) } }) hasNext := doc.Find(".pagination .page-numbers.current + a[href]").Length() > 0 return source.MangasPage{Mangas: mangas, HasNextPage: hasNext} } func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { doc, err := s.get(context.Background(), s.buildSeriesURL(page, "", s.cfg.PopularOrder, "", "")) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { doc, err := s.get(context.Background(), s.buildSeriesURL(page, "", s.cfg.LatestOrder, "", "")) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { doc, err := s.get(context.Background(), s.buildSeriesURL(page, query, "title", "", "")) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) if err != nil { return manga, err } result := source.SManga{URL: manga.URL} result.Title = strings.TrimSpace(doc.Find("h1.text-4xl.font-bold.mb-2").Text()) if result.Title == "" { result.Title = manga.Title } if thumb := doc.Find("img[itemprop=image], [itemprop=image] img").First().AttrOr("src", ""); thumb != "" { result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, thumb) } result.Description = strings.TrimSpace(doc.Find("div.text-base.leading-relaxed.mb-6.text-muted-foreground").Text()) genres := doc.Find("[itemprop=genre]") var genreNames []string genres.Each(func(_ int, el *goquery.Selection) { if t := strings.TrimSpace(el.Text()); t != "" { genreNames = append(genreNames, t) } }) result.Genre = strings.Join(genreNames, ", ") // Status from the element before the first genre if first := genres.First(); first.Length() > 0 { statusLabel := strings.TrimSpace(first.Prev().Text()) result.Status = parseStatus(statusLabel) } // Author via info item search result.Author = findInfoValue(doc, s.cfg.AuthorLabel) return result, nil } // findInfoValue finds the value of an info item with the given label. // Info items: "div.grid.grid-cols-2.gap-4.text-sm.text-gray-600.mb-6 > div" // Each item has a label and

value. func findInfoValue(doc *goquery.Document, label string) string { const infoItemSelector = "div.grid.grid-cols-2.gap-4.text-sm.text-gray-600.mb-6 > div" var result string doc.Find(infoItemSelector).Each(func(_ int, el *goquery.Selection) { if result != "" { return } if strings.TrimSpace(el.Find("strong").Text()) == label { result = strings.TrimSpace(el.Find("p").Text()) } }) return result } func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) if err != nil { return nil, err } container := doc.Find("#chapter_list.chapter_list_container").First() var chapters []source.SChapter // Collect chapters already present on the page container.Find("li").Each(func(_ int, el *goquery.Selection) { if ch, ok := s.chapterFromElement(el); ok { chapters = append(chapters, ch) } }) postID := strings.TrimSpace(container.AttrOr("data-post-id", "")) if postID == "" { return chapters, nil } count := strings.TrimSpace(container.AttrOr("data-count", "1000")) // Paginate via admin-ajax if there are more chapters currentPage := 1 for { nextBtn := doc.Find("button.load-chapters[data-paged]").First() if nextBtn.Length() == 0 { break } nextPage := strings.TrimSpace(nextBtn.AttrOr("data-paged", "")) if nextPage == "" || nextPage == fmt.Sprint(currentPage) { break } order := strings.TrimSpace(nextBtn.AttrOr("data-order", "DESC")) ajaxDoc, err := s.postChapterListPage(manga.URL, postID, count, nextPage, order) if err != nil { break } doc = ajaxDoc ajaxDoc.Find("li").Each(func(_ int, el *goquery.Selection) { if ch, ok := s.chapterFromElement(el); ok { chapters = append(chapters, ch) } }) if p, _ := fmt.Sscan(nextPage, ¤tPage); p == 0 { break } } // Deduplicate seen := map[string]bool{} unique := chapters[:0] for _, ch := range chapters { if !seen[ch.URL] { seen[ch.URL] = true unique = append(unique, ch) } } return unique, nil } var chapterNumRe = regexp.MustCompile(`(\d+(?:[.,]\d+)?)`) func (s *Source) chapterFromElement(el *goquery.Selection) (source.SChapter, bool) { anchor := el.Find("a[href]").First() if anchor.Length() == 0 { return source.SChapter{}, false } u := anchor.AttrOr("href", "") if u == "" { return source.SChapter{}, false } nameEl := el.Find("span.m-0, span.line-clamp-1").First() name := strings.TrimSpace(nameEl.Text()) if name == "" { name = strings.TrimSpace(anchor.Text()) } // Date from last span var dateStr string el.Find("span").Each(func(_ int, sp *goquery.Selection) { t := strings.TrimSpace(sp.Text()) if t != "" && t != name { dateStr = t } }) return source.SChapter{ URL: u, Name: name, DateUpload: parseChapterDate(dateStr, s.cfg.ChapterDateFmt), }, true } func (s *Source) postChapterListPage(referer, postID, count, page, order string) (*goquery.Document, error) { var buf bytes.Buffer w := multipart.NewWriter(&buf) _ = w.WriteField("action", "load_chapters") _ = w.WriteField("post_id", postID) _ = w.WriteField("count", count) _ = w.WriteField("paged", page) _ = w.WriteField("order", order) w.Close() ajaxURL := s.base() + "/wp-admin/admin-ajax.php" req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, ajaxURL, &buf) if err != nil { return nil, err } req.Header.Set("Content-Type", w.FormDataContentType()) req.Header.Set("Referer", util.AbsURL(s.cfg.BaseURL, referer)) req.Header.Set("Origin", s.cfg.BaseURL) req.Header.Set("Accept", "*/*") resp, err := s.client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() body, _ := io.ReadAll(resp.Body) return goquery.NewDocumentFromReader(bytes.NewReader(body)) } var pageImageRe = regexp.MustCompile(`"image"\s*:\s*"([^"]+)"`) func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) if err != nil { return nil, err } // Primary: div.reader-area img var pages []source.Page doc.Find("div.reader-area img#imagech, div.reader-area img[src*='/manga_auto_capitulos/']").Each(func(i int, img *goquery.Selection) { u := imgAttr(img) if u != "" { pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)}) } }) if len(pages) > 0 { return pages, nil } // Fallback: extract "image": "url" from inline script var scriptData string doc.Find("script").Each(func(_ int, el *goquery.Selection) { html, _ := el.Html() if strings.Contains(html, `"image"`) { scriptData += html } }) for i, m := range pageImageRe.FindAllStringSubmatch(scriptData, -1) { imgURL := strings.ReplaceAll(m[1], `\/`, "/") pages = append(pages, source.Page{Index: i, ImageURL: imgURL}) } return pages, nil } func imgAttr(img *goquery.Selection) string { for _, attr := range []string{"data-lazy-src", "data-src", "src"} { if v, ok := img.Attr(attr); ok && v != "" { return v } } return "" } func parseChapterDate(s, format string) int64 { if s == "" { return 0 } t, err := time.Parse(format, s) if err != nil { return 0 } return t.UnixMilli() } func parseStatus(s string) int { switch strings.ToLower(s) { case "publishing", "ongoing", "em andamento": return source.StatusOngoing case "finished", "completed", "concluĂ­do", "concluido", "finalizado": return source.StatusCompleted case "on hold", "on-hold", "hiatus", "em hiato": return source.StatusHiatus case "cancelled", "canceled", "cancelado": return source.StatusCancelled } return source.StatusUnknown } func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } func (s *Source) GetFilterList() []source.Filter { return nil }