diff --git a/docs/phase3-bases.md b/docs/phase3-bases.md
index a970db7..60992d4 100644
--- a/docs/phase3-bases.md
+++ b/docs/phase3-bases.md
@@ -52,13 +52,13 @@ Detailed implementation notes for complex bases are in the **Notes** section at
 - [x] `base/mangawork`
 - [x] `base/mangaworld` ⚠️ see notes
 - [x] `base/mangotheme` ⚠️ see notes
-- [ ] `base/manhwaz`
-- [ ] `base/masonry`
-- [ ] `base/mccms`
+- [x] `base/manhwaz`
+- [x] `base/masonry`
+- [x] `base/mccms`
 - [x] `base/mmlook` ⚠️ see notes
 - [x] `base/mmrcms` ⚠️ see notes
-- [ ] `base/monochrome`
-- [ ] `base/multichan`
+- [x] `base/monochrome`
+- [x] `base/multichan`
 - [ ] `base/natsuid`
 - [ ] `base/oceanwp`
 - [ ] `base/paprika`
diff --git a/sources/base/manhwaz/manhwaz.go b/sources/base/manhwaz/manhwaz.go
new file mode 100644
index 0000000..0e1329c
--- /dev/null
+++ b/sources/base/manhwaz/manhwaz.go
@@ -0,0 +1,265 @@
+// Package manhwaz implements the ManhwaZ manga base.
+// HTML scraping; popular from homepage; latest paginated; pages via div.page-break img.
+package manhwaz
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+
+	"goyomi/internal/httpclient"
+	"goyomi/internal/source"
+	"goyomi/sources/base/util"
+)
+
+// Config describes one ManhwaZ-based site.
+type Config struct {
+	Name          string
+	BaseURL       string
+	Lang          string
+	SearchPath    string // default: "search"
+	AuthorHeading string // default: "author(s)"
+	StatusHeading string // default: "status"
+}
+
+// Source scrapes the site described by its Config.
+type Source struct {
+	cfg    Config
+	client *httpclient.Client
+	id     int64
+}
+
+// New applies the Config defaults and returns a Source for cfg.
+// Headings are lowercased because GetMangaDetails matches them against
+// lowercased page text.
+func New(cfg Config) *Source {
+	if cfg.SearchPath == "" {
+		cfg.SearchPath = "search"
+	}
+	if cfg.AuthorHeading == "" {
+		cfg.AuthorHeading = "author(s)"
+	}
+	if cfg.StatusHeading == "" {
+		cfg.StatusHeading = "status"
+	}
+	cfg.AuthorHeading = strings.ToLower(cfg.AuthorHeading)
+	cfg.StatusHeading = strings.ToLower(cfg.StatusHeading)
+	c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
+	return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
+}
+
+// ID returns the identifier derived from the source name and language.
+func (s *Source) ID() int64            { return s.id }
+func (s *Source) Name() string         { return s.cfg.Name }
+func (s *Source) Lang() string         { return s.cfg.Lang }
+func (s *Source) SupportsLatest() bool { return true }
+
+// base returns BaseURL without trailing slashes.
+func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") }
+
+// get fetches rawURL and parses the response body as HTML.
+// Any status other than 200 is reported as an error.
+func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	// Use the trimmed base so a trailing slash in BaseURL is not doubled.
+	req.Header.Set("Referer", s.base()+"/")
+	req.Header.Set("Origin", s.base())
+	resp, err := s.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("manhwaz: HTTP %d", resp.StatusCode)
+	}
+	return goquery.NewDocumentFromReader(resp.Body)
+}
+
+// imgAttr returns the first non-empty, non-data-URI image attribute,
+// trying the lazy-load attributes before src.
+func imgAttr(img *goquery.Selection) string {
+	for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "src"} {
+		if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") {
+			return v
+		}
+	}
+	return ""
+}
+
+// parseItems builds one manga per element matched by itemSel, taking title
+// and link from linkSel and the thumbnail from imgSel. Entries without a
+// title or URL are dropped.
+func (s *Source) parseItems(doc *goquery.Document, itemSel, linkSel, imgSel string) []source.SManga {
+	var mangas []source.SManga
+	doc.Find(itemSel).Each(func(_ int, el *goquery.Selection) {
+		a := el.Find(linkSel).First()
+		if a.Length() == 0 {
+			return
+		}
+		m := source.SManga{
+			Title: strings.TrimSpace(a.Text()),
+			URL:   a.AttrOr("href", ""),
+		}
+		if img := el.Find(imgSel).First(); img.Length() > 0 {
+			m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, imgAttr(img))
+		}
+		if m.URL != "" && m.Title != "" {
+			mangas = append(mangas, m)
+		}
+	})
+	return mangas
+}
+
+// parseListing parses a paginated listing page (latest updates or search).
+func (s *Source) parseListing(doc *goquery.Document) source.MangasPage {
+	return source.MangasPage{
+		Mangas:      s.parseItems(doc, ".page-item-detail", ".item-summary a", ".item-thumb img"),
+		HasNextPage: doc.Find("ul.pager a[rel=next]").Length() > 0,
+	}
+}
+
+// GetPopularManga returns the homepage slider entries. The homepage is not
+// paginated, so page is ignored and HasNextPage is always false.
+func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), s.base())
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	mangas := s.parseItems(doc, "#slide-top > .item", ".info-item a", ".img-item img")
+	return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil
+}
+
+// GetLatestUpdates returns the given page of the paginated homepage listing.
+func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), fmt.Sprintf("%s/?page=%d", s.base(), page))
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseListing(doc), nil
+}
+
+// GetSearchManga searches by title. The query is URL-escaped so spaces, "&"
+// and "#" cannot corrupt the request; filters are ignored.
+func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
+	u := fmt.Sprintf("%s/%s?s=%s&page=%d", s.base(), s.cfg.SearchPath, url.QueryEscape(query), page)
+	doc, err := s.get(context.Background(), u)
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseListing(doc), nil
+}
+
+// summaryValue returns the trimmed text of the element following each
+// div.summary-heading whose lowercased text contains heading.
+func summaryValue(doc *goquery.Document, heading string) string {
+	match := doc.Find("div.summary-heading").FilterFunction(func(_ int, el *goquery.Selection) bool {
+		return strings.Contains(strings.ToLower(el.Text()), heading)
+	})
+	return strings.TrimSpace(match.Next().Text())
+}
+
+// GetMangaDetails scrapes title, status, author, description, genres and
+// cover from the manga page. On a fetch error the input manga is returned.
+func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
+	if err != nil {
+		return manga, err
+	}
+	result := source.SManga{URL: manga.URL}
+	result.Title = strings.TrimSpace(doc.Find("div.post-title h1").Text())
+	if result.Title == "" {
+		result.Title = manga.Title
+	}
+
+	ongoingTerms := []string{"ongoing", "đang ra", "on going", "publishing"}
+	completedTerms := []string{"completed", "hoàn thành", "truyện full", "complete"}
+	statusLower := strings.ToLower(summaryValue(doc, s.cfg.StatusHeading))
+	switch {
+	case containsAny(statusLower, ongoingTerms):
+		result.Status = source.StatusOngoing
+	case containsAny(statusLower, completedTerms):
+		result.Status = source.StatusCompleted
+	default:
+		result.Status = source.StatusUnknown
+	}
+
+	result.Author = summaryValue(doc, s.cfg.AuthorHeading)
+	result.Description = strings.TrimSpace(doc.Find("div.summary__content").Text())
+
+	var genres []string
+	doc.Find("div.genres-content a[rel=tag]").Each(func(_ int, a *goquery.Selection) {
+		if t := strings.TrimSpace(a.Text()); t != "" {
+			genres = append(genres, t)
+		}
+	})
+	result.Genre = strings.Join(genres, ", ")
+
+	if img := doc.Find("div.summary_image img").First(); img.Length() > 0 {
+		result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, imgAttr(img))
+	}
+	return result, nil
+}
+
+// GetChapterList returns the chapters in the order they appear on the manga
+// page. Rows without a link are skipped.
+func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
+	if err != nil {
+		return nil, err
+	}
+	var chapters []source.SChapter
+	doc.Find("li.wp-manga-chapter").Each(func(_ int, el *goquery.Selection) {
+		a := el.Find("a").First()
+		if a.Length() == 0 {
+			return
+		}
+		ch := source.SChapter{
+			URL:  a.AttrOr("href", ""),
+			Name: strings.TrimSpace(a.Text()),
+		}
+		if dateEl := el.Find("span.chapter-release-date").First(); dateEl.Length() > 0 {
+			ch.DateUpload = util.ParseRelativeDate(strings.TrimSpace(dateEl.Text()))
+		}
+		if ch.URL != "" {
+			chapters = append(chapters, ch)
+		}
+	})
+	return chapters, nil
+}
+
+// GetPageList returns the chapter images. Images without a usable URL are
+// skipped, and Index counts only the pages kept so it stays gap-free.
+func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
+	if err != nil {
+		return nil, err
+	}
+	var pages []source.Page
+	doc.Find("div.page-break img").Each(func(_ int, img *goquery.Selection) {
+		if u := imgAttr(img); u != "" {
+			pages = append(pages, source.Page{Index: len(pages), ImageURL: util.AbsURL(s.cfg.BaseURL, u)})
+		}
+	})
+	return pages, nil
+}
+
+// containsAny reports whether s contains at least one of terms.
+func containsAny(s string, terms []string) bool {
+	for _, t := range terms {
+		if strings.Contains(s, t) {
+			return true
+		}
+	}
+	return false
+}
+
+// GetImageURL returns the image URL already stored on the page.
+func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil }
+func (s *Source) GetFilterList() []source.Filter               { return nil }
diff --git a/sources/base/masonry/masonry.go b/sources/base/masonry/masonry.go
new file mode 100644
index 0000000..67232cc
--- /dev/null
+++ b/sources/base/masonry/masonry.go
@@ -0,0 +1,206 @@
+// Package masonry implements the Masonry manga base.
+// Gallery-style site: each entry is a single-chapter gallery; pages via .list-gallery CDN links.
+package masonry
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+
+	"goyomi/internal/httpclient"
+	"goyomi/internal/source"
+	"goyomi/sources/base/util"
+)
+
+// Config describes one Masonry-based site.
+type Config struct {
+	Name    string
+	BaseURL string
+	Lang    string
+}
+
+// Source scrapes the site described by its Config.
+type Source struct {
+	cfg    Config
+	client *httpclient.Client
+	id     int64
+}
+
+// New returns a Source for cfg.
+func New(cfg Config) *Source {
+	c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
+	return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
+}
+
+// ID returns the identifier derived from the source name and language.
+func (s *Source) ID() int64            { return s.id }
+func (s *Source) Name() string         { return s.cfg.Name }
+func (s *Source) Lang() string         { return s.cfg.Lang }
+func (s *Source) SupportsLatest() bool { return true }
+
+// base returns BaseURL without trailing slashes.
+func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") }
+
+// get fetches rawURL and parses the body as HTML; non-200 statuses are errors.
+func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	// Use the trimmed base so a trailing slash in BaseURL is not doubled.
+	req.Header.Set("Referer", s.base()+"/")
+	resp, err := s.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("masonry: HTTP %d", resp.StatusCode)
+	}
+	return goquery.NewDocumentFromReader(resp.Body)
+}
+
+// imgAttr returns the first srcset candidate URL if there is one, otherwise
+// the first non-empty lazy-load or src attribute.
+func imgAttr(img *goquery.Selection) string {
+	// A whitespace-only srcset yields no fields; indexing it would panic.
+	if fields := strings.Fields(img.AttrOr("srcset", "")); len(fields) > 0 {
+		return strings.TrimSuffix(fields[0], ",")
+	}
+	for _, attr := range []string{"data-cfsrc", "data-src", "data-lazy-src", "src"} {
+		if v, ok := img.Attr(attr); ok && v != "" {
+			return v
+		}
+	}
+	return ""
+}
+
+// parseMangaList extracts gallery entries and the next-page flag from a listing.
+func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
+	// Exclude static galleries and broken entries. The attribute value is quoted
+	// because an unquoted "cdn." is not a valid CSS identifier; the selector
+	// would fail to compile and match nothing.
+	const sel = `.list-gallery:not(.static) figure:not(:has(a[href*="cdn."]))`
+	var mangas []source.SManga
+	doc.Find(sel).Each(func(_ int, el *goquery.Selection) {
+		a := el.Find("a").First()
+		if a.Length() == 0 {
+			return
+		}
+		m := source.SManga{}
+		m.URL = a.AttrOr("href", "")
+		m.Title = strings.TrimSpace(a.AttrOr("title", ""))
+		if m.Title == "" {
+			m.Title = strings.TrimSpace(a.Text())
+		}
+		if img := el.Find("img").First(); img.Length() > 0 {
+			m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, imgAttr(img))
+		}
+		if m.URL != "" && m.Title != "" {
+			mangas = append(mangas, m)
+		}
+	})
+	hasNext := doc.Find(".pagination .next, a.next-page, a[rel=next]").Length() > 0
+	return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
+}
+
+// GetPopularManga serves the homepage as page 1 and the archive from page 2 on.
+func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
+	var u string
+	switch page {
+	case 1:
+		u = s.base()
+	case 2:
+		u = s.base() + "/archive/"
+	default:
+		u = fmt.Sprintf("%s/archive/page/%d/", s.base(), page-1)
+	}
+	doc, err := s.get(context.Background(), u)
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc), nil
+}
+
+// GetLatestUpdates returns the given page of the newest-first updates listing.
+func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), fmt.Sprintf("%s/updates/sort/newest/mpage/%d/", s.base(), page))
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc), nil
+}
+
+// GetSearchManga searches the updates listing. The query is URL-escaped so
+// spaces, "&" and "#" cannot corrupt the request; filters are ignored.
+func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
+	u := fmt.Sprintf("%s/updates/mpage/%d/?s=%s", s.base(), page, url.QueryEscape(query))
+	doc, err := s.get(context.Background(), u)
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc), nil
+}
+
+// GetMangaDetails scrapes title, first description paragraph and cover.
+// On a fetch error the input manga is returned alongside the error.
+func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
+	if err != nil {
+		return manga, err
+	}
+	result := source.SManga{URL: manga.URL}
+	result.Title = strings.TrimSpace(doc.Find("h1.entry-title, h1.post-title").First().Text())
+	if result.Title == "" {
+		result.Title = manga.Title
+	}
+	result.Description = strings.TrimSpace(doc.Find("div.entry-content p").First().Text())
+	if img := doc.Find("img.attachment-post-thumbnail, img.wp-post-image").First(); img.Length() > 0 {
+		result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, imgAttr(img))
+	}
+	return result, nil
+}
+
+// GetChapterList returns one synthetic chapter pointing at the gallery itself.
+func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
+	// Each masonry entry IS a single chapter (the gallery itself)
+	return []source.SChapter{{
+		URL:  manga.URL,
+		Name: "Gallery",
+	}}, nil
+}
+
+// GetPageList collects the CDN image links of a gallery, falling back to the
+// gallery's img elements when no CDN link is found. Index counts only the
+// pages kept so it stays gap-free.
+func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
+	if err != nil {
+		return nil, err
+	}
+	var pages []source.Page
+	doc.Find(".list-gallery a").Each(func(_ int, a *goquery.Selection) {
+		href := a.AttrOr("href", "")
+		// Only CDN image links
+		if strings.HasPrefix(href, "https://cdn.") || strings.Contains(href, "/cdn.") {
+			pages = append(pages, source.Page{Index: len(pages), ImageURL: href})
+		}
+	})
+	// Fallback: any direct image links in gallery
+	if len(pages) == 0 {
+		doc.Find(".list-gallery img").Each(func(_ int, img *goquery.Selection) {
+			if u := imgAttr(img); u != "" {
+				pages = append(pages, source.Page{Index: len(pages), ImageURL: util.AbsURL(s.cfg.BaseURL, u)})
+			}
+		})
+	}
+	return pages, nil
+}
+
+// GetImageURL returns the image URL already stored on the page.
+func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil }
+func (s *Source) GetFilterList() []source.Filter               { return nil }
diff --git a/sources/base/mccms/mccms.go b/sources/base/mccms/mccms.go
new file mode 100644
index 0000000..f0a61db
--- /dev/null
+++ b/sources/base/mccms/mccms.go
@@ -0,0 +1,221 @@
+// Package mccms implements the MCCMS (Chinese manga CMS) manga base.
+// HTML scraping; popular/latest via category pages; pages via data-original img attr.
+package mccms
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+
+	"goyomi/internal/httpclient"
+	"goyomi/internal/source"
+	"goyomi/sources/base/util"
+)
+
+// Config describes one MCCMS-based site.
+type Config struct {
+	Name              string
+	BaseURL           string
+	Lang              string
+	LazyLoadAttr      string // default: "data-original"
+	UseMobilePageList bool
+}
+
+// Source scrapes the site described by its Config.
+type Source struct {
+	cfg    Config
+	client *httpclient.Client
+	id     int64
+}
+
+// New applies the Config defaults and returns a Source for cfg.
+func New(cfg Config) *Source {
+	if cfg.LazyLoadAttr == "" {
+		cfg.LazyLoadAttr = "data-original"
+	}
+	c := httpclient.NewClient(httpclient.WithRateLimit(2, 3))
+	return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
+}
+
+// ID returns the identifier derived from the source name and language.
+func (s *Source) ID() int64            { return s.id }
+func (s *Source) Name() string         { return s.cfg.Name }
+func (s *Source) Lang() string         { return s.cfg.Lang }
+func (s *Source) SupportsLatest() bool { return true }
+
+// base returns BaseURL without trailing slashes.
+func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") }
+
+// pcUA is the desktop User-Agent sent with every request.
+var pcUA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0"
+
+// get fetches rawURL and parses the body as HTML; non-200 statuses are errors.
+func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("User-Agent", pcUA)
+	resp, err := s.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("mccms: HTTP %d", resp.StatusCode)
+	}
+	return goquery.NewDocumentFromReader(resp.Body)
+}
+
+// parseListing extracts the comics and the next-page flag from a listing page.
+func (s *Source) parseListing(doc *goquery.Document) source.MangasPage {
+	var mangas []source.SManga
+	doc.Find(".common-comic-item").Each(func(_ int, el *goquery.Selection) {
+		titleEl := el.Find(".comic__title").First().Find("a").First()
+		if titleEl.Length() == 0 {
+			return
+		}
+		m := source.SManga{
+			URL:   removePathPrefix(titleEl.AttrOr("href", "")),
+			Title: strings.TrimSpace(titleEl.Text()),
+		}
+		if img := el.Find("img").First(); img.Length() > 0 {
+			m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("data-original", img.AttrOr("src", "")))
+		}
+		if m.URL != "" && m.Title != "" {
+			mangas = append(mangas, m)
+		}
+	})
+
+	// Pagination: last two links in #Pagination or .NewPages differ → has next
+	hasNext := false
+	buttons := doc.Find("#Pagination a, .NewPages a")
+	if n := buttons.Length(); n >= 2 {
+		last := strings.TrimSpace(buttons.Eq(n - 1).AttrOr("href", "a"))
+		secondLast := strings.TrimSpace(buttons.Eq(n - 2).AttrOr("href", "b"))
+		hasNext = last != secondLast
+	}
+	return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
+}
+
+// GetPopularManga returns the given page of the category listing ordered by hits.
+func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), fmt.Sprintf("%s/category/order/hits/page/%d", s.base(), page))
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseListing(doc), nil
+}
+
+// GetLatestUpdates returns the given page of the category listing ordered by addtime.
+func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), fmt.Sprintf("%s/category/order/addtime/page/%d", s.base(), page))
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseListing(doc), nil
+}
+
+// GetSearchManga searches by title. The query is a path segment, so it is
+// path-escaped to keep "/", "?" and "#" from changing the route.
+func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
+	u := fmt.Sprintf("%s/search/%s/%d", s.base(), url.PathEscape(query), page)
+	doc, err := s.get(context.Background(), u)
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseListing(doc), nil
+}
+
+// GetMangaDetails scrapes title, cover, author, description and genres from
+// the info box. On a fetch error the input manga is returned.
+func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
+	if err != nil {
+		return manga, err
+	}
+	result := source.SManga{URL: manga.URL}
+
+	box := doc.Find(".de-info__box").First()
+	result.Title = strings.TrimSpace(box.Find(".comic-title").First().Text())
+	if result.Title == "" {
+		result.Title = manga.Title
+	}
+	if img := box.Find("img").First(); img.Length() > 0 {
+		result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", ""))
+	}
+	result.Author = strings.TrimSpace(box.Find(".name").First().Text())
+	result.Description = strings.TrimSpace(box.Find(".intro-total").First().Text())
+
+	var genres []string
+	box.Find(".comic-status a").Each(func(_ int, a *goquery.Selection) {
+		if t := strings.TrimSpace(a.Text()); t != "" {
+			genres = append(genres, t)
+		}
+	})
+	result.Genre = strings.Join(genres, ", ")
+	return result, nil
+}
+
+// removePathPrefix strips a leading "/index.php" from href.
+func removePathPrefix(href string) string {
+	return strings.TrimPrefix(href, "/index.php")
+}
+
+// GetChapterList returns the chapters in reverse page order.
+func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
+	if err != nil {
+		return nil, err
+	}
+	var chapters []source.SChapter
+	doc.Find(".chapter__list-box > li").Each(func(_ int, el *goquery.Selection) {
+		link := el.Children().First()
+		href := removePathPrefix(link.AttrOr("href", ""))
+		if href == "" {
+			return
+		}
+		chapters = append(chapters, source.SChapter{
+			URL:  href,
+			Name: strings.TrimSpace(link.Text()),
+		})
+	})
+	// Reverse to get descending order (latest first)
+	for i, j := 0, len(chapters)-1; i < j; i, j = i+1, j-1 {
+		chapters[i], chapters[j] = chapters[j], chapters[i]
+	}
+	return chapters, nil
+}
+
+// GetPageList returns the chapter images: every ".comic-list img" src when
+// UseMobilePageList is set, otherwise every img carrying LazyLoadAttr.
+// Index counts only the pages kept so it stays gap-free.
+func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
+	if err != nil {
+		return nil, err
+	}
+	var pages []source.Page
+	if s.cfg.UseMobilePageList {
+		doc.Find(".comic-list img").Each(func(_ int, img *goquery.Selection) {
+			if u := img.AttrOr("src", ""); u != "" {
+				pages = append(pages, source.Page{Index: len(pages), ImageURL: util.AbsURL(s.cfg.BaseURL, u)})
+			}
+		})
+	} else {
+		doc.Find(fmt.Sprintf("img[%s]", s.cfg.LazyLoadAttr)).Each(func(_ int, img *goquery.Selection) {
+			if u := img.AttrOr(s.cfg.LazyLoadAttr, ""); u != "" {
+				pages = append(pages, source.Page{Index: len(pages), ImageURL: u})
+			}
+		})
+	}
+	return pages, nil
+}
+
+// GetImageURL returns the image URL already stored on the page.
+func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil }
+func (s *Source) GetFilterList() []source.Filter               { return nil }
diff --git a/sources/base/monochrome/monochrome.go b/sources/base/monochrome/monochrome.go
new file mode 100644
index 0000000..6567670
--- /dev/null
+++ b/sources/base/monochrome/monochrome.go
@@ -0,0 +1,269 @@
+// Package monochrome implements the Monochrome manga base.
+// JSON REST API at api.{host}; search/browse via /manga endpoint; pages generated from chapter metadata.
+package monochrome
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+	"time"
+
+	"goyomi/internal/httpclient"
+	"goyomi/internal/source"
+)
+
+// Config describes one Monochrome-based site.
+type Config struct {
+	Name    string
+	BaseURL string
+	Lang    string
+}
+
+// Source talks to the JSON API of the site described by its Config.
+type Source struct {
+	cfg    Config
+	apiURL string
+	client *httpclient.Client
+	id     int64
+}
+
+// New derives the API base URL from cfg.BaseURL and returns a Source for it.
+func New(cfg Config) *Source {
+	// apiUrl: insert "api." after "://"
+	apiURL := strings.Replace(cfg.BaseURL, "://", "://api.", 1)
+	apiURL = strings.TrimRight(apiURL, "/")
+
+	c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
+	return &Source{
+		cfg:    cfg,
+		apiURL: apiURL,
+		client: c,
+		id:     source.GenerateSourceID(cfg.Name, cfg.Lang),
+	}
+}
+
+// ID returns the identifier derived from the source name and language.
+func (s *Source) ID() int64            { return s.id }
+func (s *Source) Name() string         { return s.cfg.Name }
+func (s *Source) Lang() string         { return s.cfg.Lang }
+func (s *Source) SupportsLatest() bool { return false }
+
+// doGet fetches rawURL and decodes the JSON response body into out.
+// Non-200 statuses, read failures and malformed JSON are all returned as errors.
+func (s *Source) doGet(ctx context.Context, rawURL string, out any) error {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Referer", strings.TrimRight(s.cfg.BaseURL, "/")+"/")
+	resp, err := s.client.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("monochrome: HTTP %d for %s", resp.StatusCode, rawURL)
+	}
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		// A truncated body would otherwise surface as a misleading JSON error.
+		return fmt.Errorf("monochrome: reading %s: %w", rawURL, err)
+	}
+	return json.Unmarshal(body, out)
+}
+
+// DTOs
+
+// results is one page of the /manga listing response.
+type results struct {
+	Offset  int        `json:"offset"`
+	Limit   int        `json:"limit"`
+	Results []mangaDTO `json:"results"`
+	Total   int        `json:"total"`
+}
+
+// hasNext reports whether entries remain after this page. GetSearchManga sends
+// offset as an item count, so the entries consumed so far are
+// Offset+len(Results) rather than Offset*Limit.
+// NOTE(review): assumes the API echoes the request offset back; confirm.
+func (r results) hasNext() bool {
+	return r.Total > r.Offset+len(r.Results)
+}
+
+// mangaDTO is a manga entry as returned by the API.
+type mangaDTO struct {
+	Title       string `json:"title"`
+	Description string `json:"description"`
+	Author      string `json:"author"`
+	Artist      string `json:"artist"`
+	Status      string `json:"status"`
+	ID          string `json:"id"`
+	Version     int    `json:"version"`
+}
+
+// coverURL builds the cover image URL under apiURL for this manga and version.
+func (m mangaDTO) coverURL(apiURL string) string {
+	return fmt.Sprintf("%s/media/%s/cover.jpg?version=%d", apiURL, m.ID, m.Version)
+}
+
+// chapterDTO is a chapter entry as returned by the API.
+type chapterDTO struct {
+	Name       string  `json:"name"`
+	Volume     *int    `json:"volume"`
+	Number     float32 `json:"number"`
+	ScanGroup  string  `json:"scanGroup"`
+	ID         string  `json:"id"`
+	Version    int     `json:"version"`
+	Length     int     `json:"length"`
+	UploadTime string  `json:"uploadTime"`
+}
+
+// title formats the display name as "Vol V Chapter N - Name", omitting the
+// volume and name parts when they are absent.
+func (c chapterDTO) title() string {
+	var b strings.Builder
+	if c.Volume != nil {
+		fmt.Fprintf(&b, "Vol %d ", *c.Volume)
+	}
+	// Shortest exact decimal form ("12", "10.5"). "%.2g" kept only two
+	// significant digits, so 123 was rendered as "1.2e+02".
+	b.WriteString("Chapter ")
+	b.WriteString(strconv.FormatFloat(float64(c.Number), 'f', -1, 32))
+	if c.Name != "" {
+		fmt.Fprintf(&b, " - %s", c.Name)
+	}
+	return b.String()
+}
+
+// timestamp parses UploadTime and returns it as Unix milliseconds, or 0 when
+// it matches none of the accepted layouts.
+func (c chapterDTO) timestamp() int64 {
+	layouts := []string{
+		"2006-01-02T15:04:05.999999",
+		"2006-01-02T15:04:05",
+		time.RFC3339Nano, // also accept values carrying a zone suffix
+	}
+	for _, layout := range layouts {
+		if t, err := time.Parse(layout, c.UploadTime); err == nil {
+			return t.UnixMilli()
+		}
+	}
+	return 0
+}
+
+// chapterURL: stored as "{mangaUUID}/{chapterID}|{version}|{length}"
+// (mirrors Kotlin's manga.url + ch.parts where parts = "/{id}|{version}|{length}")
+func buildChapterURL(mangaUUID, chapterID string, version, length int) string {
+	return fmt.Sprintf("%s/%s|%d|%d", mangaUUID, chapterID, version, length)
+}
+
+// mangaFromDTO converts an API manga into a source.SManga, mapping hiatus to
+// ongoing and cancelled to completed.
+func (s *Source) mangaFromDTO(m mangaDTO) source.SManga {
+	sm := source.SManga{
+		URL:          m.ID,
+		Title:        m.Title,
+		Description:  m.Description,
+		Author:       m.Author,
+		Artist:       m.Artist,
+		ThumbnailURL: m.coverURL(s.apiURL),
+	}
+	switch strings.ToLower(m.Status) {
+	case "ongoing", "hiatus":
+		sm.Status = source.StatusOngoing
+	case "completed", "cancelled":
+		sm.Status = source.StatusCompleted
+	default:
+		sm.Status = source.StatusUnknown
+	}
+	return sm
+}
+
+// GetPopularManga lists all manga; it is a search with an empty query.
+func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
+	return s.GetSearchManga(page, "", nil)
+}
+
+// GetLatestUpdates lists all manga; it is a search with an empty query.
+func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
+	return s.GetSearchManga(page, "", nil)
+}
+
+// GetSearchManga queries /manga ten entries per page. The title is
+// URL-escaped so spaces, "&" and "#" cannot corrupt the request.
+func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
+	offset := 10 * (page - 1)
+	u := fmt.Sprintf("%s/manga?limit=10&offset=%d&title=%s", s.apiURL, offset, url.QueryEscape(query))
+	var res results
+	if err := s.doGet(context.Background(), u, &res); err != nil {
+		return source.MangasPage{}, err
+	}
+	mangas := make([]source.SManga, len(res.Results))
+	for i, m := range res.Results {
+		mangas[i] = s.mangaFromDTO(m)
+	}
+	return source.MangasPage{Mangas: mangas, HasNextPage: res.hasNext()}, nil
+}
+
+// GetMangaDetails fetches /manga/{id}; on error the input manga is returned.
+func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
+	var m mangaDTO
+	if err := s.doGet(context.Background(), fmt.Sprintf("%s/manga/%s", s.apiURL, manga.URL), &m); err != nil {
+		return manga, err
+	}
+	result := s.mangaFromDTO(m)
+	result.URL = manga.URL
+	return result, nil
+}
+
+// GetChapterList fetches /manga/{id}/chapters and keeps the API's order.
+func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
+	var chapters []chapterDTO
+	if err := s.doGet(context.Background(), fmt.Sprintf("%s/manga/%s/chapters", s.apiURL, manga.URL), &chapters); err != nil {
+		return nil, err
+	}
+	result := make([]source.SChapter, len(chapters))
+	for i, ch := range chapters {
+		result[i] = source.SChapter{
+			URL:        buildChapterURL(manga.URL, ch.ID, ch.Version, ch.Length),
+			Name:       ch.title(),
+			DateUpload: ch.timestamp(),
+		}
+	}
+	return result, nil
+}
+
+// GetPageList builds the image URLs from the metadata packed into
+// chapter.URL by buildChapterURL; no request is made. A URL that does not
+// have three "|"-separated parts or a non-negative length is an error.
+func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
+	// URL format: "{mangaUUID}/{chapterID}|{version}|{length}"
+	parts := strings.SplitN(chapter.URL, "|", 3)
+	if len(parts) != 3 {
+		return nil, fmt.Errorf("monochrome: malformed chapter URL: %s", chapter.URL)
+	}
+	uuidPart, version := parts[0], parts[1] // uuidPart is "mangaUUID/chapterID"
+	length, err := strconv.Atoi(parts[2])
+	if err != nil || length < 0 {
+		// A negative length would make the slice allocation below panic.
+		return nil, fmt.Errorf("monochrome: malformed chapter URL: %s", chapter.URL)
+	}
+
+	pages := make([]source.Page, length)
+	for i := range pages {
+		pages[i] = source.Page{
+			Index:    i,
+			ImageURL: fmt.Sprintf("%s/media/%s/%d.jpg?version=%s", s.apiURL, uuidPart, i+1, version),
+		}
+	}
+	return pages, nil
+}
+
+// GetImageURL returns the image URL already stored on the page.
+func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil }
+func (s *Source) GetFilterList() []source.Filter               { return nil }
diff --git a/sources/base/multichan/multichan.go b/sources/base/multichan/multichan.go
new file mode 100644
index 0000000..3a839d2
--- /dev/null
+++ b/sources/base/multichan/multichan.go
@@ -0,0 +1,300 @@
+// Package multichan implements the MultiChan manga base.
+// Russian manga site; pages extracted from fullimg JSON array in HTML source.
+package multichan
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+
+	"goyomi/internal/httpclient"
+	"goyomi/internal/source"
+	"goyomi/sources/base/util"
+)
+
+// Config describes one MultiChan-based site.
+type Config struct {
+	Name    string
+	BaseURL string
+	Lang    string
+}
+
+// Source scrapes the site described by its Config.
+type Source struct {
+	cfg    Config
+	client *httpclient.Client
+	id     int64
+}
+
+// New returns a Source for cfg.
+func New(cfg Config) *Source {
+	c := httpclient.NewClient(httpclient.WithRateLimit(2, 3))
+	return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
+}
+
+// ID returns the identifier derived from the source name and language.
+func (s *Source) ID() int64            { return s.id }
+func (s *Source) Name() string         { return s.cfg.Name }
+func (s *Source) Lang() string         { return s.cfg.Lang }
+func (s *Source) SupportsLatest() bool { return true }
+
+// base returns BaseURL without trailing slashes.
+func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") }
+
+// get fetches rawURL and parses the body as HTML; non-200 statuses are errors.
+func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Referer", s.cfg.BaseURL)
+	resp, err := s.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("multichan: HTTP %d", resp.StatusCode)
+	}
+	return goquery.NewDocumentFromReader(resp.Body)
+}
+
+// getRaw fetches rawURL and returns the response body as a string. Like get,
+// it rejects non-200 statuses, and it reports a failed body read.
+func (s *Source) getRaw(ctx context.Context, rawURL string) (string, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
+	if err != nil {
+		return "", err
+	}
+	req.Header.Set("Referer", s.cfg.BaseURL)
+	resp, err := s.client.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("multichan: HTTP %d", resp.StatusCode)
+	}
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("multichan: reading %s: %w", rawURL, err)
+	}
+	return string(body), nil
+}
+
+// mangaFromElement reads a listing row: title from the row's title attribute
+// (or the first link's text), URL from the first link, cover from the first img.
+func mangaFromElement(el *goquery.Selection, baseURL string) source.SManga {
+	m := source.SManga{}
+	m.Title = strings.TrimSpace(el.AttrOr("title", ""))
+	el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
+		m.URL = a.AttrOr("href", "")
+		if m.Title == "" {
+			m.Title = strings.TrimSpace(a.Text())
+		}
+	})
+	if img := el.Find("img").First(); img.Length() > 0 {
+		m.ThumbnailURL = util.AbsURL(baseURL, img.AttrOr("src", ""))
+	}
+	return m
+}
+
+// parseMangaList extracts the rows of a listing page; HasNextPage is true
+// when nextSel matches at least one element.
+func (s *Source) parseMangaList(doc *goquery.Document, nextSel string) source.MangasPage {
+	var mangas []source.SManga
+	doc.Find("div.content_row").Each(func(_ int, el *goquery.Selection) {
+		m := mangaFromElement(el, s.cfg.BaseURL)
+		if m.URL != "" && m.Title != "" {
+			mangas = append(mangas, m)
+		}
+	})
+	hasNext := doc.Find(nextSel).Length() > 0
+	return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
+}
+
+// GetPopularManga returns a page of /mostfavorites; the offset advances by 20 per page.
+func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), fmt.Sprintf("%s/mostfavorites?offset=%d", s.base(), 20*(page-1)))
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc, "a:contains(Вперед)"), nil
+}
+
+// GetLatestUpdates returns a page of /manga/new; the offset advances by 20 per page.
+func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
+	doc, err := s.get(context.Background(), fmt.Sprintf("%s/manga/new?offset=%d", s.base(), 20*(page-1)))
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc, "a:contains(Вперед)"), nil
+}
+
+// GetSearchManga searches by title. The query is URL-escaped so spaces, "&"
+// and "#" cannot corrupt the request; filters are ignored.
+func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
+	u := fmt.Sprintf("%s/?do=search&subaction=search&story=%s&search_start=%d", s.base(), url.QueryEscape(query), page)
+	doc, err := s.get(context.Background(), u)
+	if err != nil {
+		return source.MangasPage{}, err
+	}
+	return s.parseMangaList(doc, "a:contains(Далее)"), nil
+}
+
+// GetMangaDetails scrapes title, author, tags, status, description and cover
+// from the manga page. On a fetch error the input manga is returned.
+func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
+	if err != nil {
+		return manga, err
+	}
+	result := source.SManga{URL: manga.URL}
+
+	// Title from page title (strip " » ..." suffix)
+	pageTitle := doc.Find("title").Text()
+	if idx := strings.Index(pageTitle, " »"); idx > 0 {
+		result.Title = strings.TrimSpace(pageTitle[:idx])
+	} else {
+		result.Title = strings.TrimSpace(pageTitle)
+	}
+	if result.Title == "" {
+		result.Title = manga.Title
+	}
+
+	infoEl := doc.Find("#info_wrap tr, #info_wrap > div")
+	result.Author = strings.TrimSpace(infoEl.Find(":contains(Автор) .item2").Text())
+
+	rawCat := strings.ToLower(strings.TrimSpace(infoEl.Find(":contains(Тип) a").Text()))
+	var tags []string
+	if rawCat != "" {
+		tags = append(tags, rawCat)
+	}
+	doc.Find(".sidetags ul a:last-child").Each(func(_ int, a *goquery.Selection) {
+		if t := strings.TrimSpace(a.Text()); t != "" {
+			tags = append(tags, t)
+		}
+	})
+	result.Genre = strings.Join(tags, ", ")
+
+	statusText := infoEl.Find(":contains(Загружено)").Text()
+	switch {
+	case strings.Contains(statusText, "перевод завершен"):
+		result.Status = source.StatusCompleted
+	case strings.Contains(statusText, "перевод продолжается"):
+		result.Status = source.StatusOngoing
+	default:
+		result.Status = source.StatusUnknown
+	}
+
+	// Keep every direct text node of the description; assigning each one in
+	// turn would leave only the last paragraph.
+	if desc := doc.Find("div#description").First(); desc.Length() > 0 {
+		var parts []string
+		desc.Contents().Each(func(_ int, n *goquery.Selection) {
+			if goquery.NodeName(n) != "#text" {
+				return
+			}
+			if t := strings.TrimSpace(n.Text()); t != "" {
+				parts = append(parts, t)
+			}
+		})
+		result.Description = strings.Join(parts, "\n")
+	}
+
+	if img := doc.Find("img#cover").First(); img.Length() > 0 {
+		result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", ""))
+	}
+	return result, nil
+}
+
+// GetChapterList returns the chapter rows of table.table_cha in page order,
+// with each URL made relative to the site base.
+func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
+	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
+	if err != nil {
+		return nil, err
+	}
+	var chapters []source.SChapter
+	// Selector: table.table_cha tr (skip first 2 header rows)
+	doc.Find("table.table_cha tr").Each(func(i int, el *goquery.Selection) {
+		if i <= 1 {
+			return // skip headers
+		}
+		a := el.Find("a").First()
+		if a.Length() == 0 {
+			return
+		}
+		href := a.AttrOr("href", "")
+		if href == "" {
+			return
+		}
+		// Make URL relative
+		u := strings.TrimPrefix(href, s.base())
+		if !strings.HasPrefix(u, "/") {
+			u = "/" + u
+		}
+
+		dateStr := strings.TrimSpace(el.Find("div.date").First().Text())
+		chapters = append(chapters, source.SChapter{
+			URL:        u,
+			Name:       strings.TrimSpace(a.Text()),
+			DateUpload: parseDate(dateStr),
+		})
+	})
+	return chapters, nil
+}
+
+// parseDate parses a "2006-01-02" date and returns it as Unix milliseconds,
+// or 0 when s is not in that format.
+func parseDate(s string) int64 {
+	t, err := time.Parse("2006-01-02", s)
+	if err != nil {
+		return 0
+	}
+	return t.UnixMilli()
+}
+
+// GetPageList extracts the image URLs from the fullimg array embedded in the
+// chapter page's source. Index counts only the non-empty entries kept, so it
+// stays gap-free.
+func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
+	html, err := s.getRaw(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
+	if err != nil {
+		return nil, err
+	}
+
+	// Extract fullimg":[...] from raw HTML
+	const marker = `fullimg":[`
+	start := strings.Index(html, marker)
+	if start < 0 {
+		return nil, fmt.Errorf("multichan: fullimg array not found")
+	}
+	start += len(marker)
+	// End at the first "]" so arrays with or without a trailing comma both
+	// work; the empty piece a trailing comma leaves is dropped below.
+	end := strings.Index(html[start:], "]")
+	if end < 0 {
+		return nil, fmt.Errorf("multichan: fullimg array end not found")
+	}
+	rawURLs := strings.ReplaceAll(html[start:start+end], `"`, "")
+
+	var pages []source.Page
+	for _, u := range strings.Split(rawURLs, ",") {
+		u = strings.TrimSpace(u)
+		if u != "" {
+			pages = append(pages, source.Page{Index: len(pages), ImageURL: u})
+		}
+	}
+	return pages, nil
+}
+
+// GetImageURL returns the image URL already stored on the page.
+func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil }
+func (s *Source) GetFilterList() []source.Filter               { return nil }