208 lines
6.3 KiB
Go
Executable File
208 lines
6.3 KiB
Go
Executable File
// Package mccms implements the MCCMS (Chinese manga CMS) manga base.
//
// It works by HTML scraping: popular and latest listings come from category
// pages, and chapter page images are read from a lazy-load img attribute
// ("data-original" by default).
|
|
package mccms
|
|
|
|
import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"goyomi/internal/httpclient"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
|
|
|
|
// Config describes one MCCMS-based site.
type Config struct {
	// Name is the display name returned by Source.Name, BaseURL is the site
	// root that relative links are resolved against (trailing slashes are
	// ignored when listing URLs are built), and Lang is the language code
	// returned by Source.Lang. Name and Lang together seed the source ID.
	Name, BaseURL, Lang string

	// LazyLoadAttr is the <img> attribute holding the page image URL on
	// chapter pages. New substitutes "data-original" when it is empty.
	LazyLoadAttr string

	// UseMobilePageList makes GetPageList read the "src" attribute of
	// ".comic-list img" elements instead of using LazyLoadAttr.
	UseMobilePageList bool
}
|
|
|
|
type Source struct {
|
|
cfg Config
|
|
client *httpclient.Client
|
|
id int64
|
|
}
|
|
|
|
func New(cfg Config) *Source {
|
|
if cfg.LazyLoadAttr == "" {
|
|
cfg.LazyLoadAttr = "data-original"
|
|
}
|
|
c := httpclient.NewClient(httpclient.WithRateLimit(2, 3))
|
|
return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
|
|
}
|
|
|
|
// ID returns the numeric source identifier generated in New.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the configured display name of the source.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
|
// Lang returns the configured language code of the source.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
|
// SupportsLatest reports whether GetLatestUpdates is available; it is always
// true for this source.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
// base returns the configured BaseURL with every trailing "/" removed, so
// callers can append paths that start with "/".
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
|
|
|
|
// pcUA is the desktop (Firefox on Windows) User-Agent header value that get
// sends with every request.
var pcUA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0"
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", pcUA)
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("mccms: HTTP %d", resp.StatusCode)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func (s *Source) parseListing(doc *goquery.Document) source.MangasPage {
|
|
var mangas []source.SManga
|
|
doc.Find(".common-comic-item").Each(func(_ int, el *goquery.Selection) {
|
|
titleEl := el.Find(".comic__title").First().Find("a").First()
|
|
if titleEl.Length() == 0 {
|
|
return
|
|
}
|
|
href := titleEl.AttrOr("href", "")
|
|
// strip /index.php prefix
|
|
href = strings.TrimPrefix(href, "/index.php")
|
|
m := source.SManga{
|
|
URL: href,
|
|
Title: strings.TrimSpace(titleEl.Text()),
|
|
}
|
|
if img := el.Find("img").First(); img.Length() > 0 {
|
|
m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("data-original", img.AttrOr("src", "")))
|
|
}
|
|
if m.URL != "" && m.Title != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
|
|
// Pagination: last two links in #Pagination or .NewPages differ → has next
|
|
hasNext := false
|
|
doc.Find("#Pagination a, .NewPages a").Each(func(_ int, _ *goquery.Selection) {})
|
|
buttons := doc.Find("#Pagination a, .NewPages a")
|
|
n := buttons.Length()
|
|
if n >= 2 {
|
|
last := strings.TrimSpace(buttons.Eq(n - 1).AttrOr("href", "a"))
|
|
secondLast := strings.TrimSpace(buttons.Eq(n - 2).AttrOr("href", "b"))
|
|
hasNext = last != secondLast
|
|
}
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), fmt.Sprintf("%s/category/order/hits/page/%d", s.base(), page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseListing(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), fmt.Sprintf("%s/category/order/addtime/page/%d", s.base(), page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseListing(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
u := fmt.Sprintf("%s/search/%s/%d", s.base(), query, page)
|
|
doc, err := s.get(context.Background(), u)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseListing(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := source.SManga{URL: manga.URL}
|
|
|
|
box := doc.Find(".de-info__box").First()
|
|
result.Title = strings.TrimSpace(box.Find(".comic-title").First().Text())
|
|
if result.Title == "" {
|
|
result.Title = manga.Title
|
|
}
|
|
if img := box.Find("img").First(); img.Length() > 0 {
|
|
result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, img.AttrOr("src", ""))
|
|
}
|
|
result.Author = strings.TrimSpace(box.Find(".name").First().Text())
|
|
result.Description = strings.TrimSpace(box.Find(".intro-total").First().Text())
|
|
|
|
var genres []string
|
|
box.Find(".comic-status a").Each(func(_ int, a *goquery.Selection) {
|
|
if t := strings.TrimSpace(a.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
result.Genre = strings.Join(genres, ", ")
|
|
return result, nil
|
|
}
|
|
|
|
// removePathPrefix drops one leading "/index.php" from href, if present, and
// returns href unchanged otherwise.
func removePathPrefix(href string) string {
	const prefix = "/index.php"
	if strings.HasPrefix(href, prefix) {
		return href[len(prefix):]
	}
	return href
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var chapters []source.SChapter
|
|
doc.Find(".chapter__list-box > li").Each(func(_ int, el *goquery.Selection) {
|
|
link := el.Children().First()
|
|
href := removePathPrefix(link.AttrOr("href", ""))
|
|
if href == "" {
|
|
return
|
|
}
|
|
chapters = append(chapters, source.SChapter{
|
|
URL: href,
|
|
Name: strings.TrimSpace(link.Text()),
|
|
})
|
|
})
|
|
// Reverse to get descending order (latest first)
|
|
for i, j := 0, len(chapters)-1; i < j; i, j = i+1, j-1 {
|
|
chapters[i], chapters[j] = chapters[j], chapters[i]
|
|
}
|
|
return chapters, nil
|
|
}
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var pages []source.Page
|
|
if s.cfg.UseMobilePageList {
|
|
doc.Find(".comic-list img").Each(func(i int, img *goquery.Selection) {
|
|
u := img.AttrOr("src", "")
|
|
if u != "" {
|
|
pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)})
|
|
}
|
|
})
|
|
} else {
|
|
doc.Find(fmt.Sprintf("img[%s]", s.cfg.LazyLoadAttr)).Each(func(i int, img *goquery.Selection) {
|
|
u := img.AttrOr(s.cfg.LazyLoadAttr, "")
|
|
if u != "" {
|
|
pages = append(pages, source.Page{Index: i, ImageURL: u})
|
|
}
|
|
})
|
|
}
|
|
return pages, nil
|
|
}
|
|
|
|
// GetImageURL returns the image URL already stored on page; it performs no
// request and never fails.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	imageURL := page.ImageURL
	return imageURL, nil
}
|
|
// GetFilterList returns nil: this source defines no search filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
|