Files
goyomi/sources/base/madara/madara.go
T
2026-05-11 06:48:23 +00:00

437 lines
13 KiB
Go
Executable File

// Package madara implements the Madara WordPress theme multi-source base.
// Uses admin-ajax.php or /ajax/chapters for chapter lists; HTML scraping throughout.
package madara
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/PuerkitoBio/goquery"
"goyomi/internal/httpclient"
"goyomi/internal/source"
"goyomi/sources/base/util"
)
// Config describes one Madara-based site: its identity, its URL layout and
// the CSS selectors used to scrape it. Selector fields (and MangaSubString)
// that are left empty are filled in by setDefaults.
type Config struct {
	// Name, BaseURL and Lang identify the site. Name and Lang are also the
	// inputs of the generated source ID; BaseURL is the root every request
	// URL and Referer header is built from.
	Name, BaseURL, Lang string

	// MangaSubString is the URL path segment for manga listings (default "manga").
	MangaSubString string

	// UseNewChapterEndpoint: use /ajax/chapters instead of admin-ajax.php.
	UseNewChapterEndpoint bool

	// Listing selectors: the element wrapping one entry on popular/latest
	// pages, the link inside such an entry, and the element wrapping one
	// entry on search result pages.
	PopularMangaSelector, PopularMangaURLSelector, SearchMangaSelector string

	// Chapter list selectors: one chapter row, the link inside the row and
	// the release-date element inside the row.
	ChapterListSelector, ChapterURLSelector, ChapterDateSelector string

	// PageListParseSelector matches the page images of a chapter.
	PageListParseSelector string

	// Manga detail page selectors, one per scraped attribute.
	MangaDetailsSelectorTitle, MangaDetailsSelectorAuthor, MangaDetailsSelectorArtist string
	MangaDetailsSelectorStatus, MangaDetailsSelectorDesc                              string
	MangaDetailsSelectorThumb, MangaDetailsSelectorGenre                              string
}
// setDefaults assigns the package default to MangaSubString and to every
// selector field that is still the empty string. Fields that already hold a
// value are left untouched.
func (c *Config) setDefaults() {
	fallbacks := []struct {
		field *string
		value string
	}{
		{&c.MangaSubString, "manga"},
		{&c.PopularMangaSelector, "div.page-item-detail, .manga__item"},
		{&c.PopularMangaURLSelector, "div.post-title a"},
		{&c.SearchMangaSelector, "div.c-tabs-item__content, div.page-item-detail, .manga__item"},
		{&c.ChapterListSelector, "li.wp-manga-chapter"},
		{&c.ChapterURLSelector, "a"},
		{&c.ChapterDateSelector, "span.chapter-release-date"},
		{&c.PageListParseSelector, "div.page-break img, li.blocks-gallery-item img, .reading-content img"},
		{&c.MangaDetailsSelectorTitle, "div.post-title h3, div.post-title h1, #manga-title > h1"},
		{&c.MangaDetailsSelectorAuthor, "div.author-content > a, div.manga-authors > a"},
		{&c.MangaDetailsSelectorArtist, "div.artist-content > a"},
		{&c.MangaDetailsSelectorStatus, "div.summary-content, div.summary-heading:contains(Status) + div"},
		{&c.MangaDetailsSelectorDesc, "div.description-summary div.summary__content, div.summary_content div.post-content_item > h5 + div"},
		{&c.MangaDetailsSelectorThumb, "div.summary_image img"},
		{&c.MangaDetailsSelectorGenre, "div.genres-content a"},
	}
	for _, fb := range fallbacks {
		if *fb.field == "" {
			*fb.field = fb.value
		}
	}
}
// Source implements source.CatalogueSource for Madara-based sites.
// Instances are created with New, which fills in all three fields.
type Source struct {
	// cfg is the site configuration with defaults already applied.
	cfg Config

	// client performs every HTTP request issued by this source.
	client *httpclient.Client

	// id is the source identifier generated from cfg.Name and cfg.Lang.
	id int64
}
// New builds a Source for the site described by cfg. The configuration is
// taken by value, so defaults are applied to the Source's own copy and the
// caller's Config is not modified. Each Source gets its own HTTP client,
// created with httpclient.WithRateLimit(1, 2).
func New(cfg Config) *Source {
	cfg.setDefaults()

	src := new(Source)
	src.cfg = cfg
	src.client = httpclient.NewClient(httpclient.WithRateLimit(1, 2))
	src.id = source.GenerateSourceID(cfg.Name, cfg.Lang)
	return src
}
// ID returns the identifier generated for this source in New.
func (s *Source) ID() int64 {
	return s.id
}

// Name returns the configured display name of the source.
func (s *Source) Name() string {
	return s.cfg.Name
}

// Lang returns the configured language of the source.
func (s *Source) Lang() string {
	return s.cfg.Lang
}

// SupportsLatest reports whether GetLatestUpdates is available; it always is.
func (s *Source) SupportsLatest() bool {
	return true
}
// --- HTTP helpers ---
// get fetches rawURL with a GET request and parses the response body as HTML.
// The site root is sent as Referer. Any status other than 200 OK is returned
// as an error naming the status code and the URL.
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("Referer", s.cfg.BaseURL+"/")

	response, err := s.client.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		return nil, fmt.Errorf("madara: HTTP %d for %s", status, rawURL)
	}
	return goquery.NewDocumentFromReader(response.Body)
}
// post sends form as a URL-encoded POST to rawURL, marked as an XMLHttpRequest
// with the site root as Referer, and parses the response body as HTML.
//
// Any status other than 200 OK is returned as an error that names the status
// code, the URL and a short excerpt of the response body.
func (s *Source) post(ctx context.Context, rawURL string, form url.Values) (*goquery.Document, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, rawURL,
		strings.NewReader(form.Encode()))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("X-Requested-With", "XMLHttpRequest")
	req.Header.Set("Referer", s.cfg.BaseURL+"/")

	resp, err := s.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Error responses may be full HTML pages of arbitrary size; keep only
		// a bounded excerpt so the error stays readable and memory use stays
		// small. The read is best-effort: a failure just shortens the excerpt.
		const maxErrBody = 512
		excerpt, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrBody))
		return nil, fmt.Errorf("madara: HTTP %d for %s: %s",
			resp.StatusCode, rawURL, strings.TrimSpace(string(excerpt)))
	}
	return goquery.NewDocumentFromReader(resp.Body)
}
// searchPage returns the URL path component that selects listing page n:
// the empty string for the first page and "page/<n>/" for every other value.
func (s *Source) searchPage(page int) string {
	if page != 1 {
		return "page/" + fmt.Sprint(page) + "/"
	}
	return ""
}
// parseMangaFromElement extracts one listing entry from el, the element
// matched by the popular/latest entry selector. The first link matching
// PopularMangaURLSelector provides the URL (site-relative when it points at
// this site) and the title; the first <img> inside el provides the thumbnail.
// Anything not found is left at its zero value.
func (s *Source) parseMangaFromElement(el *goquery.Selection) source.SManga {
	var entry source.SManga

	if link := el.Find(s.cfg.PopularMangaURLSelector).First(); link.Length() > 0 {
		if href, found := link.Attr("href"); found {
			entry.URL = stripDomain(href, s.cfg.BaseURL)
		}
		entry.Title = strings.TrimSpace(link.Text())
	}

	if cover := el.Find("img").First(); cover.Length() > 0 {
		entry.ThumbnailURL = imgAttr(cover, s.cfg.BaseURL)
	}

	return entry
}
// parseSearchMangaFromElement extracts one search result from el. It works
// like parseMangaFromElement but always looks for the title link with the
// fixed selector "div.post-title a, h3.h5 a" instead of the configurable one.
func (s *Source) parseSearchMangaFromElement(el *goquery.Selection) source.SManga {
	var hit source.SManga

	titleLink := el.Find("div.post-title a, h3.h5 a").First()
	if titleLink.Length() != 0 {
		href, hasHref := titleLink.Attr("href")
		if hasHref {
			hit.URL = stripDomain(href, s.cfg.BaseURL)
		}
		hit.Title = strings.TrimSpace(titleLink.Text())
	}

	thumb := el.Find("img").First()
	if thumb.Length() != 0 {
		hit.ThumbnailURL = imgAttr(thumb, s.cfg.BaseURL)
	}

	return hit
}
// GetPopularManga fetches the given page of the manga listing ordered by
// views (m_orderby=views) and parses it with the popular-entry selector.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	root := strings.TrimRight(s.cfg.BaseURL, "/")
	listingURL := root + "/" + s.cfg.MangaSubString + "/" + s.searchPage(page) + "?m_orderby=views"

	listing, err := s.get(context.Background(), listingURL)
	if err != nil {
		return source.MangasPage{}, err
	}
	return s.parseMangaList(listing, s.cfg.PopularMangaSelector, true), nil
}
// GetLatestUpdates fetches the given page of the manga listing ordered by
// latest update (m_orderby=latest). The page has the same layout as the
// popular listing, so it is parsed with the popular-entry selector.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	segments := []string{
		strings.TrimRight(s.cfg.BaseURL, "/"),
		s.cfg.MangaSubString,
		s.searchPage(page) + "?m_orderby=latest",
	}

	listing, err := s.get(context.Background(), strings.Join(segments, "/"))
	if err != nil {
		return source.MangasPage{}, err
	}
	return s.parseMangaList(listing, s.cfg.PopularMangaSelector, true), nil
}
// GetSearchManga runs a title search for query and returns result page
// number page. The query is URL-escaped before it is placed in the request.
// The filters argument is accepted for interface compatibility but is not
// used by this implementation.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	root := strings.TrimRight(s.cfg.BaseURL, "/")
	target := root + "/?s=" + url.QueryEscape(query) + "&post_type=wp-manga&paged=" + fmt.Sprint(page)

	results, err := s.get(context.Background(), target)
	if err != nil {
		return source.MangasPage{}, err
	}
	return s.parseMangaList(results, s.cfg.SearchMangaSelector, false), nil
}
// parseMangaList turns every element of doc matching selector into a manga
// entry. popular selects the per-entry parser: parseMangaFromElement when
// true, parseSearchMangaFromElement otherwise. Entries without a URL are
// dropped. HasNextPage is set when the document contains any of the known
// pagination elements.
func (s *Source) parseMangaList(doc *goquery.Document, selector string, popular bool) source.MangasPage {
	parseEntry := s.parseSearchMangaFromElement
	if popular {
		parseEntry = s.parseMangaFromElement
	}

	var found []source.SManga
	doc.Find(selector).Each(func(_ int, el *goquery.Selection) {
		if entry := parseEntry(el); entry.URL != "" {
			found = append(found, entry)
		}
	})

	pagination := doc.Find("div.nav-previous, nav.navigation-ajax, a.nextpostslink")
	return source.MangasPage{
		Mangas:      found,
		HasNextPage: pagination.Length() > 0,
	}
}
// GetMangaDetails downloads the detail page of manga and returns the
// attributes scraped from it. If the page cannot be fetched, the manga that
// was passed in is returned unchanged together with the error.
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	page, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
	if err != nil {
		return manga, err
	}
	details := s.parseMangaDetails(page, manga.URL)
	return details, nil
}
// parseMangaDetails scrapes a manga detail page using the configured
// MangaDetailsSelector* selectors. The result always carries mangaURL as its
// URL; every attribute whose selector matches nothing keeps its zero value.
//
// Title, description and thumbnail come from the first match of their
// selector, the status from the last match, and author, artist and genre
// are the comma-separated non-empty texts of all matches.
func (s *Source) parseMangaDetails(doc *goquery.Document, mangaURL string) source.SManga {
	// joinedText collects the trimmed, non-empty text of every match.
	joinedText := func(selector string) string {
		var parts []string
		doc.Find(selector).Each(func(_ int, node *goquery.Selection) {
			text := strings.TrimSpace(node.Text())
			if text != "" {
				parts = append(parts, text)
			}
		})
		return strings.Join(parts, ", ")
	}

	details := source.SManga{URL: mangaURL}

	if title := doc.Find(s.cfg.MangaDetailsSelectorTitle).First(); title.Length() > 0 {
		details.Title = strings.TrimSpace(title.Text())
	}
	details.Author = joinedText(s.cfg.MangaDetailsSelectorAuthor)
	details.Artist = joinedText(s.cfg.MangaDetailsSelectorArtist)
	if summary := doc.Find(s.cfg.MangaDetailsSelectorDesc).First(); summary.Length() > 0 {
		details.Description = strings.TrimSpace(summary.Text())
	}
	if cover := doc.Find(s.cfg.MangaDetailsSelectorThumb).First(); cover.Length() > 0 {
		details.ThumbnailURL = imgAttr(cover, s.cfg.BaseURL)
	}
	if status := doc.Find(s.cfg.MangaDetailsSelectorStatus).Last(); status.Length() > 0 {
		details.Status = util.StatusFromString(status.Text())
	}
	details.Genre = joinedText(s.cfg.MangaDetailsSelectorGenre)

	return details
}
// GetChapterList returns the chapters of manga in document order.
//
// Chapters embedded in the manga page are used when present. Otherwise, if
// the page contains a chapter holder element, the list is fetched through
// fetchChaptersAJAX. Rows without a link are skipped. Every chapter URL is
// made site-relative and requests the "list" reading style.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	mangaURL := strings.TrimRight(util.AbsURL(s.cfg.BaseURL, manga.URL), "/")
	doc, err := s.get(context.Background(), mangaURL)
	if err != nil {
		return nil, err
	}

	// Try the inline chapter list first; fall back to the AJAX endpoints only
	// when the page has a holder element waiting to be filled.
	chapterEls := doc.Find(s.cfg.ChapterListSelector)
	if chapterEls.Length() == 0 {
		if holder := doc.Find("div[id^=manga-chapters-holder]"); holder.Length() > 0 {
			// A missing data-id leaves mangaID empty; it is passed through
			// unchanged and the fetch decides what to do with it.
			mangaID, _ := holder.Attr("data-id")
			ajaxDoc, ajaxErr := s.fetchChaptersAJAX(mangaURL, mangaID)
			if ajaxErr != nil {
				return nil, ajaxErr
			}
			chapterEls = ajaxDoc.Find(s.cfg.ChapterListSelector)
		}
	}

	var chapters []source.SChapter
	chapterEls.Each(func(_ int, el *goquery.Selection) {
		var ch source.SChapter
		el.Find(s.cfg.ChapterURLSelector).First().Each(func(_ int, a *goquery.Selection) {
			if href, ok := a.Attr("href"); ok {
				ch.URL = stripDomain(appendListStyle(href), s.cfg.BaseURL)
			}
			ch.Name = strings.TrimSpace(a.Text())
		})
		el.Find(s.cfg.ChapterDateSelector).First().Each(func(_ int, span *goquery.Selection) {
			ch.DateUpload = util.ParseRelativeDate(span.Text())
		})
		if ch.URL != "" {
			chapters = append(chapters, ch)
		}
	})
	return chapters, nil
}

// appendListStyle returns href with the style=list query parameter applied.
// A trailing "?style=paged" is replaced, an href that already requests the
// list style is returned unchanged, and an href that already has a query
// string gets the parameter appended with "&" instead of a second "?".
func appendListStyle(href string) string {
	href = strings.TrimSuffix(href, "?style=paged")
	switch {
	case strings.Contains(href, "style=list"):
		return href
	case strings.Contains(href, "?"):
		return href + "&style=list"
	default:
		return href + "?style=list"
	}
}
// fetchChaptersAJAX downloads the chapter list fragment for a manga.
//
// mangaURL is the absolute manga page URL and mangaID the value of the
// chapter holder's data-id attribute. When UseNewChapterEndpoint is set, or
// when no manga ID is available, <mangaURL>/ajax/chapters is requested
// directly. Otherwise /wp-admin/admin-ajax.php is tried first and
// /ajax/chapters serves as the fallback if that request fails. When both
// fail, the returned error wraps the fallback error and mentions the first.
func (s *Source) fetchChaptersAJAX(mangaURL, mangaID string) (*goquery.Document, error) {
	ctx := context.Background()
	newEndpoint := strings.TrimRight(mangaURL, "/") + "/ajax/chapters"

	// admin-ajax.php identifies the manga by ID only, so without an ID the
	// per-manga endpoint is the only request that can succeed.
	if s.cfg.UseNewChapterEndpoint || mangaID == "" {
		return s.post(ctx, newEndpoint, url.Values{})
	}

	form := url.Values{
		"action": {"manga_get_chapters"},
		"manga":  {mangaID},
	}
	// Trim the trailing slash like every other URL builder in this package
	// so a BaseURL of "https://host/" does not yield "https://host//wp-admin".
	legacyEndpoint := strings.TrimRight(s.cfg.BaseURL, "/") + "/wp-admin/admin-ajax.php"
	doc, legacyErr := s.post(ctx, legacyEndpoint, form)
	if legacyErr == nil {
		return doc, nil
	}

	// Fallback to new endpoint
	doc, err := s.post(ctx, newEndpoint, url.Values{})
	if err != nil {
		return nil, fmt.Errorf("madara: chapter list: admin-ajax failed (%v); /ajax/chapters fallback: %w",
			legacyErr, err)
	}
	return doc, nil
}
// GetPageList returns the image pages of chapter.
//
// If the chapter page carries a #chapter-protector-data element, parsing is
// delegated to parseProtectedPages. Otherwise every image matching
// PageListParseSelector that yields a usable URL becomes one page. Page
// indices are consecutive starting at 0, and each page's URL is the chapter
// page URL.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	rawURL := util.AbsURL(s.cfg.BaseURL, chapter.URL)
	doc, err := s.get(context.Background(), rawURL)
	if err != nil {
		return nil, err
	}

	// Check for chapter protector data before scraping plain <img> tags.
	if doc.Find("#chapter-protector-data").Length() > 0 {
		return s.parseProtectedPages(doc, rawURL)
	}

	var pages []source.Page
	doc.Find(s.cfg.PageListParseSelector).Each(func(_ int, img *goquery.Selection) {
		imgURL := imgAttr(img, s.cfg.BaseURL)
		if imgURL == "" {
			return
		}
		// Number pages by their position in the result, not in the DOM, so
		// that skipped images do not leave gaps in the index sequence.
		pages = append(pages, source.Page{Index: len(pages), URL: rawURL, ImageURL: imgURL})
	})
	return pages, nil
}
// parseProtectedPages builds the page list from the #chapter-protector-data
// element of doc. The element's text is decoded as a JSON object whose
// "arrayofimages" member lists the image URLs; each one is resolved against
// the site base URL. pageURL is stored as the URL of every page.
//
// An error is returned when the text is not valid JSON (the decode error is
// wrapped) or when it contains no images.
func (s *Source) parseProtectedPages(doc *goquery.Document, pageURL string) ([]source.Page, error) {
	data := doc.Find("#chapter-protector-data").Text()

	var payload struct {
		Images []string `json:"arrayofimages"`
	}
	if err := json.Unmarshal([]byte(data), &payload); err != nil {
		// Keep the cause so callers can tell malformed data from an empty list.
		return nil, fmt.Errorf("madara: could not parse protected chapter pages: %w", err)
	}
	if len(payload.Images) == 0 {
		return nil, fmt.Errorf("madara: could not parse protected chapter pages: no images in payload")
	}

	pages := make([]source.Page, len(payload.Images))
	for i, img := range payload.Images {
		pages[i] = source.Page{Index: i, URL: pageURL, ImageURL: util.AbsURL(s.cfg.BaseURL, img)}
	}
	return pages, nil
}
// GetImageURL returns the image URL already stored on page. No request is
// made and the error is always nil.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	imageURL := page.ImageURL
	return imageURL, nil
}
// GetFilterList returns the search filters offered by this source. None are
// defined, so the result is always a nil slice.
func (s *Source) GetFilterList() []source.Filter {
	var none []source.Filter
	return none
}
// --- helpers ---
// stripDomain removes the scheme and host from href when it points at the
// same host as baseURL, leaving the path plus any query string ("/path?q").
//
// Host names are compared case-insensitively, and scheme-relative references
// ("//host/path") are treated like absolute URLs. href is returned unchanged
// when it has no host (already relative), belongs to a different host, or
// when either URL cannot be parsed.
func stripDomain(href, baseURL string) string {
	target, err := url.Parse(href)
	if err != nil || target.Host == "" {
		return href
	}
	base, err := url.Parse(baseURL)
	if err != nil || !strings.EqualFold(target.Host, base.Host) {
		return href
	}
	return target.RequestURI()
}
// imgAttr returns the best image URL from common lazy-load attributes.
//
// The attributes are tried in order (data-lazy-src, data-src, data-cfsrc,
// data-manga-src, src). The first value that is non-empty after trimming
// surrounding whitespace and is not an inline "data:" URI is resolved against
// baseURL and returned. The empty string is returned when no attribute
// qualifies.
func imgAttr(img *goquery.Selection, baseURL string) string {
	for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "data-manga-src", "src"} {
		raw, ok := img.Attr(attr)
		if !ok {
			continue
		}
		// Attribute values may be padded with newlines and tabs; trim first so
		// the emptiness and "data:" checks see the real value and the padding
		// does not leak into the resolved URL.
		v := strings.TrimSpace(raw)
		if v == "" || strings.HasPrefix(v, "data:") {
			continue
		}
		return util.AbsURL(baseURL, v)
	}
	return ""
}