Files
goyomi/sources/base/madara/madara.go
T
achmad 00e61480c3 fix(base): add override hooks for masonry, madara, keyoapp
Madara:
- Add PopularURL/LatestURL Config hooks for custom URL building
  (needed by hentai4free which uses search-based popular/latest URLs)

Masonry:
- Replace CSS :not(:has(a[href*=/video/])) with programmatic filtering.
  goquery/cascadia doesn't support :has() + attribute selectors
  (Jsoup does, hence Kotlin works but Go didn't)

Keyoapp:
- Add overridable selector fields (PopularSelector, DescriptionSelector,
  StatusSelector, AuthorSelector, ArtistSelector) to Config
2026-05-14 22:31:11 +07:00

457 lines
14 KiB
Go
Executable File

// Package madara implements the Madara WordPress theme multi-source base.
// Uses admin-ajax.php or /ajax/chapters for chapter lists; HTML scraping throughout.
package madara
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"github.com/PuerkitoBio/goquery"
"goyomi/internal/httpclient/flare"
"goyomi/internal/source"
"goyomi/sources/base/util"
)
// Config holds per-source configuration and overridable CSS selectors.
// New receives a copy and runs setDefaults on it, so an empty MangaSubString
// and any empty selector are replaced by the package defaults.
type Config struct {
// Name is the source name returned by Source.Name; together with Lang it is
// passed to source.GenerateSourceID.
Name string
// BaseURL is the site root used to build request URLs and the Referer header.
BaseURL string
// Lang is the language returned by Source.Lang.
Lang string
// MangaSubString is the URL path segment for manga listings (default "manga").
MangaSubString string
// UseNewChapterEndpoint: use /ajax/chapters instead of admin-ajax.php.
UseNewChapterEndpoint bool
// PopularURL overrides the URL for GetPopularManga. If nil, default is used.
// Takes page number (1-indexed), returns full URL string.
PopularURL func(page int) string
// LatestURL overrides the URL for GetLatestUpdates. If nil, default is used.
// Its result is fetched as-is, so it must be a full URL like PopularURL's.
LatestURL func(page int) string
// Overridable selectors — leave empty to use defaults.

// PopularMangaSelector matches one element per manga on the popular and
// latest listing pages.
PopularMangaSelector string
// PopularMangaURLSelector matches, inside a listing element, the link whose
// href becomes the manga URL and whose text becomes the title.
PopularMangaURLSelector string
// SearchMangaSelector matches one element per manga on search result pages.
SearchMangaSelector string
// ChapterListSelector matches one element per chapter.
ChapterListSelector string
// ChapterURLSelector matches, inside a chapter element, the link supplying
// the chapter URL and name.
ChapterURLSelector string
// ChapterDateSelector matches, inside a chapter element, the node whose text
// is parsed as the upload date.
ChapterDateSelector string
// PageListParseSelector matches the page images on a chapter page.
PageListParseSelector string
// MangaDetailsSelectorTitle: first match supplies the title.
MangaDetailsSelectorTitle string
// MangaDetailsSelectorAuthor: all non-empty matches are joined with ", ".
MangaDetailsSelectorAuthor string
// MangaDetailsSelectorArtist: all non-empty matches are joined with ", ".
MangaDetailsSelectorArtist string
// MangaDetailsSelectorStatus: the last match is converted to a status.
MangaDetailsSelectorStatus string
// MangaDetailsSelectorDesc: first match supplies the description.
MangaDetailsSelectorDesc string
// MangaDetailsSelectorThumb: first match supplies the thumbnail image.
MangaDetailsSelectorThumb string
// MangaDetailsSelectorGenre: all non-empty matches are joined with ", ".
MangaDetailsSelectorGenre string
}
// setDefaults fills every string field of c that is still empty with the
// stock Madara value. Fields that already hold a value are left untouched.
func (c *Config) setDefaults() {
	defaults := []struct {
		field    *string
		fallback string
	}{
		{&c.MangaSubString, "manga"},
		{&c.PopularMangaSelector, "div.page-item-detail, .manga__item"},
		{&c.PopularMangaURLSelector, "div.post-title a"},
		{&c.SearchMangaSelector, "div.c-tabs-item__content, div.page-item-detail, .manga__item"},
		{&c.ChapterListSelector, "li.wp-manga-chapter"},
		{&c.ChapterURLSelector, "a"},
		{&c.ChapterDateSelector, "span.chapter-release-date"},
		{&c.PageListParseSelector, "div.page-break img, li.blocks-gallery-item img, .reading-content img"},
		{&c.MangaDetailsSelectorTitle, "div.post-title h3, div.post-title h1, #manga-title > h1"},
		{&c.MangaDetailsSelectorAuthor, "div.author-content > a, div.manga-authors > a"},
		{&c.MangaDetailsSelectorArtist, "div.artist-content > a"},
		{&c.MangaDetailsSelectorStatus, "div.summary-content, div.summary-heading:contains(Status) + div"},
		{&c.MangaDetailsSelectorDesc, "div.description-summary div.summary__content, div.summary_content div.post-content_item > h5 + div"},
		{&c.MangaDetailsSelectorThumb, "div.summary_image img"},
		{&c.MangaDetailsSelectorGenre, "div.genres-content a"},
	}
	for _, d := range defaults {
		if *d.field == "" {
			*d.field = d.fallback
		}
	}
}
// Source implements source.CatalogueSource for Madara-based sites.
// Build it with New so that cfg has its defaults applied.
type Source struct {
// cfg is the configuration given to New, after setDefaults has run.
cfg Config
// client performs every HTTP request issued by this source.
client *flare.Client
// id is source.GenerateSourceID(cfg.Name, cfg.Lang), computed once in New.
id int64
}
// New builds a Source for the site described by cfg. Unset fields of cfg
// receive their defaults first; the HTTP client is created with
// flare.WithRateLimit(1, 2) and the ID is derived from cfg.Name and cfg.Lang.
func New(cfg Config) *Source {
	cfg.setDefaults()

	src := &Source{
		cfg:    cfg,
		client: flare.NewClient(flare.WithRateLimit(1, 2)),
	}
	src.id = source.GenerateSourceID(cfg.Name, cfg.Lang)
	return src
}
// ID returns the source identifier computed in New.
func (s *Source) ID() int64 {
	return s.id
}
// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}
// Lang returns the configured source language.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
// SupportsLatest always reports true: GetLatestUpdates is implemented.
func (s *Source) SupportsLatest() bool {
	return true
}
// --- HTTP helpers ---

// get issues a GET request for rawURL, sending the site root as Referer, and
// parses the response body as an HTML document. Any status other than 200 is
// returned as an error that names the status code and URL.
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Referer", s.cfg.BaseURL+"/")

	resp, err := s.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("madara: HTTP %d for %s", code, rawURL)
	}
	return goquery.NewDocumentFromReader(resp.Body)
}
// post sends form as an application/x-www-form-urlencoded POST to rawURL,
// marked as an XMLHttpRequest and with the site root as Referer, and parses
// the response body as an HTML document.
//
// A status other than 200 is returned as an error carrying the status code
// and the beginning of the response body. Only a bounded prefix of the body
// is read so that a large error page cannot bloat the error string.
func (s *Source) post(ctx context.Context, rawURL string, form url.Values) (*goquery.Document, error) {
	// Upper bound on how much of an error response is quoted in the error.
	const maxErrBody = 512

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, rawURL,
		strings.NewReader(form.Encode()))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("X-Requested-With", "XMLHttpRequest")
	req.Header.Set("Referer", s.cfg.BaseURL+"/")

	resp, err := s.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is diagnostic only, so a read error is ignored
		// and whatever was read is reported.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrBody))
		return nil, fmt.Errorf("madara: HTTP %d: %s", resp.StatusCode, string(body))
	}
	return goquery.NewDocumentFromReader(resp.Body)
}
// searchPage returns the URL path component for page n: the empty string for
// page 1 and "page/<n>/" for every other page number.
func (s *Source) searchPage(page int) string {
	if page != 1 {
		return fmt.Sprintf("page/%d/", page)
	}
	return ""
}
// parseMangaFromElement builds an SManga from one popular/latest listing
// element. The first node matching PopularMangaURLSelector supplies the URL
// (with the site domain stripped) and the title; the first <img> supplies the
// thumbnail. Fields with no matching node are left at their zero value.
func (s *Source) parseMangaFromElement(el *goquery.Selection) source.SManga {
	var manga source.SManga

	if link := el.Find(s.cfg.PopularMangaURLSelector).First(); link.Length() > 0 {
		if href, found := link.Attr("href"); found {
			manga.URL = stripDomain(href, s.cfg.BaseURL)
		}
		manga.Title = strings.TrimSpace(link.Text())
	}

	if cover := el.Find("img").First(); cover.Length() > 0 {
		manga.ThumbnailURL = imgAttr(cover, s.cfg.BaseURL)
	}
	return manga
}
// parseSearchMangaFromElement builds an SManga from one search result
// element. It uses a fixed link selector ("div.post-title a, h3.h5 a") rather
// than the configurable one; the first match supplies URL and title, and the
// first <img> supplies the thumbnail.
func (s *Source) parseSearchMangaFromElement(el *goquery.Selection) source.SManga {
	var manga source.SManga

	if link := el.Find("div.post-title a, h3.h5 a").First(); link.Length() > 0 {
		if href, found := link.Attr("href"); found {
			manga.URL = stripDomain(href, s.cfg.BaseURL)
		}
		manga.Title = strings.TrimSpace(link.Text())
	}

	if cover := el.Find("img").First(); cover.Length() > 0 {
		manga.ThumbnailURL = imgAttr(cover, s.cfg.BaseURL)
	}
	return manga
}
// GetPopularManga fetches the popular listing for the given page and parses
// it with PopularMangaSelector.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	doc, err := s.get(context.Background(), s.popularURL(page))
	if err != nil {
		return source.MangasPage{}, err
	}
	listing := s.parseMangaList(doc, s.cfg.PopularMangaSelector, true)
	return listing, nil
}
// GetLatestUpdates fetches the latest-updates listing for the given page.
// The page shares its markup with the popular listing, so it is parsed with
// PopularMangaSelector as well.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	doc, err := s.get(context.Background(), s.latestURL(page))
	if err != nil {
		return source.MangasPage{}, err
	}
	listing := s.parseMangaList(doc, s.cfg.PopularMangaSelector, true)
	return listing, nil
}
// popularURL returns the URL of the popular listing for page. A non-nil
// Config.PopularURL hook takes precedence; otherwise the URL is
// <base>/<MangaSubString>/<page path>?m_orderby=views.
func (s *Source) popularURL(page int) string {
	if override := s.cfg.PopularURL; override != nil {
		return override(page)
	}
	root := strings.TrimRight(s.cfg.BaseURL, "/")
	return fmt.Sprintf("%s/%s/%s?m_orderby=views",
		root, s.cfg.MangaSubString, s.searchPage(page))
}
// latestURL returns the URL of the latest-updates listing for page. A non-nil
// Config.LatestURL hook takes precedence; otherwise the URL is
// <base>/<MangaSubString>/<page path>?m_orderby=latest.
func (s *Source) latestURL(page int) string {
	if override := s.cfg.LatestURL; override != nil {
		return override(page)
	}
	root := strings.TrimRight(s.cfg.BaseURL, "/")
	return fmt.Sprintf("%s/%s/%s?m_orderby=latest",
		root, s.cfg.MangaSubString, s.searchPage(page))
}
// GetSearchManga runs a site search for query and returns the requested
// result page, parsed with SearchMangaSelector. The filters argument is
// accepted for interface compatibility and is not used.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	searchURL := fmt.Sprintf("%s/?s=%s&post_type=wp-manga&paged=%d",
		strings.TrimRight(s.cfg.BaseURL, "/"),
		url.QueryEscape(query),
		page,
	)

	doc, err := s.get(context.Background(), searchURL)
	if err != nil {
		return source.MangasPage{}, err
	}
	results := s.parseMangaList(doc, s.cfg.SearchMangaSelector, false)
	return results, nil
}
// parseMangaList converts every element of doc matching selector into an
// SManga, using the popular/latest parser when popular is true and the search
// parser otherwise. Entries without a URL are dropped. HasNextPage is set
// when the document contains any of the known pagination markers.
func (s *Source) parseMangaList(doc *goquery.Document, selector string, popular bool) source.MangasPage {
	parse := s.parseSearchMangaFromElement
	if popular {
		parse = s.parseMangaFromElement
	}

	var result source.MangasPage
	doc.Find(selector).Each(func(_ int, item *goquery.Selection) {
		if entry := parse(item); entry.URL != "" {
			result.Mangas = append(result.Mangas, entry)
		}
	})

	pager := doc.Find("div.nav-previous, nav.navigation-ajax, a.nextpostslink")
	result.HasNextPage = pager.Length() > 0
	return result
}
// GetMangaDetails fetches the manga's page and returns the details parsed
// from it. If the request fails, the manga passed in is returned unchanged
// together with the error.
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
	if err != nil {
		return manga, err
	}
	details := s.parseMangaDetails(doc, manga.URL)
	return details, nil
}
// parseMangaDetails extracts title, author, artist, description, thumbnail,
// status and genres from a manga page using the configured detail selectors.
// The returned SManga keeps mangaURL as its URL. Multi-valued fields (author,
// artist, genre) are joined with ", "; a selector that matches nothing leaves
// its field at the zero value.
func (s *Source) parseMangaDetails(doc *goquery.Document, mangaURL string) source.SManga {
	// firstText returns the trimmed text of the first match, or "" if none.
	firstText := func(selector string) string {
		node := doc.Find(selector).First()
		if node.Length() == 0 {
			return ""
		}
		return strings.TrimSpace(node.Text())
	}
	// joinedText joins the trimmed, non-empty text of every match with ", ".
	joinedText := func(selector string) string {
		var parts []string
		doc.Find(selector).Each(func(_ int, node *goquery.Selection) {
			if text := strings.TrimSpace(node.Text()); text != "" {
				parts = append(parts, text)
			}
		})
		return strings.Join(parts, ", ")
	}

	manga := source.SManga{URL: mangaURL}
	manga.Title = firstText(s.cfg.MangaDetailsSelectorTitle)
	manga.Author = joinedText(s.cfg.MangaDetailsSelectorAuthor)
	manga.Artist = joinedText(s.cfg.MangaDetailsSelectorArtist)
	manga.Description = firstText(s.cfg.MangaDetailsSelectorDesc)

	if cover := doc.Find(s.cfg.MangaDetailsSelectorThumb).First(); cover.Length() > 0 {
		manga.ThumbnailURL = imgAttr(cover, s.cfg.BaseURL)
	}
	// The status selector can match several summary rows; the last one is used.
	if status := doc.Find(s.cfg.MangaDetailsSelectorStatus).Last(); status.Length() > 0 {
		manga.Status = util.StatusFromString(status.Text())
	}

	manga.Genre = joinedText(s.cfg.MangaDetailsSelectorGenre)
	return manga
}
// GetChapterList returns the chapters of manga in the order they appear on
// the site.
//
// The manga page is fetched first. If it contains no element matching
// ChapterListSelector but does contain a chapters holder
// (div[id^=manga-chapters-holder]), the list is fetched through
// fetchChaptersAJAX instead. Chapters without a URL are skipped. A page with
// neither an inline list nor a holder yields a nil slice and no error.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	mangaURL := strings.TrimRight(util.AbsURL(s.cfg.BaseURL, manga.URL), "/")
	doc, err := s.get(context.Background(), mangaURL)
	if err != nil {
		return nil, err
	}

	// Try inline chapter list first.
	chapterEls := doc.Find(s.cfg.ChapterListSelector)
	if chapterEls.Length() == 0 {
		// Need AJAX fetch.
		if holder := doc.Find("div[id^=manga-chapters-holder]"); holder.Length() > 0 {
			// A missing data-id yields an empty ID; the /ajax/chapters endpoint
			// does not use it, so the request is still attempted.
			mangaID, _ := holder.Attr("data-id")
			ajaxDoc, ajaxErr := s.fetchChaptersAJAX(mangaURL, mangaID)
			if ajaxErr != nil {
				return nil, ajaxErr
			}
			chapterEls = ajaxDoc.Find(s.cfg.ChapterListSelector)
		}
	}

	var chapters []source.SChapter
	chapterEls.Each(func(_ int, el *goquery.Selection) {
		var ch source.SChapter
		if link := el.Find(s.cfg.ChapterURLSelector).First(); link.Length() > 0 {
			if href, ok := link.Attr("href"); ok {
				ch.URL = stripDomain(chapterListStyleURL(href), s.cfg.BaseURL)
			}
			ch.Name = strings.TrimSpace(link.Text())
		}
		if date := el.Find(s.cfg.ChapterDateSelector).First(); date.Length() > 0 {
			ch.DateUpload = util.ParseRelativeDate(date.Text())
		}
		if ch.URL != "" {
			chapters = append(chapters, ch)
		}
	})
	return chapters, nil
}

// chapterListStyleURL returns href with the "style=list" query parameter
// present exactly once. A trailing "?style=paged..." is removed first, an
// href that already ends in the parameter is returned as is, and the
// parameter is joined with "&" when href already carries a query string.
func chapterListStyleURL(href string) string {
	if i := strings.Index(href, "?style=paged"); i >= 0 {
		href = href[:i]
	}
	if strings.HasSuffix(href, "?style=list") || strings.HasSuffix(href, "&style=list") {
		return href
	}
	sep := "?"
	if strings.Contains(href, "?") {
		sep = "&"
	}
	return href + sep + "style=list"
}
// fetchChaptersAJAX loads the chapter list fragment for a manga.
//
// With UseNewChapterEndpoint set, it POSTs an empty form to
// <mangaURL>/ajax/chapters. Otherwise it POSTs the manga_get_chapters action
// with mangaID to <BaseURL>/wp-admin/admin-ajax.php and, if that request
// fails, falls back to the /ajax/chapters endpoint. When both attempts fail,
// the returned error wraps the fallback error and mentions the first one.
func (s *Source) fetchChaptersAJAX(mangaURL, mangaID string) (*goquery.Document, error) {
	ctx := context.Background()
	newEndpoint := mangaURL + "/ajax/chapters"
	if s.cfg.UseNewChapterEndpoint {
		return s.post(ctx, newEndpoint, url.Values{})
	}

	form := url.Values{
		"action": {"manga_get_chapters"},
		"manga":  {mangaID},
	}
	// Trim the trailing slash like the other URL builders do, so a BaseURL
	// such as "https://site/" does not produce "https://site//wp-admin/...".
	legacyEndpoint := strings.TrimRight(s.cfg.BaseURL, "/") + "/wp-admin/admin-ajax.php"
	doc, legacyErr := s.post(ctx, legacyEndpoint, form)
	if legacyErr == nil {
		return doc, nil
	}

	// Fallback to new endpoint.
	doc, err := s.post(ctx, newEndpoint, url.Values{})
	if err != nil {
		return nil, fmt.Errorf("madara: chapter list request failed (admin-ajax: %v): %w", legacyErr, err)
	}
	return doc, nil
}
// GetPageList returns the pages of chapter.
//
// The chapter page is fetched; if it contains a #chapter-protector-data
// element, parsing is delegated to parseProtectedPages. Otherwise every image
// matching PageListParseSelector that has a usable URL becomes a page. Each
// Page carries the chapter page URL and the image URL, and Index is the
// page's position in the returned slice, so indices stay contiguous even when
// some matched images are skipped.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	rawURL := util.AbsURL(s.cfg.BaseURL, chapter.URL)
	doc, err := s.get(context.Background(), rawURL)
	if err != nil {
		return nil, err
	}

	// Chapter protector present: the image list lives in that element.
	if doc.Find("#chapter-protector-data").Length() > 0 {
		return s.parseProtectedPages(doc, rawURL)
	}

	var pages []source.Page
	doc.Find(s.cfg.PageListParseSelector).Each(func(_ int, img *goquery.Selection) {
		imgURL := imgAttr(img, s.cfg.BaseURL)
		if imgURL == "" {
			return
		}
		// Number by output position, not by selection index, to avoid gaps.
		pages = append(pages, source.Page{Index: len(pages), URL: rawURL, ImageURL: imgURL})
	})
	return pages, nil
}
// parseProtectedPages builds the page list from the text of the
// #chapter-protector-data element, which is decoded as a JSON object whose
// "arrayofimages" member lists the image URLs. Each URL is resolved against
// BaseURL and pageURL is recorded as the page's URL.
//
// No decryption is performed here; the payload must already be plain JSON.
// An error is returned when the text is not valid JSON (the decode error is
// wrapped) or when it contains no images.
func (s *Source) parseProtectedPages(doc *goquery.Document, pageURL string) ([]source.Page, error) {
	data := doc.Find("#chapter-protector-data").Text()

	var result struct {
		Images []string `json:"arrayofimages"`
	}
	if err := json.Unmarshal([]byte(data), &result); err != nil {
		return nil, fmt.Errorf("madara: could not parse protected chapter pages: %w", err)
	}
	if len(result.Images) == 0 {
		return nil, fmt.Errorf("madara: could not parse protected chapter pages")
	}

	pages := make([]source.Page, len(result.Images))
	for i, img := range result.Images {
		pages[i] = source.Page{Index: i, URL: pageURL, ImageURL: util.AbsURL(s.cfg.BaseURL, img)}
	}
	return pages, nil
}
// GetImageURL returns the image URL already stored on page; no request is
// made and the error is always nil.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	imageURL := page.ImageURL
	return imageURL, nil
}
// GetFilterList returns nil: this base source defines no search filters.
func (s *Source) GetFilterList() []source.Filter {
	var none []source.Filter
	return none
}
// --- helpers ---

// stripDomain removes the base URL scheme+host from an absolute URL, leaving
// the path and query (/path?query).
//
// href is returned unchanged when it cannot be parsed, is not absolute, when
// baseURL cannot be parsed, or when the two hosts differ. Hosts are compared
// case-insensitively, since host names are not case-sensitive.
func stripDomain(href, baseURL string) string {
	target, err := url.Parse(href)
	if err != nil || !target.IsAbs() {
		return href
	}
	base, err := url.Parse(baseURL)
	if err != nil || !strings.EqualFold(target.Host, base.Host) {
		return href
	}
	return target.RequestURI()
}
// imgAttr returns the best image URL from common lazy-load attributes.
// Attributes are tried in a fixed order with plain src last; the first one
// that is present, non-empty and not a "data:" URI is resolved against
// baseURL and returned. If none qualifies, the result is "".
func imgAttr(img *goquery.Selection, baseURL string) string {
	candidates := [...]string{"data-lazy-src", "data-src", "data-cfsrc", "data-manga-src", "src"}
	for _, name := range candidates {
		value, present := img.Attr(name)
		if !present || value == "" || strings.HasPrefix(value, "data:") {
			continue
		}
		return util.AbsURL(baseURL, value)
	}
	return ""
}