// Package madara implements the Madara WordPress theme multi-source base. // Uses admin-ajax.php or /ajax/chapters for chapter lists; HTML scraping throughout. package madara import ( "context" "encoding/json" "fmt" "io" "net/http" "net/url" "strings" "github.com/PuerkitoBio/goquery" "goyomi/internal/httpclient" "goyomi/internal/source" "goyomi/sources/base/util" ) // Config holds per-source configuration and overridable CSS selectors. type Config struct { Name string BaseURL string Lang string // MangaSubString is the URL path segment for manga listings (default "manga"). MangaSubString string // UseNewChapterEndpoint: use /ajax/chapters instead of admin-ajax.php. UseNewChapterEndpoint bool // Overridable selectors — leave empty to use defaults. PopularMangaSelector string PopularMangaURLSelector string SearchMangaSelector string ChapterListSelector string ChapterURLSelector string ChapterDateSelector string PageListParseSelector string MangaDetailsSelectorTitle string MangaDetailsSelectorAuthor string MangaDetailsSelectorArtist string MangaDetailsSelectorStatus string MangaDetailsSelectorDesc string MangaDetailsSelectorThumb string MangaDetailsSelectorGenre string } func (c *Config) setDefaults() { if c.MangaSubString == "" { c.MangaSubString = "manga" } if c.PopularMangaSelector == "" { c.PopularMangaSelector = "div.page-item-detail, .manga__item" } if c.PopularMangaURLSelector == "" { c.PopularMangaURLSelector = "div.post-title a" } if c.SearchMangaSelector == "" { c.SearchMangaSelector = "div.c-tabs-item__content, div.page-item-detail, .manga__item" } if c.ChapterListSelector == "" { c.ChapterListSelector = "li.wp-manga-chapter" } if c.ChapterURLSelector == "" { c.ChapterURLSelector = "a" } if c.ChapterDateSelector == "" { c.ChapterDateSelector = "span.chapter-release-date" } if c.PageListParseSelector == "" { c.PageListParseSelector = "div.page-break img, li.blocks-gallery-item img, .reading-content img" } if c.MangaDetailsSelectorTitle == "" { c.MangaDetailsSelectorTitle = "div.post-title h3, div.post-title h1, #manga-title > h1" } if c.MangaDetailsSelectorAuthor == "" { c.MangaDetailsSelectorAuthor = "div.author-content > a, div.manga-authors > a" } if c.MangaDetailsSelectorArtist == "" { c.MangaDetailsSelectorArtist = "div.artist-content > a" } if c.MangaDetailsSelectorStatus == "" { c.MangaDetailsSelectorStatus = "div.summary-content, div.summary-heading:contains(Status) + div" } if c.MangaDetailsSelectorDesc == "" { c.MangaDetailsSelectorDesc = "div.description-summary div.summary__content, div.summary_content div.post-content_item > h5 + div" } if c.MangaDetailsSelectorThumb == "" { c.MangaDetailsSelectorThumb = "div.summary_image img" } if c.MangaDetailsSelectorGenre == "" { c.MangaDetailsSelectorGenre = "div.genres-content a" } } // Source implements source.CatalogueSource for Madara-based sites. type Source struct { cfg Config client *httpclient.Client id int64 } func New(cfg Config) *Source { cfg.setDefaults() c := httpclient.NewClient(httpclient.WithRateLimit(1, 2)) return &Source{ cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang), } } func (s *Source) ID() int64 { return s.id } func (s *Source) Name() string { return s.cfg.Name } func (s *Source) Lang() string { return s.cfg.Lang } func (s *Source) SupportsLatest() bool { return true } // --- HTTP helpers --- func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) if err != nil { return nil, err } req.Header.Set("Referer", s.cfg.BaseURL+"/") resp, err := s.client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("madara: HTTP %d for %s", resp.StatusCode, rawURL) } return goquery.NewDocumentFromReader(resp.Body) } func (s *Source) post(ctx context.Context, rawURL string, form url.Values) (*goquery.Document, error) { encoded := form.Encode() req, err := http.NewRequestWithContext(ctx, http.MethodPost, rawURL, strings.NewReader(encoded)) if err != nil { return nil, err } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") req.Header.Set("X-Requested-With", "XMLHttpRequest") req.Header.Set("Referer", s.cfg.BaseURL+"/") resp, err := s.client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(resp.Body) return nil, fmt.Errorf("madara: HTTP %d: %s", resp.StatusCode, string(body)) } return goquery.NewDocumentFromReader(resp.Body) } // searchPage returns the URL path component for page n. func (s *Source) searchPage(page int) string { if page == 1 { return "" } return fmt.Sprintf("page/%d/", page) } func (s *Source) parseMangaFromElement(el *goquery.Selection) source.SManga { manga := source.SManga{} el.Find(s.cfg.PopularMangaURLSelector).First().Each(func(_ int, a *goquery.Selection) { if href, ok := a.Attr("href"); ok { manga.URL = stripDomain(href, s.cfg.BaseURL) } manga.Title = strings.TrimSpace(a.Text()) }) el.Find("img").First().Each(func(_ int, img *goquery.Selection) { manga.ThumbnailURL = imgAttr(img, s.cfg.BaseURL) }) return manga } func (s *Source) parseSearchMangaFromElement(el *goquery.Selection) source.SManga { manga := source.SManga{} el.Find("div.post-title a, h3.h5 a").First().Each(func(_ int, a *goquery.Selection) { if href, ok := a.Attr("href"); ok { manga.URL = stripDomain(href, s.cfg.BaseURL) } manga.Title = strings.TrimSpace(a.Text()) }) el.Find("img").First().Each(func(_ int, img *goquery.Selection) { manga.ThumbnailURL = imgAttr(img, s.cfg.BaseURL) }) return manga } func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { pageStr := s.searchPage(page) rawURL := fmt.Sprintf("%s/%s/%s?m_orderby=views", strings.TrimRight(s.cfg.BaseURL, "/"), s.cfg.MangaSubString, pageStr) doc, err := s.get(context.Background(), rawURL) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc, s.cfg.PopularMangaSelector, true), nil } func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { pageStr := s.searchPage(page) rawURL := fmt.Sprintf("%s/%s/%s?m_orderby=latest", strings.TrimRight(s.cfg.BaseURL, "/"), s.cfg.MangaSubString, pageStr) doc, err := s.get(context.Background(), rawURL) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc, s.cfg.PopularMangaSelector, true), nil } func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { base := strings.TrimRight(s.cfg.BaseURL, "/") searchURL := fmt.Sprintf("%s/?s=%s&post_type=wp-manga&paged=%d", base, url.QueryEscape(query), page) doc, err := s.get(context.Background(), searchURL) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc, s.cfg.SearchMangaSelector, false), nil } func (s *Source) parseMangaList(doc *goquery.Document, selector string, popular bool) source.MangasPage { var mangas []source.SManga doc.Find(selector).Each(func(_ int, el *goquery.Selection) { var m source.SManga if popular { m = s.parseMangaFromElement(el) } else { m = s.parseSearchMangaFromElement(el) } if m.URL != "" { mangas = append(mangas, m) } }) hasNext := doc.Find("div.nav-previous, nav.navigation-ajax, a.nextpostslink").Length() > 0 return source.MangasPage{Mangas: mangas, HasNextPage: hasNext} } func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { rawURL := util.AbsURL(s.cfg.BaseURL, manga.URL) doc, err := s.get(context.Background(), rawURL) if err != nil { return manga, err } return s.parseMangaDetails(doc, manga.URL), nil } func (s *Source) parseMangaDetails(doc *goquery.Document, mangaURL string) source.SManga { manga := source.SManga{URL: mangaURL} doc.Find(s.cfg.MangaDetailsSelectorTitle).First().Each(func(_ int, el *goquery.Selection) { manga.Title = strings.TrimSpace(el.Text()) }) var authors []string doc.Find(s.cfg.MangaDetailsSelectorAuthor).Each(func(_ int, el *goquery.Selection) { if t := strings.TrimSpace(el.Text()); t != "" { authors = append(authors, t) } }) manga.Author = strings.Join(authors, ", ") var artists []string doc.Find(s.cfg.MangaDetailsSelectorArtist).Each(func(_ int, el *goquery.Selection) { if t := strings.TrimSpace(el.Text()); t != "" { artists = append(artists, t) } }) manga.Artist = strings.Join(artists, ", ") doc.Find(s.cfg.MangaDetailsSelectorDesc).First().Each(func(_ int, el *goquery.Selection) { manga.Description = strings.TrimSpace(el.Text()) }) doc.Find(s.cfg.MangaDetailsSelectorThumb).First().Each(func(_ int, img *goquery.Selection) { manga.ThumbnailURL = imgAttr(img, s.cfg.BaseURL) }) doc.Find(s.cfg.MangaDetailsSelectorStatus).Last().Each(func(_ int, el *goquery.Selection) { manga.Status = util.StatusFromString(el.Text()) }) var genres []string doc.Find(s.cfg.MangaDetailsSelectorGenre).Each(func(_ int, el *goquery.Selection) { if t := strings.TrimSpace(el.Text()); t != "" { genres = append(genres, t) } }) manga.Genre = strings.Join(genres, ", ") return manga } func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { mangaURL := strings.TrimRight(util.AbsURL(s.cfg.BaseURL, manga.URL), "/") doc, err := s.get(context.Background(), mangaURL) if err != nil { return nil, err } // Try inline chapter list first chapterEls := doc.Find(s.cfg.ChapterListSelector) if chapterEls.Length() == 0 { // Need AJAX fetch chapHolder := doc.Find("div[id^=manga-chapters-holder]") if chapHolder.Length() > 0 { mangaID, _ := chapHolder.Attr("data-id") ajaxDoc, ajaxErr := s.fetchChaptersAJAX(mangaURL, mangaID) if ajaxErr != nil { return nil, ajaxErr } chapterEls = ajaxDoc.Find(s.cfg.ChapterListSelector) } } var chapters []source.SChapter chapterEls.Each(func(i int, el *goquery.Selection) { ch := source.SChapter{} el.Find(s.cfg.ChapterURLSelector).First().Each(func(_ int, a *goquery.Selection) { if href, ok := a.Attr("href"); ok { ch.URL = stripDomain(href+"?style=list", s.cfg.BaseURL) } ch.Name = strings.TrimSpace(a.Text()) }) el.Find(s.cfg.ChapterDateSelector).First().Each(func(_ int, span *goquery.Selection) { ch.DateUpload = util.ParseRelativeDate(span.Text()) }) if ch.URL != "" { chapters = append(chapters, ch) } }) return chapters, nil } func (s *Source) fetchChaptersAJAX(mangaURL, mangaID string) (*goquery.Document, error) { ctx := context.Background() if s.cfg.UseNewChapterEndpoint { return s.post(ctx, mangaURL+"/ajax/chapters", url.Values{}) } form := url.Values{ "action": {"manga_get_chapters"}, "manga": {mangaID}, } doc, err := s.post(ctx, s.cfg.BaseURL+"/wp-admin/admin-ajax.php", form) if err != nil { // Fallback to new endpoint return s.post(ctx, mangaURL+"/ajax/chapters", url.Values{}) } return doc, nil } func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { rawURL := util.AbsURL(s.cfg.BaseURL, chapter.URL) doc, err := s.get(context.Background(), rawURL) if err != nil { return nil, err } // Check for chapter protector (AES-encrypted pages) if doc.Find("#chapter-protector-data").Length() > 0 { return s.parseProtectedPages(doc, rawURL) } var pages []source.Page doc.Find(s.cfg.PageListParseSelector).Each(func(i int, img *goquery.Selection) { imgURL := imgAttr(img, s.cfg.BaseURL) if imgURL != "" { pages = append(pages, source.Page{Index: i, URL: rawURL, ImageURL: imgURL}) } }) return pages, nil } func (s *Source) parseProtectedPages(doc *goquery.Document, pageURL string) ([]source.Page, error) { // Extract JSON image array from chapter-protector-data data := doc.Find("#chapter-protector-data").Text() // Look for image array in the data JSON var result struct { Images []string `json:"arrayofimages"` } if err := json.Unmarshal([]byte(data), &result); err == nil && len(result.Images) > 0 { pages := make([]source.Page, len(result.Images)) for i, img := range result.Images { pages[i] = source.Page{Index: i, URL: pageURL, ImageURL: util.AbsURL(s.cfg.BaseURL, img)} } return pages, nil } return nil, fmt.Errorf("madara: could not parse protected chapter pages") } func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } func (s *Source) GetFilterList() []source.Filter { return nil } // --- helpers --- // stripDomain removes the base URL scheme+host from an absolute URL, leaving /path. func stripDomain(href, baseURL string) string { parsed, err := url.Parse(href) if err != nil || !parsed.IsAbs() { return href } base, err := url.Parse(baseURL) if err != nil { return href } if parsed.Host != base.Host { return href } return parsed.RequestURI() } // imgAttr returns the best image URL from common lazy-load attributes. func imgAttr(img *goquery.Selection, baseURL string) string { for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "data-manga-src", "src"} { if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") { return util.AbsURL(baseURL, v) } } return "" }