437 lines
13 KiB
Go
Executable File
437 lines
13 KiB
Go
Executable File
// Package madara implements the shared base for sources built on the Madara
// WordPress theme.
//
// All data is scraped from HTML. Chapter lists that are not inlined in the
// manga page are fetched from admin-ajax.php or /ajax/chapters.
|
|
package madara
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"goyomi/internal/httpclient"
|
|
"goyomi/internal/source"
|
|
"goyomi/sources/base/util"
|
|
)
|
|
|
|
// Config describes one Madara-based site: its identity, its endpoint quirks
// and the CSS selectors used to scrape it.
//
// Every selector field (and MangaSubString) may be left empty; setDefaults
// then fills in the stock value.
type Config struct {
	// Name is the source name reported by Source.Name; New also passes it to
	// source.GenerateSourceID.
	Name string

	// BaseURL is the site root from which request URLs and the Referer header
	// are built.
	BaseURL string

	// Lang is the language reported by Source.Lang; New also passes it to
	// source.GenerateSourceID.
	Lang string

	// MangaSubString is the URL path segment for manga listings (default "manga").
	MangaSubString string

	// UseNewChapterEndpoint makes AJAX chapter fetches go to
	// <manga URL>/ajax/chapters instead of wp-admin/admin-ajax.php.
	UseNewChapterEndpoint bool

	// PopularMangaSelector matches one listing entry on the popular and
	// latest pages.
	PopularMangaSelector string

	// PopularMangaURLSelector matches, inside a listing entry, the link whose
	// href and text become the manga URL and title.
	PopularMangaURLSelector string

	// SearchMangaSelector matches one entry on a search results page.
	SearchMangaSelector string

	// ChapterListSelector matches one chapter entry.
	ChapterListSelector string

	// ChapterURLSelector matches, inside a chapter entry, the link whose href
	// and text become the chapter URL and name.
	ChapterURLSelector string

	// ChapterDateSelector matches, inside a chapter entry, the element whose
	// text is parsed as the upload date.
	ChapterDateSelector string

	// PageListParseSelector matches the page images of a chapter.
	PageListParseSelector string

	// MangaDetailsSelectorTitle matches the title on the details page.
	MangaDetailsSelectorTitle string

	// MangaDetailsSelectorAuthor matches the author links on the details page.
	MangaDetailsSelectorAuthor string

	// MangaDetailsSelectorArtist matches the artist links on the details page.
	MangaDetailsSelectorArtist string

	// MangaDetailsSelectorStatus matches status candidates on the details
	// page; the last match is the one that gets parsed.
	MangaDetailsSelectorStatus string

	// MangaDetailsSelectorDesc matches the description on the details page.
	MangaDetailsSelectorDesc string

	// MangaDetailsSelectorThumb matches the cover image on the details page.
	MangaDetailsSelectorThumb string

	// MangaDetailsSelectorGenre matches the genre links on the details page.
	MangaDetailsSelectorGenre string
}
|
|
|
|
func (c *Config) setDefaults() {
|
|
if c.MangaSubString == "" {
|
|
c.MangaSubString = "manga"
|
|
}
|
|
if c.PopularMangaSelector == "" {
|
|
c.PopularMangaSelector = "div.page-item-detail, .manga__item"
|
|
}
|
|
if c.PopularMangaURLSelector == "" {
|
|
c.PopularMangaURLSelector = "div.post-title a"
|
|
}
|
|
if c.SearchMangaSelector == "" {
|
|
c.SearchMangaSelector = "div.c-tabs-item__content, div.page-item-detail, .manga__item"
|
|
}
|
|
if c.ChapterListSelector == "" {
|
|
c.ChapterListSelector = "li.wp-manga-chapter"
|
|
}
|
|
if c.ChapterURLSelector == "" {
|
|
c.ChapterURLSelector = "a"
|
|
}
|
|
if c.ChapterDateSelector == "" {
|
|
c.ChapterDateSelector = "span.chapter-release-date"
|
|
}
|
|
if c.PageListParseSelector == "" {
|
|
c.PageListParseSelector = "div.page-break img, li.blocks-gallery-item img, .reading-content img"
|
|
}
|
|
if c.MangaDetailsSelectorTitle == "" {
|
|
c.MangaDetailsSelectorTitle = "div.post-title h3, div.post-title h1, #manga-title > h1"
|
|
}
|
|
if c.MangaDetailsSelectorAuthor == "" {
|
|
c.MangaDetailsSelectorAuthor = "div.author-content > a, div.manga-authors > a"
|
|
}
|
|
if c.MangaDetailsSelectorArtist == "" {
|
|
c.MangaDetailsSelectorArtist = "div.artist-content > a"
|
|
}
|
|
if c.MangaDetailsSelectorStatus == "" {
|
|
c.MangaDetailsSelectorStatus = "div.summary-content, div.summary-heading:contains(Status) + div"
|
|
}
|
|
if c.MangaDetailsSelectorDesc == "" {
|
|
c.MangaDetailsSelectorDesc = "div.description-summary div.summary__content, div.summary_content div.post-content_item > h5 + div"
|
|
}
|
|
if c.MangaDetailsSelectorThumb == "" {
|
|
c.MangaDetailsSelectorThumb = "div.summary_image img"
|
|
}
|
|
if c.MangaDetailsSelectorGenre == "" {
|
|
c.MangaDetailsSelectorGenre = "div.genres-content a"
|
|
}
|
|
}
|
|
|
|
// Source implements source.CatalogueSource for Madara-based sites.
|
|
type Source struct {
|
|
cfg Config
|
|
client *httpclient.Client
|
|
id int64
|
|
}
|
|
|
|
func New(cfg Config) *Source {
|
|
cfg.setDefaults()
|
|
c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
|
|
return &Source{
|
|
cfg: cfg,
|
|
client: c,
|
|
id: source.GenerateSourceID(cfg.Name, cfg.Lang),
|
|
}
|
|
}
|
|
|
|
// ID returns the source identifier generated in New.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
|
// Lang returns the configured source language.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
|
// SupportsLatest always reports true: GetLatestUpdates is implemented.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
// --- HTTP helpers ---
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("madara: HTTP %d for %s", resp.StatusCode, rawURL)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func (s *Source) post(ctx context.Context, rawURL string, form url.Values) (*goquery.Document, error) {
|
|
encoded := form.Encode()
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, rawURL,
|
|
strings.NewReader(encoded))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
|
req.Header.Set("X-Requested-With", "XMLHttpRequest")
|
|
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(resp.Body)
|
|
return nil, fmt.Errorf("madara: HTTP %d: %s", resp.StatusCode, string(body))
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
// searchPage returns the URL path component for page n.
|
|
func (s *Source) searchPage(page int) string {
|
|
if page == 1 {
|
|
return ""
|
|
}
|
|
return fmt.Sprintf("page/%d/", page)
|
|
}
|
|
|
|
func (s *Source) parseMangaFromElement(el *goquery.Selection) source.SManga {
|
|
manga := source.SManga{}
|
|
el.Find(s.cfg.PopularMangaURLSelector).First().Each(func(_ int, a *goquery.Selection) {
|
|
if href, ok := a.Attr("href"); ok {
|
|
manga.URL = stripDomain(href, s.cfg.BaseURL)
|
|
}
|
|
manga.Title = strings.TrimSpace(a.Text())
|
|
})
|
|
el.Find("img").First().Each(func(_ int, img *goquery.Selection) {
|
|
manga.ThumbnailURL = imgAttr(img, s.cfg.BaseURL)
|
|
})
|
|
return manga
|
|
}
|
|
|
|
func (s *Source) parseSearchMangaFromElement(el *goquery.Selection) source.SManga {
|
|
manga := source.SManga{}
|
|
el.Find("div.post-title a, h3.h5 a").First().Each(func(_ int, a *goquery.Selection) {
|
|
if href, ok := a.Attr("href"); ok {
|
|
manga.URL = stripDomain(href, s.cfg.BaseURL)
|
|
}
|
|
manga.Title = strings.TrimSpace(a.Text())
|
|
})
|
|
el.Find("img").First().Each(func(_ int, img *goquery.Selection) {
|
|
manga.ThumbnailURL = imgAttr(img, s.cfg.BaseURL)
|
|
})
|
|
return manga
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
pageStr := s.searchPage(page)
|
|
rawURL := fmt.Sprintf("%s/%s/%s?m_orderby=views",
|
|
strings.TrimRight(s.cfg.BaseURL, "/"), s.cfg.MangaSubString, pageStr)
|
|
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc, s.cfg.PopularMangaSelector, true), nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
pageStr := s.searchPage(page)
|
|
rawURL := fmt.Sprintf("%s/%s/%s?m_orderby=latest",
|
|
strings.TrimRight(s.cfg.BaseURL, "/"), s.cfg.MangaSubString, pageStr)
|
|
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc, s.cfg.PopularMangaSelector, true), nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
base := strings.TrimRight(s.cfg.BaseURL, "/")
|
|
searchURL := fmt.Sprintf("%s/?s=%s&post_type=wp-manga&paged=%d",
|
|
base, url.QueryEscape(query), page)
|
|
|
|
doc, err := s.get(context.Background(), searchURL)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc, s.cfg.SearchMangaSelector, false), nil
|
|
}
|
|
|
|
func (s *Source) parseMangaList(doc *goquery.Document, selector string, popular bool) source.MangasPage {
|
|
var mangas []source.SManga
|
|
doc.Find(selector).Each(func(_ int, el *goquery.Selection) {
|
|
var m source.SManga
|
|
if popular {
|
|
m = s.parseMangaFromElement(el)
|
|
} else {
|
|
m = s.parseSearchMangaFromElement(el)
|
|
}
|
|
if m.URL != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
hasNext := doc.Find("div.nav-previous, nav.navigation-ajax, a.nextpostslink").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
rawURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
return s.parseMangaDetails(doc, manga.URL), nil
|
|
}
|
|
|
|
func (s *Source) parseMangaDetails(doc *goquery.Document, mangaURL string) source.SManga {
|
|
manga := source.SManga{URL: mangaURL}
|
|
|
|
doc.Find(s.cfg.MangaDetailsSelectorTitle).First().Each(func(_ int, el *goquery.Selection) {
|
|
manga.Title = strings.TrimSpace(el.Text())
|
|
})
|
|
var authors []string
|
|
doc.Find(s.cfg.MangaDetailsSelectorAuthor).Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
authors = append(authors, t)
|
|
}
|
|
})
|
|
manga.Author = strings.Join(authors, ", ")
|
|
|
|
var artists []string
|
|
doc.Find(s.cfg.MangaDetailsSelectorArtist).Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
artists = append(artists, t)
|
|
}
|
|
})
|
|
manga.Artist = strings.Join(artists, ", ")
|
|
|
|
doc.Find(s.cfg.MangaDetailsSelectorDesc).First().Each(func(_ int, el *goquery.Selection) {
|
|
manga.Description = strings.TrimSpace(el.Text())
|
|
})
|
|
|
|
doc.Find(s.cfg.MangaDetailsSelectorThumb).First().Each(func(_ int, img *goquery.Selection) {
|
|
manga.ThumbnailURL = imgAttr(img, s.cfg.BaseURL)
|
|
})
|
|
|
|
doc.Find(s.cfg.MangaDetailsSelectorStatus).Last().Each(func(_ int, el *goquery.Selection) {
|
|
manga.Status = util.StatusFromString(el.Text())
|
|
})
|
|
|
|
var genres []string
|
|
doc.Find(s.cfg.MangaDetailsSelectorGenre).Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
manga.Genre = strings.Join(genres, ", ")
|
|
|
|
return manga
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
mangaURL := strings.TrimRight(util.AbsURL(s.cfg.BaseURL, manga.URL), "/")
|
|
doc, err := s.get(context.Background(), mangaURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Try inline chapter list first
|
|
chapterEls := doc.Find(s.cfg.ChapterListSelector)
|
|
|
|
if chapterEls.Length() == 0 {
|
|
// Need AJAX fetch
|
|
chapHolder := doc.Find("div[id^=manga-chapters-holder]")
|
|
if chapHolder.Length() > 0 {
|
|
mangaID, _ := chapHolder.Attr("data-id")
|
|
ajaxDoc, ajaxErr := s.fetchChaptersAJAX(mangaURL, mangaID)
|
|
if ajaxErr != nil {
|
|
return nil, ajaxErr
|
|
}
|
|
chapterEls = ajaxDoc.Find(s.cfg.ChapterListSelector)
|
|
}
|
|
}
|
|
|
|
var chapters []source.SChapter
|
|
chapterEls.Each(func(i int, el *goquery.Selection) {
|
|
ch := source.SChapter{}
|
|
el.Find(s.cfg.ChapterURLSelector).First().Each(func(_ int, a *goquery.Selection) {
|
|
if href, ok := a.Attr("href"); ok {
|
|
ch.URL = stripDomain(href+"?style=list", s.cfg.BaseURL)
|
|
}
|
|
ch.Name = strings.TrimSpace(a.Text())
|
|
})
|
|
el.Find(s.cfg.ChapterDateSelector).First().Each(func(_ int, span *goquery.Selection) {
|
|
ch.DateUpload = util.ParseRelativeDate(span.Text())
|
|
})
|
|
if ch.URL != "" {
|
|
chapters = append(chapters, ch)
|
|
}
|
|
})
|
|
return chapters, nil
|
|
}
|
|
|
|
func (s *Source) fetchChaptersAJAX(mangaURL, mangaID string) (*goquery.Document, error) {
|
|
ctx := context.Background()
|
|
if s.cfg.UseNewChapterEndpoint {
|
|
return s.post(ctx, mangaURL+"/ajax/chapters", url.Values{})
|
|
}
|
|
form := url.Values{
|
|
"action": {"manga_get_chapters"},
|
|
"manga": {mangaID},
|
|
}
|
|
doc, err := s.post(ctx, s.cfg.BaseURL+"/wp-admin/admin-ajax.php", form)
|
|
if err != nil {
|
|
// Fallback to new endpoint
|
|
return s.post(ctx, mangaURL+"/ajax/chapters", url.Values{})
|
|
}
|
|
return doc, nil
|
|
}
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
rawURL := util.AbsURL(s.cfg.BaseURL, chapter.URL)
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Check for chapter protector (AES-encrypted pages)
|
|
if doc.Find("#chapter-protector-data").Length() > 0 {
|
|
return s.parseProtectedPages(doc, rawURL)
|
|
}
|
|
|
|
var pages []source.Page
|
|
doc.Find(s.cfg.PageListParseSelector).Each(func(i int, img *goquery.Selection) {
|
|
imgURL := imgAttr(img, s.cfg.BaseURL)
|
|
if imgURL != "" {
|
|
pages = append(pages, source.Page{Index: i, URL: rawURL, ImageURL: imgURL})
|
|
}
|
|
})
|
|
return pages, nil
|
|
}
|
|
|
|
func (s *Source) parseProtectedPages(doc *goquery.Document, pageURL string) ([]source.Page, error) {
|
|
// Extract JSON image array from chapter-protector-data
|
|
data := doc.Find("#chapter-protector-data").Text()
|
|
// Look for image array in the data JSON
|
|
var result struct {
|
|
Images []string `json:"arrayofimages"`
|
|
}
|
|
if err := json.Unmarshal([]byte(data), &result); err == nil && len(result.Images) > 0 {
|
|
pages := make([]source.Page, len(result.Images))
|
|
for i, img := range result.Images {
|
|
pages[i] = source.Page{Index: i, URL: pageURL, ImageURL: util.AbsURL(s.cfg.BaseURL, img)}
|
|
}
|
|
return pages, nil
|
|
}
|
|
return nil, fmt.Errorf("madara: could not parse protected chapter pages")
|
|
}
|
|
|
|
func (s *Source) GetImageURL(page source.Page) (string, error) {
|
|
return page.ImageURL, nil
|
|
}
|
|
|
|
func (s *Source) GetFilterList() []source.Filter {
|
|
return nil
|
|
}
|
|
|
|
// --- helpers ---
|
|
|
|
// stripDomain reduces an absolute URL on the site's own host to its request
// URI (path plus query, "/" when the path is empty).
//
// href is returned unchanged when it cannot be parsed, is not absolute, when
// baseURL cannot be parsed, or when it points at a different host. Hosts are
// compared case-insensitively, so "Example.com" and "example.com" count as
// the same site.
func stripDomain(href, baseURL string) string {
	target, err := url.Parse(href)
	if err != nil || !target.IsAbs() {
		return href
	}
	site, err := url.Parse(baseURL)
	if err != nil {
		return href
	}
	// Host names are case-insensitive and url.Parse keeps them as written.
	if !strings.EqualFold(target.Host, site.Host) {
		return href
	}
	return target.RequestURI()
}
|
|
|
|
// imgAttr returns the best image URL from common lazy-load attributes.
|
|
func imgAttr(img *goquery.Selection, baseURL string) string {
|
|
for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "data-manga-src", "src"} {
|
|
if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") {
|
|
return util.AbsURL(baseURL, v)
|
|
}
|
|
}
|
|
return ""
|
|
}
|