phase3: implement first 20 base sources + shared util

Ports bases from previous session:
util (shared helpers), bakkin, fmreader, foolslide, gigaviewer,
gmanga, grouple, guya, heancms, hentaihand, kemono, madara,
madtheme, mangadventure, mangahub, mangathemesia, mangaworld,
mmrcms, senkuro, wpcomics.
This commit is contained in:
achmad
2026-05-10 22:15:11 +07:00
parent f0658472f3
commit ca609ccae7
20 changed files with 4418 additions and 0 deletions
+226
View File
@@ -0,0 +1,226 @@
// Package madtheme implements the MadTheme WordPress base.
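// Popular, latest, and search listings are all fetched with
// GET {base}/search?q={query}&page={n}&sort={order} (sort is omitted for plain search).
// Chapter pages are read from an image list embedded in a <script> tag,
// falling back to <img> elements in the reader markup.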
package madtheme
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/url"
"regexp"
"strings"
"github.com/PuerkitoBio/goquery"
"goyomi/internal/httpclient"
"goyomi/internal/source"
"goyomi/sources/base/util"
)
// Config describes one site built on the MadTheme base.
type Config struct {
// Name is the display name returned by Source.Name; together with Lang it
// is the input to source.GenerateSourceID in New.
Name string
// BaseURL is the site root. It is the prefix of the /search endpoint, the
// base for resolving relative links, and (with a trailing slash) the
// Referer header on every request.
BaseURL string
// Lang is the language code returned by Source.Lang.
Lang string
}
// Source is a MadTheme-backed manga source. Create it with New.
type Source struct {
// cfg holds the site settings passed to New.
cfg Config
// client performs every HTTP request issued by this source.
client *httpclient.Client
// id is the identifier computed once in New and returned by ID.
id int64
}
// New builds a MadTheme source for cfg. The source gets its own HTTP client
// created with httpclient.WithRateLimit(1, 2), and its ID is derived from
// cfg.Name and cfg.Lang via source.GenerateSourceID.
func New(cfg Config) *Source {
	src := &Source{
		cfg:    cfg,
		client: httpclient.NewClient(httpclient.WithRateLimit(1, 2)),
	}
	src.id = source.GenerateSourceID(cfg.Name, cfg.Lang)
	return src
}
// ID returns the source identifier stored on the Source.
func (s *Source) ID() int64 {
	return s.id
}
// Name returns the configured display name of the source.
func (s *Source) Name() string {
	return s.cfg.Name
}
// Lang returns the configured language code of the source.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
// SupportsLatest always reports true; the latest listing is served by
// GetLatestUpdates.
func (s *Source) SupportsLatest() bool {
	return true
}
// get issues a GET for rawURL with the site root as Referer and parses the
// response body as HTML.
//
// It returns an error when the request cannot be built or sent, when the
// response status is anything other than 200 OK, or when goquery fails to
// parse the body. The response body is always closed before returning.
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("Referer", s.cfg.BaseURL+"/")

	response, err := s.client.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		return nil, fmt.Errorf("madtheme: HTTP %d", status)
	}
	return goquery.NewDocumentFromReader(response.Body)
}
// searchURL builds the listing URL {base}/search with the query parameters
// q (always present, possibly empty), page, and — when non-empty — sort.
//
// If the configured BaseURL cannot be parsed, the URL is assembled by plain
// string concatenation instead of dereferencing a nil *url.URL; the malformed
// address is then rejected by http.NewRequestWithContext in get and surfaces
// as an ordinary error rather than a panic.
func (s *Source) searchURL(page int, q, sort string) string {
	endpoint := strings.TrimRight(s.cfg.BaseURL, "/") + "/search"

	u, err := url.Parse(endpoint)

	var qv url.Values
	if err == nil {
		// Keep any query parameters already present on the base URL.
		qv = u.Query()
	} else {
		qv = url.Values{}
	}
	qv.Set("q", q)
	qv.Set("page", fmt.Sprintf("%d", page))
	if sort != "" {
		qv.Set("sort", sort)
	}

	if err != nil {
		return endpoint + "?" + qv.Encode()
	}
	u.RawQuery = qv.Encode()
	return u.String()
}
// parseMangaList extracts manga entries from a listing page.
//
// Each element matching div.book-item, div.item, or div.manga-item is one
// candidate. Its first <a> supplies the URL (passed through stripDomain) and
// the title attribute; if that title is empty, the text of the first
// div.title/h3/h2 is used instead. The first <img> supplies the thumbnail.
// Candidates without a URL are dropped. HasNextPage is true when the page
// contains any element matching .next, .pagination .next, or a[rel=next].
func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
	var entries []source.SManga

	cards := doc.Find("div.book-item, div.item, div.manga-item")
	cards.Each(func(_ int, card *goquery.Selection) {
		var entry source.SManga

		if link := card.Find("a").First(); link.Length() > 0 {
			if href, found := link.Attr("href"); found {
				entry.URL = stripDomain(href, s.cfg.BaseURL)
			}
			entry.Title = strings.TrimSpace(link.AttrOr("title", ""))
		}

		if entry.Title == "" {
			if heading := card.Find("div.title, h3, h2").First(); heading.Length() > 0 {
				entry.Title = strings.TrimSpace(heading.Text())
			}
		}

		if img := card.Find("img").First(); img.Length() > 0 {
			entry.ThumbnailURL = imgAttr(img, s.cfg.BaseURL)
		}

		if entry.URL == "" {
			return
		}
		entries = append(entries, entry)
	})

	nextLinks := doc.Find(".next, .pagination .next, a[rel=next]")
	return source.MangasPage{
		Mangas:      entries,
		HasNextPage: nextLinks.Length() > 0,
	}
}
// GetPopularManga returns the given page of the search listing sorted by
// "views", with an empty query.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	listingURL := s.searchURL(page, "", "views")

	doc, err := s.get(context.Background(), listingURL)
	if err != nil {
		var none source.MangasPage
		return none, err
	}

	result := s.parseMangaList(doc)
	return result, nil
}
// GetLatestUpdates returns the given page of the search listing sorted by
// "latest", with an empty query.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	listingURL := s.searchURL(page, "", "latest")

	doc, err := s.get(context.Background(), listingURL)
	if err != nil {
		var none source.MangasPage
		return none, err
	}

	result := s.parseMangaList(doc)
	return result, nil
}
// GetSearchManga returns the given page of search results for query, using
// the site's default ordering (no sort parameter is sent). The filters
// argument is accepted but not used.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	resultsURL := s.searchURL(page, query, "")

	doc, err := s.get(context.Background(), resultsURL)
	if err != nil {
		var none source.MangasPage
		return none, err
	}

	result := s.parseMangaList(doc)
	return result, nil
}
// GetMangaDetails fetches the manga page and scrapes its metadata.
//
// The returned value carries over only manga.URL from the input; title,
// author, description, thumbnail, status, and genres come from the page and
// stay at their zero value when the matching element is missing. If the page
// cannot be fetched, the input manga is returned unchanged together with the
// error.
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	pageURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
	doc, err := s.get(context.Background(), pageURL)
	if err != nil {
		return manga, err
	}

	// trimmedText yields the whitespace-trimmed text of the first match, or
	// "" when nothing matches — the same value the field would keep anyway.
	trimmedText := func(selector string) string {
		node := doc.Find(selector).First()
		if node.Length() == 0 {
			return ""
		}
		return strings.TrimSpace(node.Text())
	}

	details := source.SManga{URL: manga.URL}
	details.Title = trimmedText("h1, h2")
	details.Author = trimmedText(".author a, .info a.author")
	details.Description = trimmedText(".summary, .description, .manga-summary")

	if cover := doc.Find(".cover img, .manga-cover img").First(); cover.Length() > 0 {
		details.ThumbnailURL = imgAttr(cover, s.cfg.BaseURL)
	}
	// The status text is handed over untrimmed, and only when the element exists.
	if status := doc.Find(".status").First(); status.Length() > 0 {
		details.Status = util.StatusFromString(status.Text())
	}

	var tags []string
	doc.Find(".genres a, .genre a").Each(func(_ int, link *goquery.Selection) {
		label := strings.TrimSpace(link.Text())
		if label == "" {
			return
		}
		tags = append(tags, label)
	})
	details.Genre = strings.Join(tags, ", ")

	return details, nil
}
// GetChapterList fetches the manga page and returns its chapters in document
// order.
//
// Every element matching ul.chapter-list li, .chapters li, or .chapter-item
// is one candidate: its first <a> gives the URL (passed through stripDomain)
// and the name, and the first .date or <time> element, when present, is
// converted with util.ParseRelativeDate. Candidates without a URL are skipped.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	pageURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
	doc, err := s.get(context.Background(), pageURL)
	if err != nil {
		return nil, err
	}

	var list []source.SChapter
	rows := doc.Find("ul.chapter-list li, .chapters li, .chapter-item")
	rows.Each(func(_ int, row *goquery.Selection) {
		var entry source.SChapter

		if link := row.Find("a").First(); link.Length() > 0 {
			if href, found := link.Attr("href"); found {
				entry.URL = stripDomain(href, s.cfg.BaseURL)
			}
			entry.Name = strings.TrimSpace(link.Text())
		}

		if stamp := row.Find(".date, time").First(); stamp.Length() > 0 {
			entry.DateUpload = util.ParseRelativeDate(stamp.Text())
		}

		if entry.URL == "" {
			return
		}
		list = append(list, entry)
	})

	return list, nil
}
// pageJSONRe locates the chapter image list inside an inline script. It
// matches either of:
//
//	chapImages = '<list>'    (list in capture group 1)
//	"images": [ ... ]        (array in capture group 2)
var pageJSONRe = regexp.MustCompile(`chapImages\s*=\s*'([^']+)'|"images"\s*:\s*(\[.*?])`)

// parseImageBlob decodes an image list captured by pageJSONRe.
//
// A blob starting with '[' is decoded as a JSON array of strings; if that
// decode fails the result is nil, so a half-filled slice is never returned.
// Any other blob is treated as a comma-separated list, with surrounding
// whitespace trimmed and empty items dropped.
func parseImageBlob(blob string) []string {
	blob = strings.TrimSpace(blob)
	if blob == "" {
		return nil
	}

	if strings.HasPrefix(blob, "[") {
		var decoded []string
		if err := json.Unmarshal([]byte(blob), &decoded); err != nil {
			return nil
		}
		return decoded
	}

	var urls []string
	for _, part := range strings.Split(blob, ",") {
		if item := strings.TrimSpace(part); item != "" {
			urls = append(urls, item)
		}
	}
	return urls
}

// GetPageList fetches the chapter page and returns one source.Page per image.
//
// Image URLs come from the first <script> whose text matches pageJSONRe and
// yields a non-empty list (JSON array or comma-separated). When no script
// provides images, <img> elements under .reading-content or .chapter-content
// are used instead. Each page's URL is the chapter page URL, and each image
// URL is resolved against the configured BaseURL via util.AbsURL.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	rawURL := util.AbsURL(s.cfg.BaseURL, chapter.URL)
	doc, err := s.get(context.Background(), rawURL)
	if err != nil {
		return nil, err
	}

	var images []string
	doc.Find("script").Each(func(_ int, script *goquery.Selection) {
		if len(images) > 0 {
			return // an earlier script already supplied the list
		}
		m := pageJSONRe.FindStringSubmatch(script.Text())
		if m == nil {
			return
		}
		// Exactly one of the two alternatives matched; the other group is empty.
		blob := m[1]
		if blob == "" {
			blob = m[2]
		}
		images = parseImageBlob(blob)
	})

	if len(images) == 0 {
		doc.Find(".reading-content img, .chapter-content img").Each(func(_ int, img *goquery.Selection) {
			if u := imgAttr(img, s.cfg.BaseURL); u != "" {
				images = append(images, u)
			}
		})
	}

	// NOTE(review): some MadTheme sites appear to publish relative chapImages
	// paths next to a separate image-server variable; such paths are resolved
	// against BaseURL here — confirm against the target sites.
	pages := make([]source.Page, len(images))
	for i, img := range images {
		pages[i] = source.Page{Index: i, URL: rawURL, ImageURL: util.AbsURL(s.cfg.BaseURL, img)}
	}
	return pages, nil
}
// GetImageURL returns the image URL already stored on page; it performs no
// request and never fails.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}
// GetFilterList returns nil: this base defines no search filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
// stripDomain converts a link that points at the site itself into a
// site-relative request URI (path plus query, fragment dropped).
//
// href is returned unchanged when it cannot be parsed, when it names no host
// (relative links, mailto:, javascript:, ...), when baseURL cannot be parsed
// or has no host, or when the two hosts differ. Hosts are compared
// case-insensitively, and scheme-relative links such as //host/path are
// handled the same way as fully qualified ones.
func stripDomain(href, baseURL string) string {
	parsed, err := url.Parse(href)
	if err != nil || parsed.Host == "" {
		return href
	}

	base, err := url.Parse(baseURL)
	if err != nil || base.Host == "" {
		return href
	}

	// url.Parse keeps the host's original casing, so compare case-insensitively.
	if !strings.EqualFold(parsed.Host, base.Host) {
		return href
	}
	return parsed.RequestURI()
}
// imgAttr picks the image address from img, preferring lazy-loading
// attributes over src. The attributes are checked in the order data-lazy-src,
// data-src, data-cfsrc, src; the first one that is present, non-empty, and
// not a data: URI is resolved against baseURL with util.AbsURL. It returns ""
// when none qualifies.
func imgAttr(img *goquery.Selection, baseURL string) string {
	candidates := [...]string{"data-lazy-src", "data-src", "data-cfsrc", "src"}
	for i := range candidates {
		value, present := img.Attr(candidates[i])
		if !present || value == "" || strings.HasPrefix(value, "data:") {
			continue
		}
		return util.AbsURL(baseURL, value)
	}
	return ""
}