ca609ccae7
Ports bases from previous session: util (shared helpers), bakkin, fmreader, foolslide, gigaviewer, gmanga, grouple, guya, heancms, hentaihand, kemono, madara, madtheme, mangadventure, mangahub, mangathemesia, mangaworld, mmrcms, senkuro, wpcomics.
319 lines
9.5 KiB
Go
319 lines
9.5 KiB
Go
// Package mangathemesia implements the MangaThemesia WordPress theme base.
|
|
// Page image URLs are extracted from the ts_reader.run({...}) JS blob embedded in chapter pages; FlareSolverr is required.
|
|
package mangathemesia
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"goyomi/internal/httpclient"
|
|
"goyomi/internal/source"
|
|
"goyomi/sources/base/util"
|
|
)
|
|
|
|
// Config holds the per-site settings for one MangaThemesia source. Any
// directory or selector field left empty is replaced with a built-in default
// by setDefaults.
type Config struct {
	// Site identity and location.
	Name    string
	BaseURL string
	Lang    string

	// MangaURLDirectory is the listing path, e.g. "/manga" or "/manhwa".
	MangaURLDirectory string

	// Overridable CSS selectors used on listing pages.
	SearchMangaSelector string

	// Overridable CSS selectors used on the series page.
	SeriesThumbSelector  string
	SeriesAuthorSelector string
	SeriesArtistSelector string
	SeriesDescSelector   string
	SeriesStatusSelector string
	SeriesGenreSelector  string
	SeriesTitleSelector  string
	ChapterListSelector  string
}
|
|
|
|
func (c *Config) setDefaults() {
|
|
if c.MangaURLDirectory == "" {
|
|
c.MangaURLDirectory = "/manga"
|
|
}
|
|
if c.SearchMangaSelector == "" {
|
|
c.SearchMangaSelector = "div.listupd div.bs, div.listupd div.bsx"
|
|
}
|
|
if c.SeriesThumbSelector == "" {
|
|
c.SeriesThumbSelector = "div.thumb img, div.bigcontent img"
|
|
}
|
|
if c.SeriesAuthorSelector == "" {
|
|
c.SeriesAuthorSelector = ".infotable tr:contains(Author) td:last-child, .tsinfo .imptdt:contains(Author) i"
|
|
}
|
|
if c.SeriesArtistSelector == "" {
|
|
c.SeriesArtistSelector = ".infotable tr:contains(Artist) td:last-child, .tsinfo .imptdt:contains(Artist) i"
|
|
}
|
|
if c.SeriesDescSelector == "" {
|
|
c.SeriesDescSelector = "div.entry-content[itemprop=description] p, div.synops"
|
|
}
|
|
if c.SeriesStatusSelector == "" {
|
|
c.SeriesStatusSelector = ".infotable tr:contains(Status) td:last-child, .tsinfo .imptdt:contains(Status) i"
|
|
}
|
|
if c.SeriesGenreSelector == "" {
|
|
c.SeriesGenreSelector = "div.gnr a, .mgen a, .seriestugenre a"
|
|
}
|
|
if c.SeriesTitleSelector == "" {
|
|
c.SeriesTitleSelector = "h1.entry-title"
|
|
}
|
|
if c.ChapterListSelector == "" {
|
|
c.ChapterListSelector = "div.bxcl li, div.cl li, #chapterlist li, ul li:has(div.chbox)"
|
|
}
|
|
}
|
|
|
|
// Source implements source.CatalogueSource for MangaThemesia sites.
|
|
type Source struct {
|
|
cfg Config
|
|
client *httpclient.Client
|
|
id int64
|
|
}
|
|
|
|
func New(cfg Config) *Source {
|
|
cfg.setDefaults()
|
|
c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
|
|
return &Source{
|
|
cfg: cfg,
|
|
client: c,
|
|
id: source.GenerateSourceID(cfg.Name, cfg.Lang),
|
|
}
|
|
}
|
|
|
|
// ID returns the source identifier computed when the Source was created.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
|
// Lang returns the configured source language.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
|
// SupportsLatest always reports true: GetLatestUpdates is implemented.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("mangathemesia: HTTP %d for %s", resp.StatusCode, rawURL)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func (s *Source) searchURL(page int, query string, orderBy string) string {
|
|
base := strings.TrimRight(s.cfg.BaseURL, "/")
|
|
dir := strings.Trim(s.cfg.MangaURLDirectory, "/")
|
|
u, _ := url.Parse(base + "/" + dir + "/")
|
|
q := u.Query()
|
|
q.Set("title", query)
|
|
q.Set("page", fmt.Sprintf("%d", page))
|
|
if orderBy != "" {
|
|
q.Set("order", orderBy)
|
|
}
|
|
u.RawQuery = q.Encode()
|
|
return u.String()
|
|
}
|
|
|
|
func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
|
|
var mangas []source.SManga
|
|
doc.Find(s.cfg.SearchMangaSelector).Each(func(_ int, el *goquery.Selection) {
|
|
m := source.SManga{}
|
|
el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
|
|
if href, ok := a.Attr("href"); ok {
|
|
m.URL = stripDomain(href, s.cfg.BaseURL)
|
|
}
|
|
})
|
|
el.Find("div.tt, div.bigor .tt").First().Each(func(_ int, e *goquery.Selection) {
|
|
m.Title = strings.TrimSpace(e.Text())
|
|
})
|
|
if m.Title == "" {
|
|
el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
|
|
m.Title = strings.TrimSpace(a.AttrOr("title", a.Text()))
|
|
})
|
|
}
|
|
el.Find("img").First().Each(func(_ int, img *goquery.Selection) {
|
|
m.ThumbnailURL = imgAttr(img, s.cfg.BaseURL)
|
|
})
|
|
if m.URL != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
hasNext := doc.Find(".next, a.r, div.hpage a.r, .pagination .next").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.searchURL(page, "", "popular"))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.searchURL(page, "", "update"))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.searchURL(page, query, ""))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
rawURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := source.SManga{URL: manga.URL}
|
|
|
|
doc.Find(s.cfg.SeriesTitleSelector).First().Each(func(_ int, el *goquery.Selection) {
|
|
result.Title = strings.TrimSpace(el.Text())
|
|
})
|
|
doc.Find(s.cfg.SeriesThumbSelector).First().Each(func(_ int, img *goquery.Selection) {
|
|
result.ThumbnailURL = imgAttr(img, s.cfg.BaseURL)
|
|
})
|
|
doc.Find(s.cfg.SeriesAuthorSelector).First().Each(func(_ int, el *goquery.Selection) {
|
|
result.Author = strings.TrimSpace(el.Text())
|
|
})
|
|
doc.Find(s.cfg.SeriesArtistSelector).First().Each(func(_ int, el *goquery.Selection) {
|
|
result.Artist = strings.TrimSpace(el.Text())
|
|
})
|
|
var descParts []string
|
|
doc.Find(s.cfg.SeriesDescSelector).Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
descParts = append(descParts, t)
|
|
}
|
|
})
|
|
result.Description = strings.Join(descParts, "\n\n")
|
|
doc.Find(s.cfg.SeriesStatusSelector).First().Each(func(_ int, el *goquery.Selection) {
|
|
result.Status = util.StatusFromString(el.Text())
|
|
})
|
|
var genres []string
|
|
doc.Find(s.cfg.SeriesGenreSelector).Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
result.Genre = strings.Join(genres, ", ")
|
|
return result, nil
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
rawURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var chapters []source.SChapter
|
|
doc.Find(s.cfg.ChapterListSelector).Each(func(i int, el *goquery.Selection) {
|
|
ch := source.SChapter{}
|
|
el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
|
|
if href, ok := a.Attr("href"); ok {
|
|
ch.URL = stripDomain(href, s.cfg.BaseURL)
|
|
}
|
|
el.Find(".chnum").First().Each(func(_ int, e *goquery.Selection) {
|
|
ch.Name = strings.TrimSpace(e.Text())
|
|
})
|
|
if ch.Name == "" {
|
|
ch.Name = strings.TrimSpace(a.Text())
|
|
}
|
|
})
|
|
el.Find(".chapterdate").First().Each(func(_ int, e *goquery.Selection) {
|
|
ch.DateUpload = util.ParseAbsoluteDate(strings.TrimSpace(e.Text()), "January 02, 2006")
|
|
if ch.DateUpload == 0 {
|
|
ch.DateUpload = util.ParseRelativeDate(e.Text())
|
|
}
|
|
})
|
|
if ch.URL != "" {
|
|
chapters = append(chapters, ch)
|
|
}
|
|
})
|
|
return chapters, nil
|
|
}
|
|
|
|
// jsonImageListRe captures the JSON array assigned to the "images" key inside
// a ts_reader.run({..., "images": [...], ...}) call. The (?s) flag lets the
// lazy match cross line breaks, so an array spread over several lines is
// captured as well; the capture still ends at the first "]" after the opening
// bracket.
var jsonImageListRe = regexp.MustCompile(`(?s)"images"\s*:\s*(\[.*?])`)
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
rawURL := util.AbsURL(s.cfg.BaseURL, chapter.URL)
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Find ts_reader.run({...}) script
|
|
var imageListJSON string
|
|
doc.Find("script").Each(func(_ int, script *goquery.Selection) {
|
|
text := script.Text()
|
|
if strings.Contains(text, "ts_reader.run") {
|
|
if m := jsonImageListRe.FindStringSubmatch(text); len(m) > 1 {
|
|
imageListJSON = m[1]
|
|
}
|
|
}
|
|
})
|
|
|
|
if imageListJSON == "" {
|
|
return nil, fmt.Errorf("mangathemesia: could not find ts_reader image list")
|
|
}
|
|
|
|
var images []string
|
|
if err := json.Unmarshal([]byte(imageListJSON), &images); err != nil {
|
|
return nil, fmt.Errorf("mangathemesia: parse images: %w", err)
|
|
}
|
|
|
|
pages := make([]source.Page, len(images))
|
|
for i, img := range images {
|
|
pages[i] = source.Page{Index: i, URL: rawURL, ImageURL: util.AbsURL(s.cfg.BaseURL, img)}
|
|
}
|
|
return pages, nil
|
|
}
|
|
|
|
func (s *Source) GetImageURL(page source.Page) (string, error) {
|
|
return page.ImageURL, nil
|
|
}
|
|
|
|
func (s *Source) GetFilterList() []source.Filter {
|
|
return nil
|
|
}
|
|
|
|
// stripDomain reduces href to its path and query when it points at the same
// host as baseURL, and returns href unchanged otherwise.
//
// Hosts are compared case-insensitively, and scheme-relative links
// ("//host/path") are handled like absolute ones. href is returned as-is when
// it carries no host (already relative), when either URL fails to parse, or
// when the hosts differ.
func stripDomain(href, baseURL string) string {
	parsed, err := url.Parse(href)
	if err != nil || parsed.Host == "" {
		return href
	}
	base, err := url.Parse(baseURL)
	if err != nil || !strings.EqualFold(parsed.Host, base.Host) {
		return href
	}
	return parsed.RequestURI()
}
|
|
|
|
func imgAttr(img *goquery.Selection, baseURL string) string {
|
|
for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "src"} {
|
|
if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") {
|
|
return util.AbsURL(baseURL, v)
|
|
}
|
|
}
|
|
return ""
|
|
}
|