b199bad30d
- Add internal/httpclient/flare package for Cloudflare-protected sites - Update 7 bases (madara, zmanga, mangaworld, mangathemesia, mangareader, libgroup, liliana) to use flare client - Remove unused internal/config/source.go
320 lines
9.5 KiB
Go
Executable File
320 lines
9.5 KiB
Go
Executable File
// Package mangathemesia implements the MangaThemesia WordPress theme base.
|
|
// Pages extracted from ts_reader.run({...}) JS blob; FlareSolverr required.
|
|
package mangathemesia
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"goyomi/internal/httpclient/flare"
|
|
"goyomi/internal/source"
|
|
"goyomi/sources/base/util"
|
|
)
|
|
|
|
// Config describes one MangaThemesia-based site. Only Name, BaseURL and Lang
// have no built-in fallback; every other empty field is filled in by
// setDefaults.
type Config struct {
	// Name is the source name reported by Source.Name.
	Name string
	// BaseURL is the site root that relative links are resolved against.
	BaseURL string
	// Lang is the language value reported by Source.Lang.
	Lang string
	// MangaURLDirectory is the listing path, e.g. "/manga" or "/manhwa".
	MangaURLDirectory string

	// Selector overrides. Each one is a CSS selector evaluated with goquery.

	// SearchMangaSelector matches one entry in a listing/search page.
	SearchMangaSelector string
	// SeriesThumbSelector matches the cover image on a series page.
	SeriesThumbSelector string
	// SeriesAuthorSelector matches the author text on a series page.
	SeriesAuthorSelector string
	// SeriesArtistSelector matches the artist text on a series page.
	SeriesArtistSelector string
	// SeriesDescSelector matches the description paragraphs on a series page.
	SeriesDescSelector string
	// SeriesStatusSelector matches the publication status on a series page.
	SeriesStatusSelector string
	// SeriesGenreSelector matches the individual genre links on a series page.
	SeriesGenreSelector string
	// SeriesTitleSelector matches the series title on a series page.
	SeriesTitleSelector string
	// ChapterListSelector matches one chapter row on a series page.
	ChapterListSelector string
}
|
|
|
|
func (c *Config) setDefaults() {
|
|
if c.MangaURLDirectory == "" {
|
|
c.MangaURLDirectory = "/manga"
|
|
}
|
|
if c.SearchMangaSelector == "" {
|
|
c.SearchMangaSelector = "div.listupd div.bs, div.listupd div.bsx"
|
|
}
|
|
if c.SeriesThumbSelector == "" {
|
|
c.SeriesThumbSelector = "div.thumb img, div.bigcontent img"
|
|
}
|
|
if c.SeriesAuthorSelector == "" {
|
|
c.SeriesAuthorSelector = ".infotable tr:contains(Author) td:last-child, .tsinfo .imptdt:contains(Author) i"
|
|
}
|
|
if c.SeriesArtistSelector == "" {
|
|
c.SeriesArtistSelector = ".infotable tr:contains(Artist) td:last-child, .tsinfo .imptdt:contains(Artist) i"
|
|
}
|
|
if c.SeriesDescSelector == "" {
|
|
c.SeriesDescSelector = "div.entry-content[itemprop=description] p, div.synops"
|
|
}
|
|
if c.SeriesStatusSelector == "" {
|
|
c.SeriesStatusSelector = ".infotable tr:contains(Status) td:last-child, .tsinfo .imptdt:contains(Status) i"
|
|
}
|
|
if c.SeriesGenreSelector == "" {
|
|
c.SeriesGenreSelector = "div.gnr a, .mgen a, .seriestugenre a"
|
|
}
|
|
if c.SeriesTitleSelector == "" {
|
|
c.SeriesTitleSelector = "h1.entry-title"
|
|
}
|
|
if c.ChapterListSelector == "" {
|
|
c.ChapterListSelector = "div.bxcl li, div.cl li, #chapterlist li, ul li:has(div.chbox)"
|
|
}
|
|
}
|
|
|
|
// Source implements source.CatalogueSource for MangaThemesia sites.
|
|
type Source struct {
|
|
cfg Config
|
|
client *flare.Client
|
|
id int64
|
|
}
|
|
|
|
func New(cfg Config) *Source {
|
|
cfg.setDefaults()
|
|
opts := []flare.Option{flare.WithRateLimit(1, 2)}
|
|
c := flare.NewClient(opts...)
|
|
return &Source{
|
|
cfg: cfg,
|
|
client: c,
|
|
id: source.GenerateSourceID(cfg.Name, cfg.Lang),
|
|
}
|
|
}
|
|
|
|
// ID returns the source identifier computed once in New.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
|
// Lang returns the configured source language.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
|
// SupportsLatest always reports true: this base implements GetLatestUpdates.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("mangathemesia: HTTP %d for %s", resp.StatusCode, rawURL)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func (s *Source) searchURL(page int, query string, orderBy string) string {
|
|
base := strings.TrimRight(s.cfg.BaseURL, "/")
|
|
dir := strings.Trim(s.cfg.MangaURLDirectory, "/")
|
|
u, _ := url.Parse(base + "/" + dir + "/")
|
|
q := u.Query()
|
|
q.Set("title", query)
|
|
q.Set("page", fmt.Sprintf("%d", page))
|
|
if orderBy != "" {
|
|
q.Set("order", orderBy)
|
|
}
|
|
u.RawQuery = q.Encode()
|
|
return u.String()
|
|
}
|
|
|
|
func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
|
|
var mangas []source.SManga
|
|
doc.Find(s.cfg.SearchMangaSelector).Each(func(_ int, el *goquery.Selection) {
|
|
m := source.SManga{}
|
|
el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
|
|
if href, ok := a.Attr("href"); ok {
|
|
m.URL = stripDomain(href, s.cfg.BaseURL)
|
|
}
|
|
})
|
|
el.Find("div.tt, div.bigor .tt").First().Each(func(_ int, e *goquery.Selection) {
|
|
m.Title = strings.TrimSpace(e.Text())
|
|
})
|
|
if m.Title == "" {
|
|
el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
|
|
m.Title = strings.TrimSpace(a.AttrOr("title", a.Text()))
|
|
})
|
|
}
|
|
el.Find("img").First().Each(func(_ int, img *goquery.Selection) {
|
|
m.ThumbnailURL = imgAttr(img, s.cfg.BaseURL)
|
|
})
|
|
if m.URL != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
hasNext := doc.Find(".next, a.r, div.hpage a.r, .pagination .next").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.searchURL(page, "", "popular"))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.searchURL(page, "", "update"))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.searchURL(page, query, ""))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
rawURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := source.SManga{URL: manga.URL}
|
|
|
|
doc.Find(s.cfg.SeriesTitleSelector).First().Each(func(_ int, el *goquery.Selection) {
|
|
result.Title = strings.TrimSpace(el.Text())
|
|
})
|
|
doc.Find(s.cfg.SeriesThumbSelector).First().Each(func(_ int, img *goquery.Selection) {
|
|
result.ThumbnailURL = imgAttr(img, s.cfg.BaseURL)
|
|
})
|
|
doc.Find(s.cfg.SeriesAuthorSelector).First().Each(func(_ int, el *goquery.Selection) {
|
|
result.Author = strings.TrimSpace(el.Text())
|
|
})
|
|
doc.Find(s.cfg.SeriesArtistSelector).First().Each(func(_ int, el *goquery.Selection) {
|
|
result.Artist = strings.TrimSpace(el.Text())
|
|
})
|
|
var descParts []string
|
|
doc.Find(s.cfg.SeriesDescSelector).Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
descParts = append(descParts, t)
|
|
}
|
|
})
|
|
result.Description = strings.Join(descParts, "\n\n")
|
|
doc.Find(s.cfg.SeriesStatusSelector).First().Each(func(_ int, el *goquery.Selection) {
|
|
result.Status = util.StatusFromString(el.Text())
|
|
})
|
|
var genres []string
|
|
doc.Find(s.cfg.SeriesGenreSelector).Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
result.Genre = strings.Join(genres, ", ")
|
|
return result, nil
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
rawURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var chapters []source.SChapter
|
|
doc.Find(s.cfg.ChapterListSelector).Each(func(i int, el *goquery.Selection) {
|
|
ch := source.SChapter{}
|
|
el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
|
|
if href, ok := a.Attr("href"); ok {
|
|
ch.URL = stripDomain(href, s.cfg.BaseURL)
|
|
}
|
|
el.Find(".chnum").First().Each(func(_ int, e *goquery.Selection) {
|
|
ch.Name = strings.TrimSpace(e.Text())
|
|
})
|
|
if ch.Name == "" {
|
|
ch.Name = strings.TrimSpace(a.Text())
|
|
}
|
|
})
|
|
el.Find(".chapterdate").First().Each(func(_ int, e *goquery.Selection) {
|
|
ch.DateUpload = util.ParseAbsoluteDate(strings.TrimSpace(e.Text()), "January 02, 2006")
|
|
if ch.DateUpload == 0 {
|
|
ch.DateUpload = util.ParseRelativeDate(e.Text())
|
|
}
|
|
})
|
|
if ch.URL != "" {
|
|
chapters = append(chapters, ch)
|
|
}
|
|
})
|
|
return chapters, nil
|
|
}
|
|
|
|
// jsonImageListRe captures the first "images" JSON array found in a
// ts_reader.run({..., "images": [...], ...}) call. Capture group 1 is the
// array literal including its brackets.
//
// The (?s) flag lets "." match newlines so that an array spread over several
// lines (pretty-printed JSON) is captured too; single-line input matches
// exactly as it would without the flag. The match is lazy and ends at the
// first "]" after the opening bracket.
var jsonImageListRe = regexp.MustCompile(`(?s)"images"\s*:\s*(\[.*?])`)
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
rawURL := util.AbsURL(s.cfg.BaseURL, chapter.URL)
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Find ts_reader.run({...}) script
|
|
var imageListJSON string
|
|
doc.Find("script").Each(func(_ int, script *goquery.Selection) {
|
|
text := script.Text()
|
|
if strings.Contains(text, "ts_reader.run") {
|
|
if m := jsonImageListRe.FindStringSubmatch(text); len(m) > 1 {
|
|
imageListJSON = m[1]
|
|
}
|
|
}
|
|
})
|
|
|
|
if imageListJSON == "" {
|
|
return nil, fmt.Errorf("mangathemesia: could not find ts_reader image list")
|
|
}
|
|
|
|
var images []string
|
|
if err := json.Unmarshal([]byte(imageListJSON), &images); err != nil {
|
|
return nil, fmt.Errorf("mangathemesia: parse images: %w", err)
|
|
}
|
|
|
|
pages := make([]source.Page, len(images))
|
|
for i, img := range images {
|
|
pages[i] = source.Page{Index: i, URL: rawURL, ImageURL: util.AbsURL(s.cfg.BaseURL, img)}
|
|
}
|
|
return pages, nil
|
|
}
|
|
|
|
func (s *Source) GetImageURL(page source.Page) (string, error) {
|
|
return page.ImageURL, nil
|
|
}
|
|
|
|
func (s *Source) GetFilterList() []source.Filter {
|
|
return nil
|
|
}
|
|
|
|
// stripDomain turns an absolute link that points at the site itself into a
// site-relative request URI (path plus query, fragment dropped).
//
// href is returned unchanged when it cannot be parsed, is not absolute (has
// no scheme), when baseURL cannot be parsed, or when it points at a
// different host. Host names are compared case-insensitively, so
// "https://EXAMPLE.com/x" is treated as the same site as
// "https://example.com".
func stripDomain(href, baseURL string) string {
	parsed, err := url.Parse(href)
	if err != nil || !parsed.IsAbs() {
		return href
	}
	base, err := url.Parse(baseURL)
	if err != nil {
		return href
	}
	// url.Parse preserves the host's original casing, so a byte-wise
	// comparison would wrongly treat differently-cased hosts as foreign.
	if !strings.EqualFold(parsed.Host, base.Host) {
		return href
	}
	return parsed.RequestURI()
}
|
|
|
|
func imgAttr(img *goquery.Selection, baseURL string) string {
|
|
for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "src"} {
|
|
if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") {
|
|
return util.AbsURL(baseURL, v)
|
|
}
|
|
}
|
|
return ""
|
|
}
|