// Package mangathemesia implements the MangaThemesia WordPress theme base. // Pages extracted from ts_reader.run({...}) JS blob; FlareSolverr required. package mangathemesia import ( "context" "encoding/json" "fmt" "net/http" "net/url" "regexp" "strings" "github.com/PuerkitoBio/goquery" "goyomi/internal/httpclient/flare" "goyomi/internal/source" "goyomi/sources/base/util" ) // Config holds per-source configuration. type Config struct { Name string BaseURL string Lang string MangaURLDirectory string // e.g. "/manga" or "/manhwa" // Overridable selectors SearchMangaSelector string SeriesThumbSelector string SeriesAuthorSelector string SeriesArtistSelector string SeriesDescSelector string SeriesStatusSelector string SeriesGenreSelector string SeriesTitleSelector string ChapterListSelector string } func (c *Config) setDefaults() { if c.MangaURLDirectory == "" { c.MangaURLDirectory = "/manga" } if c.SearchMangaSelector == "" { c.SearchMangaSelector = "div.listupd div.bs, div.listupd div.bsx" } if c.SeriesThumbSelector == "" { c.SeriesThumbSelector = "div.thumb img, div.bigcontent img" } if c.SeriesAuthorSelector == "" { c.SeriesAuthorSelector = ".infotable tr:contains(Author) td:last-child, .tsinfo .imptdt:contains(Author) i" } if c.SeriesArtistSelector == "" { c.SeriesArtistSelector = ".infotable tr:contains(Artist) td:last-child, .tsinfo .imptdt:contains(Artist) i" } if c.SeriesDescSelector == "" { c.SeriesDescSelector = "div.entry-content[itemprop=description] p, div.synops" } if c.SeriesStatusSelector == "" { c.SeriesStatusSelector = ".infotable tr:contains(Status) td:last-child, .tsinfo .imptdt:contains(Status) i" } if c.SeriesGenreSelector == "" { c.SeriesGenreSelector = "div.gnr a, .mgen a, .seriestugenre a" } if c.SeriesTitleSelector == "" { c.SeriesTitleSelector = "h1.entry-title" } if c.ChapterListSelector == "" { c.ChapterListSelector = "div.bxcl li, div.cl li, #chapterlist li, ul li:has(div.chbox)" } } // Source implements source.CatalogueSource for MangaThemesia sites. type Source struct { cfg Config client *flare.Client id int64 } func New(cfg Config) *Source { cfg.setDefaults() opts := []flare.Option{flare.WithRateLimit(1, 2)} c := flare.NewClient(opts...) return &Source{ cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang), } } func (s *Source) ID() int64 { return s.id } func (s *Source) Name() string { return s.cfg.Name } func (s *Source) Lang() string { return s.cfg.Lang } func (s *Source) SupportsLatest() bool { return true } func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) if err != nil { return nil, err } req.Header.Set("Referer", s.cfg.BaseURL+"/") resp, err := s.client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("mangathemesia: HTTP %d for %s", resp.StatusCode, rawURL) } return goquery.NewDocumentFromReader(resp.Body) } func (s *Source) searchURL(page int, query string, orderBy string) string { base := strings.TrimRight(s.cfg.BaseURL, "/") dir := strings.Trim(s.cfg.MangaURLDirectory, "/") u, _ := url.Parse(base + "/" + dir + "/") q := u.Query() q.Set("title", query) q.Set("page", fmt.Sprintf("%d", page)) if orderBy != "" { q.Set("order", orderBy) } u.RawQuery = q.Encode() return u.String() } func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage { var mangas []source.SManga doc.Find(s.cfg.SearchMangaSelector).Each(func(_ int, el *goquery.Selection) { m := source.SManga{} el.Find("a").First().Each(func(_ int, a *goquery.Selection) { if href, ok := a.Attr("href"); ok { m.URL = stripDomain(href, s.cfg.BaseURL) } }) el.Find("div.tt, div.bigor .tt").First().Each(func(_ int, e *goquery.Selection) { m.Title = strings.TrimSpace(e.Text()) }) if m.Title == "" { el.Find("a").First().Each(func(_ int, a *goquery.Selection) { m.Title = strings.TrimSpace(a.AttrOr("title", a.Text())) }) } el.Find("img").First().Each(func(_ int, img *goquery.Selection) { m.ThumbnailURL = imgAttr(img, s.cfg.BaseURL) }) if m.URL != "" { mangas = append(mangas, m) } }) hasNext := doc.Find(".next, a.r, div.hpage a.r, .pagination .next").Length() > 0 return source.MangasPage{Mangas: mangas, HasNextPage: hasNext} } func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { doc, err := s.get(context.Background(), s.searchURL(page, "", "popular")) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { doc, err := s.get(context.Background(), s.searchURL(page, "", "update")) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { doc, err := s.get(context.Background(), s.searchURL(page, query, "")) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { rawURL := util.AbsURL(s.cfg.BaseURL, manga.URL) doc, err := s.get(context.Background(), rawURL) if err != nil { return manga, err } result := source.SManga{URL: manga.URL} doc.Find(s.cfg.SeriesTitleSelector).First().Each(func(_ int, el *goquery.Selection) { result.Title = strings.TrimSpace(el.Text()) }) doc.Find(s.cfg.SeriesThumbSelector).First().Each(func(_ int, img *goquery.Selection) { result.ThumbnailURL = imgAttr(img, s.cfg.BaseURL) }) doc.Find(s.cfg.SeriesAuthorSelector).First().Each(func(_ int, el *goquery.Selection) { result.Author = strings.TrimSpace(el.Text()) }) doc.Find(s.cfg.SeriesArtistSelector).First().Each(func(_ int, el *goquery.Selection) { result.Artist = strings.TrimSpace(el.Text()) }) var descParts []string doc.Find(s.cfg.SeriesDescSelector).Each(func(_ int, el *goquery.Selection) { if t := strings.TrimSpace(el.Text()); t != "" { descParts = append(descParts, t) } }) result.Description = strings.Join(descParts, "\n\n") doc.Find(s.cfg.SeriesStatusSelector).First().Each(func(_ int, el *goquery.Selection) { result.Status = util.StatusFromString(el.Text()) }) var genres []string doc.Find(s.cfg.SeriesGenreSelector).Each(func(_ int, el *goquery.Selection) { if t := strings.TrimSpace(el.Text()); t != "" { genres = append(genres, t) } }) result.Genre = strings.Join(genres, ", ") return result, nil } func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { rawURL := util.AbsURL(s.cfg.BaseURL, manga.URL) doc, err := s.get(context.Background(), rawURL) if err != nil { return nil, err } var chapters []source.SChapter doc.Find(s.cfg.ChapterListSelector).Each(func(i int, el *goquery.Selection) { ch := source.SChapter{} el.Find("a").First().Each(func(_ int, a *goquery.Selection) { if href, ok := a.Attr("href"); ok { ch.URL = stripDomain(href, s.cfg.BaseURL) } el.Find(".chnum").First().Each(func(_ int, e *goquery.Selection) { ch.Name = strings.TrimSpace(e.Text()) }) if ch.Name == "" { ch.Name = strings.TrimSpace(a.Text()) } }) el.Find(".chapterdate").First().Each(func(_ int, e *goquery.Selection) { ch.DateUpload = util.ParseAbsoluteDate(strings.TrimSpace(e.Text()), "January 02, 2006") if ch.DateUpload == 0 { ch.DateUpload = util.ParseRelativeDate(e.Text()) } }) if ch.URL != "" { chapters = append(chapters, ch) } }) return chapters, nil } // jsonImageListRe extracts the images array from ts_reader.run({..., "images": [...], ...}). var jsonImageListRe = regexp.MustCompile(`"images"\s*:\s*(\[.*?])`) func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { rawURL := util.AbsURL(s.cfg.BaseURL, chapter.URL) doc, err := s.get(context.Background(), rawURL) if err != nil { return nil, err } // Find ts_reader.run({...}) script var imageListJSON string doc.Find("script").Each(func(_ int, script *goquery.Selection) { text := script.Text() if strings.Contains(text, "ts_reader.run") { if m := jsonImageListRe.FindStringSubmatch(text); len(m) > 1 { imageListJSON = m[1] } } }) if imageListJSON == "" { return nil, fmt.Errorf("mangathemesia: could not find ts_reader image list") } var images []string if err := json.Unmarshal([]byte(imageListJSON), &images); err != nil { return nil, fmt.Errorf("mangathemesia: parse images: %w", err) } pages := make([]source.Page, len(images)) for i, img := range images { pages[i] = source.Page{Index: i, URL: rawURL, ImageURL: util.AbsURL(s.cfg.BaseURL, img)} } return pages, nil } func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } func (s *Source) GetFilterList() []source.Filter { return nil } func stripDomain(href, baseURL string) string { parsed, err := url.Parse(href) if err != nil || !parsed.IsAbs() { return href } base, err := url.Parse(baseURL) if err != nil { return href } if parsed.Host != base.Host { return href } return parsed.RequestURI() } func imgAttr(img *goquery.Selection, baseURL string) string { for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "src"} { if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") { return util.AbsURL(baseURL, v) } } return "" }