Files
goyomi/sources/base/mangataro/mangataro.go
T
2026-05-11 06:48:23 +00:00

418 lines
11 KiB
Go
Executable File

// Package mangataro implements the MangaTaro manga base.
// WP JSON API (browse/details) + custom auth endpoints with MD5 token (chapters/pages); CF-protected.
package mangataro
import (
	"bytes"
	"context"
	"crypto/md5"
	"encoding/json"
	"fmt"
	"html"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"goyomi/internal/httpclient"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
// Config describes one MangaTaro-based site.
type Config struct {
	Name    string // source name; reported by Name and fed into the source ID
	BaseURL string // site root; trailing slashes are trimmed before URLs are built
	Lang    string // language code; reported by Lang and used to filter chapters
}
// Source is the MangaTaro source implementation; construct it with New.
type Source struct {
// cfg is the configuration passed to New.
cfg Config
// client performs every HTTP request issued by this source.
client *httpclient.Client
// id is computed once in New from the configured name and language.
id int64
}
// New builds a Source for the given configuration.
//
// The HTTP client is created with httpclient.WithRateLimit(1, 2), and the
// source ID is derived from the configured name and language.
func New(cfg Config) *Source {
	src := &Source{cfg: cfg}
	src.client = httpclient.NewClient(httpclient.WithRateLimit(1, 2))
	src.id = source.GenerateSourceID(cfg.Name, cfg.Lang)
	return src
}
// ID returns the source identifier computed when the Source was built.
func (s *Source) ID() int64 {
	return s.id
}
// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}
// Lang returns the configured language code.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
// SupportsLatest always reports true: GetLatestUpdates is implemented.
func (s *Source) SupportsLatest() bool {
	return true
}
// base returns the configured base URL with all trailing slashes removed, so
// that paths beginning with "/" can be appended directly.
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
// doGet issues a GET request for rawURL and decodes the JSON response into out.
//
// The request carries the site root as Referer and asks for JSON. An error is
// returned when the request cannot be built or sent, when the status is not
// 200 OK, when the body cannot be read in full, or when it is not valid JSON
// for out.
func (s *Source) doGet(ctx context.Context, rawURL string, out any) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if err != nil {
		return err
	}
	// Use the normalised base so a trailing slash in the configuration does
	// not produce a "//" Referer.
	req.Header.Set("Referer", s.base()+"/")
	req.Header.Set("Accept", "application/json")

	resp, err := s.client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("mangataro: HTTP %d for %s", resp.StatusCode, rawURL)
	}

	// A failed read must not be passed on to the decoder: a truncated body
	// would otherwise surface as a misleading JSON syntax error.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("mangataro: reading response from %s: %w", rawURL, err)
	}
	if err := json.Unmarshal(body, out); err != nil {
		return fmt.Errorf("mangataro: decoding response from %s: %w", rawURL, err)
	}
	return nil
}
// doPost JSON-encodes payload, POSTs it to rawURL and decodes the JSON
// response into out.
//
// The request carries the site root as Referer and declares a JSON body. An
// error is returned when the payload cannot be encoded, when the request
// cannot be built or sent, when the status is not 200 OK, when the body cannot
// be read in full, or when it is not valid JSON for out.
func (s *Source) doPost(ctx context.Context, rawURL string, payload any, out any) error {
	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, rawURL, bytes.NewReader(body))
	if err != nil {
		return err
	}
	// Use the normalised base so a trailing slash in the configuration does
	// not produce a "//" Referer.
	req.Header.Set("Referer", s.base()+"/")
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Accept", "application/json")

	resp, err := s.client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("mangataro: HTTP %d for %s", resp.StatusCode, rawURL)
	}

	// A failed read must not be passed on to the decoder: a truncated body
	// would otherwise surface as a misleading JSON syntax error.
	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("mangataro: reading response from %s: %w", rawURL, err)
	}
	if err := json.Unmarshal(respBody, out); err != nil {
		return fmt.Errorf("mangataro: decoding response from %s: %w", rawURL, err)
	}
	return nil
}
// mangaURLDTO is the value kept, JSON-encoded, in a manga's URL field. It
// holds both identifiers of a title: ID and Slug.
type mangaURLDTO struct {
	ID   string `json:"id"`
	Slug string `json:"slug"`
}

// encodeMangaURL packs id and slug into the JSON string stored as a manga URL.
func encodeMangaURL(id, slug string) string {
	// Marshalling a struct made of two strings cannot fail, so the error is
	// deliberately discarded.
	encoded, _ := json.Marshal(mangaURLDTO{ID: id, Slug: slug})
	return string(encoded)
}

// decodeMangaURL is the inverse of encodeMangaURL. Decoding is best-effort:
// the error is ignored, and callers treat an empty ID as "could not decode".
func decodeMangaURL(raw string) mangaURLDTO {
	decoded := mangaURLDTO{}
	_ = json.Unmarshal([]byte(raw), &decoded)
	return decoded
}
// Search/browse DTOs.
//
// searchPayload is the JSON body that browse posts to /wp-json/manga/v1/load.
// Years, Genres, Types and Statuses are strings rather than arrays; browse
// always sends the literal "[]" for each of them.
type searchPayload struct {
Page int `json:"page"`
Search string `json:"search"`
Years string `json:"years"`
Genres string `json:"genres"`
Types string `json:"types"`
Statuses string `json:"statuses"`
Sort string `json:"sort"`
GenreMatchMode string `json:"genreMatchMode"`
}
// browseManga is one element of the array returned by the browse endpoint.
// browse skips entries whose Type is "Novel" or whose URL is empty.
type browseManga struct {
ID string `json:"id"`
URL string `json:"url"` // slug
Title string `json:"title"`
Cover string `json:"cover"`
Type string `json:"type"`
Description string `json:"description"`
Status string `json:"status"`
}
// browse fetches one page of the catalogue from /wp-json/manga/v1/load.
//
// search is the free-text query (empty for plain listings) and sort is the
// ordering key sent to the endpoint. Entries typed "Novel" and entries without
// a slug are left out of the result.
func (s *Source) browse(ctx context.Context, page int, search, sort string) (source.MangasPage, error) {
	endpoint := s.base() + "/wp-json/manga/v1/load"
	request := searchPayload{
		Page:           page,
		Search:         search,
		Years:          "[]",
		Genres:         "[]",
		Types:          "[]",
		Statuses:       "[]",
		Sort:           sort,
		GenreMatchMode: "and",
	}

	var results []browseManga
	err := s.doPost(ctx, endpoint, request, &results)
	if err != nil {
		return source.MangasPage{}, err
	}

	var list []source.SManga
	for i := range results {
		entry := &results[i]
		if entry.Type != "Novel" && entry.URL != "" {
			list = append(list, source.SManga{
				URL:          encodeMangaURL(entry.ID, entry.URL),
				Title:        unescapeHTML(entry.Title),
				ThumbnailURL: entry.Cover,
				Description:  unescapeHTML(entry.Description),
				Status:       parseStatus(entry.Status),
			})
		}
	}

	// A page of exactly 24 raw entries is taken to mean more pages follow
	// (the Kotlin original checks data.size == 24).
	return source.MangasPage{Mangas: list, HasNextPage: len(results) == 24}, nil
}
// GetPopularManga returns the requested catalogue page ordered by
// "popular_desc", with no search text.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	ctx := context.Background()
	return s.browse(ctx, page, "", "popular_desc")
}
// GetLatestUpdates returns the requested catalogue page ordered by
// "post_desc", with no search text.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	ctx := context.Background()
	return s.browse(ctx, page, "", "post_desc")
}
// GetSearchManga returns the requested page of results for query, ordered by
// "popular_desc". The filters argument is currently ignored.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	ctx := context.Background()
	return s.browse(ctx, page, query, "popular_desc")
}
// Manga details DTOs (WP JSON API).
//
// mangaDetails is the response decoded by GetMangaDetails from
// /wp-json/wp/v2/manga/{id}?_embed.
type mangaDetails struct {
ID int `json:"id"`
Slug string `json:"slug"`
Title rendered `json:"title"`
Content rendered `json:"content"`
// Type is added to the genre list when no known type tag is present.
Type string `json:"type"`
Embedded embedded `json:"_embedded"`
}
// rendered wraps a field that the API delivers as {"rendered": "..."}; it is
// used for the title and content of mangaDetails.
type rendered struct {
Rendered string `json:"rendered"`
}
// embedded mirrors the "_embedded" object of the manga details response:
// the featured media (cover) entries and the term groups.
type embedded struct {
	FeaturedMedia []thumbnail `json:"wp:featuredmedia"`
	Terms         [][]term    `json:"wp:term"`
}

// getTerms returns the names of the first non-empty term group whose leading
// entry belongs to taxonomy, or nil when no group matches.
func (e embedded) getTerms(taxonomy string) []string {
	for _, bucket := range e.Terms {
		if len(bucket) == 0 || bucket[0].Taxonomy != taxonomy {
			continue
		}
		names := make([]string, 0, len(bucket))
		for _, entry := range bucket {
			names = append(names, entry.Name)
		}
		return names
	}
	return nil
}

// thumbnail is one featured-media entry; only its source URL is decoded.
type thumbnail struct {
	URL string `json:"source_url"`
}

// term is one taxonomy term (name plus the taxonomy it belongs to).
type term struct {
	Name     string `json:"name"`
	Taxonomy string `json:"taxonomy"`
}
// GetMangaDetails loads the full details of manga from the WP JSON API.
//
// manga.URL must be a value produced by encodeMangaURL; an error is returned
// when no ID can be decoded from it or when the request fails, and in both
// cases the input manga is returned unchanged. On success the result carries
// a freshly encoded URL, the title, a plain-text description, genres, authors
// and the cover; Status is copied from the input.
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	dto := decodeMangaURL(manga.URL)
	if dto.ID == "" {
		return manga, fmt.Errorf("mangataro: cannot decode manga URL: %s", manga.URL)
	}

	// The ID comes from stored data, so escape it before placing it in a path.
	u := fmt.Sprintf("%s/wp-json/wp/v2/manga/%s?_embed", s.base(), url.PathEscape(dto.ID))
	var data mangaDetails
	if err := s.doGet(context.Background(), u, &data); err != nil {
		return manga, err
	}

	// Collect genres in the order the API lists them, dropping duplicates.
	// Ranging over a map here would shuffle the genre string on every call.
	tags := data.Embedded.getTerms("post_tag")
	genres := make([]string, 0, len(tags)+1)
	seen := make(map[string]bool, len(tags)+1)
	for _, tag := range tags {
		if !seen[tag] {
			seen[tag] = true
			genres = append(genres, tag)
		}
	}
	// When none of the known type tags is present, fall back to the entry's
	// own type so the genre list still carries one.
	hasKnownType := seen["Manhwa"] || seen["Manhua"] || seen["Manga"]
	if !hasKnownType && data.Type != "" && !seen[data.Type] {
		genres = append(genres, data.Type)
	}

	result := source.SManga{
		URL:         encodeMangaURL(fmt.Sprint(data.ID), data.Slug),
		Title:       unescapeHTML(data.Title.Rendered),
		Description: plainText(data.Content.Rendered),
		Genre:       strings.Join(genres, ", "),
		Author:      strings.Join(data.Embedded.getTerms("manga_author"), ", "),
		Status:      manga.Status, // preserved from browse
	}
	if len(data.Embedded.FeaturedMedia) > 0 {
		result.ThumbnailURL = data.Embedded.FeaturedMedia[0].URL
	}
	return result, nil
}
// Chapter list DTOs.
//
// chapterList is the response decoded by GetChapterList from
// /auth/manga-chapters.
type chapterList struct {
Chapters []chapter `json:"chapters"`
}
// chapter is one entry of chapterList.
type chapter struct {
// URL may be absolute or site-relative; GetChapterList prefixes the base
// URL when it does not start with "http".
URL string `json:"url"`
// Chapter is appended to "Chapter " to form the display name.
Chapter string `json:"chapter"`
// Title is a pointer so that a JSON null decodes to nil.
Title *string `json:"title"`
// Date is handed to util.ParseRelativeDate.
Date string `json:"date"`
GroupName *string `json:"group_name"`
// Language is compared case-insensitively with the configured language.
Language string `json:"language"`
}
// md5Token derives the request token for a Unix timestamp: the first 16 hex
// characters of MD5("<timestamp>mng_ch_<UTC date as YYYY-MM-DD>").
func md5Token(timestamp int64) string {
	day := time.Unix(timestamp, 0).UTC().Format("2006-01-02")
	seed := fmt.Sprint(timestamp) + "mng_ch_" + day
	digest := md5.Sum([]byte(seed))
	// Eight bytes render as exactly sixteen hex characters.
	return fmt.Sprintf("%x", digest[:8])
}
// GetChapterList fetches the chapters of manga from /auth/manga-chapters and
// returns those whose language matches the configured one (case-insensitive).
//
// manga.URL must be a value produced by encodeMangaURL. An error is returned
// when no ID can be decoded from it, when the endpoint URL cannot be built
// from the configured base URL, or when the request fails.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	dto := decodeMangaURL(manga.URL)
	if dto.ID == "" {
		return nil, fmt.Errorf("mangataro: cannot decode manga URL: %s", manga.URL)
	}

	// A malformed base URL must surface as an error; ignoring it would leave
	// u nil and panic on the next line.
	u, err := url.Parse(s.base() + "/auth/manga-chapters")
	if err != nil {
		return nil, fmt.Errorf("mangataro: invalid base URL %q: %w", s.cfg.BaseURL, err)
	}

	// NOTE(review): the token is derived from the current timestamp, but the
	// timestamp itself is not sent — confirm the endpoint does not expect it.
	token := md5Token(time.Now().Unix())

	q := u.Query()
	q.Set("manga_id", dto.ID)
	q.Set("offset", "0")
	q.Set("limit", "9999")
	q.Set("order", "DESC")
	q.Set("_t", token)
	u.RawQuery = q.Encode()

	var data chapterList
	if err := s.doGet(context.Background(), u.String(), &data); err != nil {
		return nil, err
	}

	// Titles equal to one of these carry no information and are not shown.
	placeholders := map[string]bool{"": true, "N/A": true, "—": true}

	var chapters []source.SChapter
	for _, ch := range data.Chapters {
		if !strings.EqualFold(ch.Language, s.cfg.Lang) {
			continue
		}
		name := "Chapter " + ch.Chapter
		if ch.Title != nil && !placeholders[*ch.Title] {
			name += ": " + unescapeHTML(*ch.Title)
		}
		chURL := ch.URL
		if !strings.HasPrefix(chURL, "http") {
			chURL = s.base() + chURL
		}
		chapters = append(chapters, source.SChapter{
			URL:        chURL,
			Name:       name,
			DateUpload: util.ParseRelativeDate(ch.Date),
		})
	}
	return chapters, nil
}
// Pages DTO.
//
// pagesDTO is the response decoded by GetPageList from /auth/chapter-content;
// Images holds the page image URLs in reading order as delivered.
type pagesDTO struct {
Images []string `json:"images"`
}
// GetPageList fetches the page images of chapter from /auth/chapter-content.
//
// The chapter ID is the part of the last path segment of chapter.URL that
// follows the final "-" (or the whole segment when it has no "-"). An error
// is returned when the URL cannot be parsed, when no ID can be extracted, or
// when the request fails.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	chapterURL := chapter.URL
	if !strings.HasPrefix(chapterURL, "http") {
		chapterURL = s.base() + chapterURL
	}
	parsed, err := url.Parse(chapterURL)
	if err != nil {
		return nil, fmt.Errorf("mangataro: invalid chapter URL %q: %w", chapter.URL, err)
	}

	lastSeg := strings.TrimRight(parsed.Path, "/")
	if idx := strings.LastIndex(lastSeg, "/"); idx >= 0 {
		lastSeg = lastSeg[idx+1:]
	}
	chapterID := lastSeg
	if idx := strings.LastIndex(lastSeg, "-"); idx >= 0 {
		chapterID = lastSeg[idx+1:]
	}
	// Fail early instead of sending a request that cannot identify a chapter.
	if chapterID == "" {
		return nil, fmt.Errorf("mangataro: cannot extract chapter ID from URL: %s", chapter.URL)
	}

	// Encode the ID so unexpected characters cannot alter the query string.
	// NOTE(review): no "_t" token is sent here, although the package comment
	// mentions a token for pages as well — confirm against the endpoint.
	query := url.Values{"chapter_id": {chapterID}}
	u := s.base() + "/auth/chapter-content?" + query.Encode()

	var data pagesDTO
	if err := s.doGet(context.Background(), u, &data); err != nil {
		return nil, err
	}

	pages := make([]source.Page, len(data.Images))
	for i, img := range data.Images {
		pages[i] = source.Page{Index: i, ImageURL: img}
	}
	return pages, nil
}
// parseStatus maps the site's status label to a source status constant.
// Matching ignores case; unrecognised labels yield source.StatusUnknown.
func parseStatus(s string) int {
	switch label := strings.ToLower(s); label {
	case "ongoing":
		return source.StatusOngoing
	case "complete", "completed":
		return source.StatusCompleted
	case "hiatus", "on-hold", "on hold":
		return source.StatusHiatus
	case "canceled", "cancelled":
		return source.StatusCancelled
	default:
		return source.StatusUnknown
	}
}
// unescapeHTML decodes HTML entities in s, repeating until the text stops
// changing so that doubly escaped input such as "&amp;quot;" is fully decoded.
//
// Decoding uses the standard library, which covers "&amp;", named entities
// and numeric references such as "&#8217;".
func unescapeHTML(s string) string {
	for {
		decoded := html.UnescapeString(s)
		if decoded == s {
			return s
		}
		s = decoded
	}
}
// plainText strips HTML tags from html and trims surrounding whitespace.
//
// This is a quick approximation rather than a parser: everything from a "<"
// up to the next ">" is dropped. A ">" that does not close a tag is ordinary
// text and is kept. Entities are not decoded.
func plainText(html string) string {
	var b strings.Builder
	b.Grow(len(html))
	inTag := false
	for _, r := range html {
		switch {
		case r == '<':
			inTag = true
		case r == '>' && inTag:
			inTag = false
		case !inTag:
			b.WriteRune(r)
		}
	}
	return strings.TrimSpace(b.String())
}
// GetImageURL returns the image URL already stored on page; no request is made.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}
// GetFilterList returns nil: this source defines no filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}