373 lines
11 KiB
Go
Executable File
373 lines
11 KiB
Go
Executable File
// Package mangabox implements the MangaBox manga base.
|
|
// HTML scraping for lists; JSON API for chapter list; chapter pages via HTML + JS array extraction.
|
|
package mangabox
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"goyomi/internal/httpclient"
|
|
"goyomi/internal/source"
|
|
"goyomi/sources/base/util"
|
|
)
|
|
|
|
// Config describes a single MangaBox-style site. The three path fields are
// optional: New fills in the default noted on each one when it is left empty.
type Config struct {
	// Name is the source name reported by Source.Name.
	Name string
	// BaseURL is the site root that request URLs are built from.
	BaseURL string
	// Lang is the language code reported by Source.Lang.
	Lang string

	// PopularURLPath is the path of the popular listing.
	// Default: "manga-list/hot-manga".
	PopularURLPath string
	// LatestURLPath is the path of the latest-updates listing.
	// Default: "manga-list/latest-manga".
	LatestURLPath string
	// SimpleQueryPath is the path prefix of the search endpoint.
	// Default: "search/story".
	SimpleQueryPath string
}
|
|
|
|
type Source struct {
|
|
cfg Config
|
|
client *httpclient.Client
|
|
id int64
|
|
}
|
|
|
|
func New(cfg Config) *Source {
|
|
if cfg.PopularURLPath == "" {
|
|
cfg.PopularURLPath = "manga-list/hot-manga"
|
|
}
|
|
if cfg.LatestURLPath == "" {
|
|
cfg.LatestURLPath = "manga-list/latest-manga"
|
|
}
|
|
if cfg.SimpleQueryPath == "" {
|
|
cfg.SimpleQueryPath = "search/story"
|
|
}
|
|
c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
|
|
return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
|
|
}
|
|
|
|
// ID returns the source identifier computed when the Source was created.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
|
// Lang returns the configured language code.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
|
// SupportsLatest reports whether the source offers a latest-updates listing.
// It is always true for this source.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
// base returns the configured base URL with every trailing "/" removed, so
// callers can append "/path" without producing a double slash.
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("mangabox: HTTP %d", resp.StatusCode)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func (s *Source) getJSON(ctx context.Context, rawURL string, out any) error {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
|
req.Header.Set("Accept", "application/json")
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return fmt.Errorf("mangabox: HTTP %d", resp.StatusCode)
|
|
}
|
|
body, _ := io.ReadAll(resp.Body)
|
|
return json.Unmarshal(body, out)
|
|
}
|
|
|
|
func mangaFromElement(el *goquery.Selection, baseURL string) source.SManga {
|
|
m := source.SManga{}
|
|
a := el.Find("h3 a, h2 a").First()
|
|
if a.Length() == 0 {
|
|
a = el.Find("a").First()
|
|
}
|
|
m.URL = a.AttrOr("href", "")
|
|
m.Title = strings.TrimSpace(a.Text())
|
|
if thumb := el.Find("img").First().AttrOr("src", ""); thumb != "" {
|
|
m.ThumbnailURL = util.AbsURL(baseURL, thumb)
|
|
}
|
|
return m
|
|
}
|
|
|
|
func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
|
|
var mangas []source.SManga
|
|
sel := "div.truyen-list > div.list-truyen-item-wrap, div.comic-list > .list-comic-item-wrap"
|
|
doc.Find(sel).Each(func(_ int, el *goquery.Selection) {
|
|
m := mangaFromElement(el, s.cfg.BaseURL)
|
|
if m.URL != "" && m.Title != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
hasNext := doc.Find("div.group_page a:not([href]) + a:not(:contains(Last)), a.page_select + a:not(.page_last), a.page-select + a:not(.page-last)").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), fmt.Sprintf("%s/%s?page=%d", s.base(), s.cfg.PopularURLPath, page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), fmt.Sprintf("%s/%s?page=%d", s.base(), s.cfg.LatestURLPath, page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
slug := normalizeSearchQuery(query)
|
|
u := fmt.Sprintf("%s/%s/%s?page=%d", s.base(), s.cfg.SimpleQueryPath, slug, page)
|
|
doc, err := s.get(context.Background(), u)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
var mangas []source.SManga
|
|
doc.Find(".panel_story_list .story_item, div.list-truyen-item-wrap, div.list-comic-item-wrap").Each(func(_ int, el *goquery.Selection) {
|
|
m := mangaFromElement(el, s.cfg.BaseURL)
|
|
if m.URL != "" && m.Title != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
hasNext := doc.Find("a.page_select + a:not(.page_last), a.page-select + a:not(.page-last)").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := source.SManga{URL: manga.URL}
|
|
main := doc.Find("div.manga-info-top, div.panel-story-info").First()
|
|
|
|
result.Title = strings.TrimSpace(main.Find("h1").Text())
|
|
if result.Title == "" {
|
|
result.Title = manga.Title
|
|
}
|
|
if thumb := doc.Find("div.manga-info-pic img, span.info-image img").First().AttrOr("src", ""); thumb != "" {
|
|
result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, thumb)
|
|
}
|
|
result.Description = strings.TrimSpace(doc.Find("div#noidungm, div#panel-story-info-description, div#contentBox").First().Text())
|
|
|
|
result.Author = strings.TrimSpace(main.Find("li:contains(author) a, td:contains(author) + td a").First().Text())
|
|
|
|
statusText := strings.TrimSpace(main.Find("li:contains(status), td:contains(status) + td").First().Text())
|
|
switch {
|
|
case strings.Contains(statusText, "Ongoing"):
|
|
result.Status = source.StatusOngoing
|
|
case strings.Contains(statusText, "Completed"):
|
|
result.Status = source.StatusCompleted
|
|
default:
|
|
result.Status = source.StatusUnknown
|
|
}
|
|
|
|
// Genres from Kakalot style or Nelo style
|
|
var genres []string
|
|
main.Find("div.manga-info-top li:contains(genres) a").Each(func(_ int, a *goquery.Selection) {
|
|
if t := strings.TrimSpace(a.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
if len(genres) == 0 {
|
|
main.Find("td:contains(genres) + td a").Each(func(_ int, a *goquery.Selection) {
|
|
if t := strings.TrimSpace(a.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
}
|
|
result.Genre = strings.Join(genres, ", ")
|
|
|
|
// Alt name appended to description
|
|
if altEl := doc.Find(".story-alternative, tr:has(.info-alternative) h2").First(); altEl.Length() > 0 {
|
|
alt := strings.TrimSpace(altEl.Text())
|
|
if alt != "" {
|
|
if result.Description == "" {
|
|
result.Description = "Alternative Name: " + alt
|
|
} else {
|
|
result.Description += "\n\nAlternative Name: " + alt
|
|
}
|
|
}
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
// JSON DTOs for chapter list API
|
|
|
|
type apiResponse struct {
|
|
Data apiDataResponse `json:"data"`
|
|
}
|
|
|
|
type apiDataResponse struct {
|
|
Chapters []apiChapter `json:"chapters"`
|
|
Pagination apiPagination `json:"pagination"`
|
|
}
|
|
|
|
// apiChapter is one chapter entry of the chapter list API.
type apiChapter struct {
	// ChapterName is the chapter title.
	ChapterName string `json:"chapter_name"`
	// ChapterSlug is the URL path segment identifying the chapter.
	ChapterSlug string `json:"chapter_slug"`
	// ChapterNum is the numeric chapter value sent by the API.
	ChapterNum float32 `json:"chapter_num"`
	// UpdatedAt is a timestamp string; see parseChapterDate for the
	// layouts that are understood.
	UpdatedAt string `json:"updated_at"`
}
|
|
|
|
// apiPagination is the pagination object of a chapter list response.
type apiPagination struct {
	// HasMore is true when another batch can be requested.
	HasMore bool `json:"has_more"`
}
|
|
|
|
// dateFormats holds the time layouts accepted for chapter timestamps, in the
// order they are tried.
var dateFormats = []string{
	"2006-01-02T15:04:05.000000Z",
	"2006-01-02T15:04:05Z",
	"2006-01-02",
}

// parseChapterDate converts a chapter timestamp string into Unix
// milliseconds. Each layout in dateFormats is tried in turn and the first
// successful parse wins; 0 is returned when none of them matches.
func parseChapterDate(s string) int64 {
	for i := 0; i < len(dateFormats); i++ {
		parsed, err := time.Parse(dateFormats[i], s)
		if err != nil {
			continue
		}
		return parsed.UnixMilli()
	}
	return 0
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
// Extract slug from manga URL: last path segment
|
|
slug := util.SlugFromURL(manga.URL)
|
|
if slug == "" {
|
|
// Fallback: use last non-empty path part
|
|
parts := strings.Split(strings.TrimRight(manga.URL, "/"), "/")
|
|
slug = parts[len(parts)-1]
|
|
}
|
|
|
|
offset := 0
|
|
const limit = 500
|
|
var chapters []source.SChapter
|
|
|
|
for {
|
|
u := fmt.Sprintf("%s/api/manga/%s/chapters?limit=%d&offset=%d", s.base(), slug, limit, offset)
|
|
var apiResp apiResponse
|
|
if err := s.getJSON(context.Background(), u, &apiResp); err != nil {
|
|
return nil, err
|
|
}
|
|
for _, ch := range apiResp.Data.Chapters {
|
|
chURL := fmt.Sprintf("%s/manga/%s/%s", s.base(), slug, ch.ChapterSlug)
|
|
chapters = append(chapters, source.SChapter{
|
|
URL: chURL,
|
|
Name: ch.ChapterName,
|
|
DateUpload: parseChapterDate(ch.UpdatedAt),
|
|
})
|
|
}
|
|
if !apiResp.Data.Pagination.HasMore {
|
|
break
|
|
}
|
|
offset += limit
|
|
}
|
|
return chapters, nil
|
|
}
|
|
|
|
// arrayRe matches a JavaScript array assignment of the form `name = [ ... ]`
// with a non-empty body. Group 1 is the identifier directly before "=",
// group 2 the raw text between the brackets.
var arrayRe = regexp.MustCompile(`(?s)(\w+)\s*=\s*\[([^\]]+)\]`)

// extractJSArray returns the elements of the first array assigned to the
// identifier name inside the JavaScript text content.
//
// name must be a bare identifier and is matched as a whole word, so asking
// for "cdns" does not pick up an assignment to "mycdns". The array body is
// split on commas; each element is trimmed of whitespace and quotes, has its
// `\/` escapes turned into "/", and loses any trailing "/". Empty elements
// are skipped. nil is returned when no matching non-empty array exists.
func extractJSArray(content, name string) []string {
	// Reuse the precompiled pattern instead of compiling one per call.
	var raw string
	for _, m := range arrayRe.FindAllStringSubmatch(content, -1) {
		if m[1] == name {
			raw = m[2]
			break
		}
	}
	if raw == "" {
		// The pattern requires a non-empty body, so this means "not found".
		return nil
	}

	var result []string
	for _, part := range strings.Split(raw, ",") {
		val := strings.Trim(strings.TrimSpace(part), `"'`)
		val = strings.ReplaceAll(val, `\/`, "/")
		val = strings.TrimRight(val, "/")
		if val != "" {
			result = append(result, val)
		}
	}
	return result
}
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Try extracting from script with cdns and chapterImages
|
|
var scriptContent string
|
|
doc.Find("script").Each(func(_ int, el *goquery.Selection) {
|
|
html, _ := el.Html()
|
|
if strings.Contains(html, "cdns") && strings.Contains(html, "chapterImages") {
|
|
scriptContent += html + "\n"
|
|
}
|
|
})
|
|
|
|
if scriptContent != "" {
|
|
cdns := extractJSArray(scriptContent, "cdns")
|
|
if len(cdns) == 0 {
|
|
cdns = extractJSArray(scriptContent, "backupImage")
|
|
}
|
|
chapterImages := extractJSArray(scriptContent, "chapterImages")
|
|
|
|
if len(cdns) > 0 && len(chapterImages) > 0 {
|
|
pages := make([]source.Page, len(chapterImages))
|
|
for i, img := range chapterImages {
|
|
cdn := cdns[i%len(cdns)]
|
|
var imageURL string
|
|
if strings.HasPrefix(img, "http") {
|
|
imageURL = img
|
|
} else {
|
|
imageURL = strings.TrimRight(cdn, "/") + "/" + strings.TrimLeft(img, "/")
|
|
}
|
|
pages[i] = source.Page{Index: i, ImageURL: imageURL}
|
|
}
|
|
return pages, nil
|
|
}
|
|
}
|
|
|
|
// Fallback: div.container-chapter-reader > img
|
|
var pages []source.Page
|
|
doc.Find("div.container-chapter-reader > img").Each(func(i int, img *goquery.Selection) {
|
|
u := img.AttrOr("src", "")
|
|
if u != "" {
|
|
pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)})
|
|
}
|
|
})
|
|
return pages, nil
|
|
}
|
|
|
|
// normalizeSearchQuery turns a free-text query into the underscore-separated
// slug placed in search URLs. It approximates the change_alias JavaScript
// helper from Mangakakalot.
// NOTE(review): modelled on the upstream script — confirm against the site.
//
//   - The query is lower-cased and precomposed Vietnamese/Latin accented
//     vowels and "đ" are folded to their base letter ("é" -> "e").
//   - ASCII letters and digits are kept.
//   - Every run of other ASCII characters (spaces, punctuation, underscores)
//     becomes a single "_", and leading/trailing separators are dropped, so
//     "one-piece" and "  one   piece " both yield "one_piece".
//   - Any remaining non-ASCII rune is discarded.
func normalizeSearchQuery(query string) string {
	var b strings.Builder
	b.Grow(len(query))

	// pendingSep records that a separator run was seen; the "_" is written
	// only when another kept character follows, which collapses runs and
	// drops separators at either end.
	pendingSep := false
	for _, r := range strings.ToLower(query) {
		r = foldDiacritic(r)
		switch {
		case (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9'):
			if pendingSep && b.Len() > 0 {
				b.WriteByte('_')
			}
			pendingSep = false
			b.WriteRune(r)
		case r < 0x80:
			pendingSep = true
		}
	}
	return b.String()
}

// foldDiacritic maps a lower-case accented vowel or "đ" to its ASCII base
// letter and returns every other rune unchanged.
func foldDiacritic(r rune) rune {
	if r < 0x80 {
		return r
	}
	switch r {
	case 'à', 'á', 'ạ', 'ả', 'ã', 'â', 'ầ', 'ấ', 'ậ', 'ẩ', 'ẫ', 'ă', 'ằ', 'ắ', 'ặ', 'ẳ', 'ẵ':
		return 'a'
	case 'è', 'é', 'ẹ', 'ẻ', 'ẽ', 'ê', 'ề', 'ế', 'ệ', 'ể', 'ễ':
		return 'e'
	case 'ì', 'í', 'ị', 'ỉ', 'ĩ':
		return 'i'
	case 'ò', 'ó', 'ọ', 'ỏ', 'õ', 'ô', 'ồ', 'ố', 'ộ', 'ổ', 'ỗ', 'ơ', 'ờ', 'ớ', 'ợ', 'ở', 'ỡ':
		return 'o'
	case 'ù', 'ú', 'ụ', 'ủ', 'ũ', 'ư', 'ừ', 'ứ', 'ự', 'ử', 'ữ':
		return 'u'
	case 'ỳ', 'ý', 'ỵ', 'ỷ', 'ỹ':
		return 'y'
	case 'đ':
		return 'd'
	}
	return r
}
|
|
|
|
// GetImageURL returns the image URL already stored on page; no request is
// made.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	imageURL := page.ImageURL
	return imageURL, nil
}
|
|
// GetFilterList returns nil: this source defines no search filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
|