9a42dd2ab1
- Remove global ProxyEnabled() logic from httpclient - Each source now explicitly chooses client at import time: - flare client: for JS-rendering/cloudflare sources - normal httpclient: for REST API sources - Updated 29 base sources based on Kotlin reference (network.cloudflareClient)
425 lines
10 KiB
Go
Executable File
425 lines
10 KiB
Go
Executable File
package sinmh
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"goyomi/internal/httpclient/flare"
|
|
"goyomi/internal/source"
|
|
)
|
|
|
|
// Config carries the per-site settings a Source is constructed from.
type Config struct {
	// Name is reported by Source.Name and is one of the inputs to the
	// generated source ID.
	Name string
	// BaseURL is the site root used for listing, search and config.js
	// requests, and is sent as the Referer header.
	BaseURL string
	// Lang is reported by Source.Lang and is one of the inputs to the
	// generated source ID.
	Lang string
	// MobileURL is the root prepended to manga and chapter URLs. When it
	// is empty, New derives it from BaseURL.
	MobileURL string
}
|
|
|
|
type Source struct {
|
|
cfg Config
|
|
client *flare.Client
|
|
id int64
|
|
imageHost string
|
|
categories []Category
|
|
dateFormat string
|
|
}
|
|
|
|
// Category is one filter group scraped from a listing page.
type Category struct {
	// Name is the text of the group's label element.
	Name string
	// Values holds the link texts of the group, in document order.
	Values []string
	// URIParts holds, for each entry of Values, the link href with a
	// trailing "/" and a leading "/list/" removed.
	URIParts []string
}
|
|
|
|
func New(cfg Config) *Source {
|
|
if cfg.MobileURL == "" {
|
|
cfg.MobileURL = strings.Replace(cfg.BaseURL, "www.", "m.", 1)
|
|
}
|
|
c := flare.NewClient(flare.WithRateLimit(2, 1))
|
|
s := &Source{
|
|
cfg: cfg,
|
|
client: c,
|
|
id: source.GenerateSourceID(cfg.Name, cfg.Lang),
|
|
dateFormat: "2006-01-02",
|
|
}
|
|
return s
|
|
}
|
|
|
|
// ID returns the source identifier generated in New.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
|
// Lang returns the configured language code.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
|
|
|
// SupportsLatest always reports true; latest updates are served by
// GetLatestUpdates.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
func (s *Source) parseCategories(doc *goquery.Document) {
|
|
if len(s.categories) > 0 {
|
|
return
|
|
}
|
|
|
|
doc.Find("div.filter-nav label").Each(func(i int, sel *goquery.Selection) {
|
|
name := sel.Text()
|
|
var cat Category
|
|
cat.Name = name
|
|
|
|
sel.Parent().Find("a").Each(func(_ int, a *goquery.Selection) {
|
|
text := a.Text()
|
|
href := a.AttrOr("href", "")
|
|
cat.Values = append(cat.Values, text)
|
|
cat.URIParts = append(cat.URIParts, strings.TrimPrefix(strings.TrimSuffix(href, "/"), "/list/"))
|
|
})
|
|
|
|
if len(cat.Values) > 0 {
|
|
s.categories = append(s.categories, cat)
|
|
}
|
|
})
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
doc, err := s.fetchDoc(fmt.Sprintf("%s/list/click/?page=%d", s.cfg.BaseURL, page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
|
|
s.parseCategories(doc)
|
|
|
|
mangas := make([]source.SManga, 0)
|
|
doc.Find("#contList > li, li.list-comic").Each(func(_ int, sel *goquery.Selection) {
|
|
m := s.mangaFromElement(sel)
|
|
if m.URL != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
|
|
hasNext := doc.Find("ul.pagination > li.next:not(.disabled)").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil
|
|
}
|
|
|
|
func (s *Source) mangaFromElement(sel *goquery.Selection) source.SManga {
|
|
titleSel := sel.Find("p > a, h3 > a")
|
|
title := titleSel.Text()
|
|
href := titleSel.AttrOr("href", "")
|
|
img := sel.Find("img")
|
|
thumb := img.AttrOr("src", "")
|
|
if thumb == "" {
|
|
thumb = img.AttrOr("data-src", "")
|
|
}
|
|
|
|
return source.SManga{
|
|
URL: href,
|
|
Title: strings.TrimSpace(title),
|
|
ThumbnailURL: thumb,
|
|
}
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
doc, err := s.fetchDoc(fmt.Sprintf("%s/list/update/?page=%d", s.cfg.BaseURL, page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
|
|
mangas := make([]source.SManga, 0)
|
|
doc.Find("#contList > li, li.list-comic").Each(func(_ int, sel *goquery.Selection) {
|
|
m := s.mangaFromElement(sel)
|
|
if m.URL != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
|
|
hasNext := doc.Find("ul.pagination > li.next:not(.disabled)").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
if query != "" {
|
|
doc, err := s.fetchDoc(fmt.Sprintf("%s/search/?keywords=%s&page=%d", s.cfg.BaseURL, url.QueryEscape(query), page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
|
|
mangas := make([]source.SManga, 0)
|
|
doc.Find("#contList > li, li.list-comic").Each(func(_ int, sel *goquery.Selection) {
|
|
m := s.mangaFromElement(sel)
|
|
if m.URL != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
|
|
hasNext := doc.Find("ul.pagination > li.next:not(.disabled)").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil
|
|
}
|
|
|
|
categories := make([]string, 0)
|
|
sortOrder := ""
|
|
|
|
for _, f := range filters {
|
|
if sel, ok := f.(*source.SelectFilter); ok {
|
|
values := sel.Values
|
|
if len(values) > sel.Selected && sel.Selected >= 0 {
|
|
val := values[sel.Selected]
|
|
if strings.Contains(sel.FilterName, "Sort") || strings.Contains(sel.FilterName, "排序") {
|
|
sortOrder = val
|
|
} else {
|
|
categories = append(categories, val)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
urlParts := []string{"/list/"}
|
|
urlParts = append(urlParts, categories...)
|
|
if sortOrder != "" {
|
|
urlParts = append(urlParts, sortOrder)
|
|
}
|
|
urlParts = append(urlParts, "/")
|
|
|
|
searchURL := s.cfg.BaseURL + strings.Join(urlParts, "-") + fmt.Sprintf("?page=%d", page)
|
|
|
|
doc, err := s.fetchDoc(searchURL)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
|
|
mangas := make([]source.SManga, 0)
|
|
doc.Find("#contList > li, li.list-comic").Each(func(_ int, sel *goquery.Selection) {
|
|
m := s.mangaFromElement(sel)
|
|
if m.URL != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
|
|
hasNext := doc.Find("ul.pagination > li.next:not(.disabled)").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
doc, err := s.fetchDoc(s.cfg.MobileURL + manga.URL)
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
|
|
title := doc.Find(".book-title h1").Text()
|
|
detailsList := doc.Find(".detail-list")
|
|
author := detailsList.Find("strong:contains(作者) ~ *").Text()
|
|
description := doc.Find("#intro-all").Text()
|
|
description = strings.TrimPrefix(description, "漫画简介:")
|
|
description = strings.TrimSpace(description)
|
|
|
|
genre := doc.Find("div.breadcrumb-bar a[href^=/list/]").Map(func(_ int, sel *goquery.Selection) string {
|
|
return sel.Text()
|
|
})
|
|
genre = append(genre, detailsList.Find("strong:contains(类型) ~ a").Text())
|
|
|
|
statusText := detailsList.Find("strong:contains(状态) + *").Text()
|
|
status := 0
|
|
switch statusText {
|
|
case "连载中":
|
|
status = 1
|
|
case "已完结":
|
|
status = 2
|
|
}
|
|
|
|
thumbnail := doc.Find("div.book-cover img").AttrOr("src", "")
|
|
|
|
manga.Title = strings.TrimSpace(title)
|
|
manga.Author = strings.TrimSpace(author)
|
|
manga.Description = strings.TrimSpace(description)
|
|
manga.Genre = strings.Join(genre, ", ")
|
|
manga.Status = status
|
|
manga.ThumbnailURL = thumbnail
|
|
manga.Initialized = true
|
|
|
|
return manga, nil
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
doc, err := s.fetchDoc(s.cfg.MobileURL + manga.URL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
chapters := make([]source.SChapter, 0)
|
|
doc.Find(".chapter-body li > a").Each(func(_ int, sel *goquery.Selection) {
|
|
href := sel.AttrOr("href", "")
|
|
name := sel.Text()
|
|
if sel.Children().Length() > 0 {
|
|
name = sel.Children().First().Text()
|
|
}
|
|
chapters = append(chapters, source.SChapter{
|
|
URL: href,
|
|
Name: strings.TrimSpace(name),
|
|
})
|
|
})
|
|
|
|
if len(chapters) > 0 {
|
|
dateSel := doc.Find(".date")
|
|
if dateSel.Length() > 0 {
|
|
dateText := dateSel.First().Text()
|
|
if t, err := time.Parse(s.dateFormat, strings.TrimSpace(dateText)); err == nil {
|
|
chapters[0].DateUpload = t.UnixMilli()
|
|
}
|
|
}
|
|
}
|
|
|
|
for i, j := 0, len(chapters)-1; i < j; i, j = i+1, j-1 {
|
|
chapters[i], chapters[j] = chapters[j], chapters[i]
|
|
}
|
|
|
|
return chapters, nil
|
|
}
|
|
|
|
func (s *Source) fetchImageHost() (string, error) {
|
|
if s.imageHost != "" {
|
|
return s.imageHost, nil
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, s.cfg.BaseURL+"/js/config.js", nil)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, _ := io.ReadAll(resp.Body)
|
|
re := regexp.MustCompile(`""resHost:.+?"?domain"?:\["(.+?)""""`)
|
|
matches := re.FindStringSubmatch(string(body))
|
|
if len(matches) > 1 {
|
|
s.imageHost = matches[1]
|
|
return s.imageHost, nil
|
|
}
|
|
|
|
return "", fmt.Errorf("could not find image host")
|
|
}
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
doc, err := s.fetchDoc(s.cfg.MobileURL + chapter.URL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
imageHost, err := s.fetchImageHost()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var images []string
|
|
doc.Find("body > script").Each(func(_ int, sel *goquery.Selection) {
|
|
html, err := sel.Html()
|
|
if err != nil {
|
|
return
|
|
}
|
|
if strings.Contains(html, "chapterImages") {
|
|
re := regexp.MustCompile(`chapterImages = (.+?);`)
|
|
m := re.FindStringSubmatch(html)
|
|
if len(m) > 1 {
|
|
imagesStr := m[1]
|
|
if len(imagesStr) > 2 {
|
|
imagesStr = imagesStr[1 : len(imagesStr)-1]
|
|
imagesStr = strings.ReplaceAll(imagesStr, `\`, "")
|
|
images = strings.Split(imagesStr, `","`)
|
|
}
|
|
}
|
|
}
|
|
})
|
|
|
|
if len(images) == 0 {
|
|
return nil, fmt.Errorf("no images found")
|
|
}
|
|
|
|
pathRe := regexp.MustCompile(`chapterPath = "(.+?)"`)
|
|
firstScript := doc.Find("body > script").First()
|
|
html, _ := firstScript.Html()
|
|
pathMatch := pathRe.FindStringSubmatch(html)
|
|
path := ""
|
|
if len(pathMatch) > 1 {
|
|
path = pathMatch[1]
|
|
}
|
|
|
|
pages := make([]source.Page, len(images))
|
|
for i, img := range images {
|
|
imgURL := img
|
|
switch {
|
|
case strings.HasPrefix(imgURL, "https://"):
|
|
// already full URL
|
|
case strings.HasPrefix(imgURL, "/"):
|
|
imgURL = imageHost + imgURL
|
|
default:
|
|
imgURL = imageHost + "/" + path + img
|
|
}
|
|
pages[i] = source.Page{Index: i, ImageURL: imgURL}
|
|
}
|
|
|
|
return pages, nil
|
|
}
|
|
|
|
func (s *Source) GetImageURL(page source.Page) (string, error) {
|
|
return page.ImageURL, nil
|
|
}
|
|
|
|
func (s *Source) GetFilterList() []source.Filter {
|
|
filters := make([]source.Filter, 0)
|
|
|
|
for _, cat := range s.categories {
|
|
options := make([]string, len(cat.Values))
|
|
copy(options, cat.Values)
|
|
filters = append(filters, &source.SelectFilter{
|
|
FilterName: cat.Name,
|
|
Values: options,
|
|
Selected: 0,
|
|
})
|
|
}
|
|
|
|
filters = append(filters, &source.SelectFilter{
|
|
FilterName: "排序方式",
|
|
Values: []string{"post/", "-post/", "update/", "-update/", "click/", "-click/"},
|
|
Selected: 0,
|
|
})
|
|
|
|
return filters
|
|
}
|
|
|
|
func (s *Source) fetchDoc(url string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0")
|
|
req.Header.Set("Referer", s.cfg.BaseURL)
|
|
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("HTTP %d for %s", resp.StatusCode, url)
|
|
}
|
|
|
|
html, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return goquery.NewDocumentFromReader(strings.NewReader(string(html)))
|
|
}
|
|
|
|
// parseChapterNumber converts s to a chapter number.
//
// Surrounding whitespace is ignored. It returns -1 when s cannot be
// parsed as a 32-bit float; a valid "0" is returned as 0 rather than
// being mistaken for a parse failure.
func parseChapterNumber(s string) float32 {
	f, err := strconv.ParseFloat(strings.TrimSpace(s), 32)
	if err != nil {
		return -1
	}
	return float32(f)
}
|
|
|
|
var _ source.CatalogueSource = (*Source)(nil) |