257 lines
7.8 KiB
Go
Executable File
257 lines
7.8 KiB
Go
Executable File
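// Package manhwaz implements the shared base for ManhwaZ-style manga sites.
//
// All data is scraped from HTML: popular titles come from the homepage,
// latest updates are paginated, and chapter images are read from
// "div.page-break img" elements.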
package manhwaz
|
|
|
|
import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"goyomi/internal/httpclient"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
|
|
|
|
// Config holds the per-site settings for a ManhwaZ-based source.
//
// SearchPath, AuthorHeading and StatusHeading may be left empty; New replaces
// empty values with the defaults noted on each field.
type Config struct {
	Name    string // reported by Source.Name and used to derive the source ID
	BaseURL string // site root; relative links are resolved against it
	Lang    string // reported by Source.Lang and used to derive the source ID

	SearchPath    string // default: "search"
	AuthorHeading string // default: "author(s)"
	StatusHeading string // default: "status"
}
|
|
|
|
type Source struct {
|
|
cfg Config
|
|
client *httpclient.Client
|
|
id int64
|
|
}
|
|
|
|
func New(cfg Config) *Source {
|
|
if cfg.SearchPath == "" {
|
|
cfg.SearchPath = "search"
|
|
}
|
|
if cfg.AuthorHeading == "" {
|
|
cfg.AuthorHeading = "author(s)"
|
|
}
|
|
if cfg.StatusHeading == "" {
|
|
cfg.StatusHeading = "status"
|
|
}
|
|
c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
|
|
return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
|
|
}
|
|
|
|
// ID returns the source identifier computed when the Source was built by New.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the configured display name of the source.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
|
// Lang returns the configured language of the source.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
|
// SupportsLatest reports that this source implements GetLatestUpdates.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
// base returns the configured BaseURL with every trailing "/" removed, so
// that paths can be appended with a single separator.
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
|
req.Header.Set("Origin", s.cfg.BaseURL)
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("manhwaz: HTTP %d", resp.StatusCode)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func imgAttr(img *goquery.Selection) string {
|
|
for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "src"} {
|
|
if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") {
|
|
return v
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
// Popular comes from homepage; no pagination
|
|
doc, err := s.get(context.Background(), s.base())
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
var mangas []source.SManga
|
|
doc.Find("#slide-top > .item").Each(func(_ int, el *goquery.Selection) {
|
|
a := el.Find(".info-item a").First()
|
|
if a.Length() == 0 {
|
|
return
|
|
}
|
|
m := source.SManga{}
|
|
m.Title = strings.TrimSpace(a.Text())
|
|
m.URL = a.AttrOr("href", "")
|
|
if img := el.Find(".img-item img").First(); img.Length() > 0 {
|
|
m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, imgAttr(img))
|
|
}
|
|
if m.URL != "" && m.Title != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), fmt.Sprintf("%s/?page=%d", s.base(), page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
var mangas []source.SManga
|
|
doc.Find(".page-item-detail").Each(func(_ int, el *goquery.Selection) {
|
|
a := el.Find(".item-summary a").First()
|
|
if a.Length() == 0 {
|
|
return
|
|
}
|
|
m := source.SManga{}
|
|
m.Title = strings.TrimSpace(a.Text())
|
|
m.URL = a.AttrOr("href", "")
|
|
if img := el.Find(".item-thumb img").First(); img.Length() > 0 {
|
|
m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, imgAttr(img))
|
|
}
|
|
if m.URL != "" && m.Title != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
hasNext := doc.Find("ul.pager a[rel=next]").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
u := fmt.Sprintf("%s/%s?s=%s&page=%d", s.base(), s.cfg.SearchPath, query, page)
|
|
doc, err := s.get(context.Background(), u)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
var mangas []source.SManga
|
|
doc.Find(".page-item-detail").Each(func(_ int, el *goquery.Selection) {
|
|
a := el.Find(".item-summary a").First()
|
|
if a.Length() == 0 {
|
|
return
|
|
}
|
|
m := source.SManga{}
|
|
m.Title = strings.TrimSpace(a.Text())
|
|
m.URL = a.AttrOr("href", "")
|
|
if img := el.Find(".item-thumb img").First(); img.Length() > 0 {
|
|
m.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, imgAttr(img))
|
|
}
|
|
if m.URL != "" && m.Title != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
hasNext := doc.Find("ul.pager a[rel=next]").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}, nil
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := source.SManga{URL: manga.URL}
|
|
result.Title = strings.TrimSpace(doc.Find("div.post-title h1").Text())
|
|
if result.Title == "" {
|
|
result.Title = manga.Title
|
|
}
|
|
|
|
statusHeading := s.cfg.StatusHeading
|
|
statusText := strings.TrimSpace(doc.Find("div.summary-heading").FilterFunction(func(_ int, el *goquery.Selection) bool {
|
|
return strings.Contains(strings.ToLower(el.Text()), statusHeading)
|
|
}).Next().Text())
|
|
|
|
ongoingTerms := []string{"ongoing", "đang ra", "on going", "publishing"}
|
|
completedTerms := []string{"completed", "hoàn thành", "truyện full", "complete"}
|
|
statusLower := strings.ToLower(statusText)
|
|
switch {
|
|
case containsAny(statusLower, ongoingTerms):
|
|
result.Status = source.StatusOngoing
|
|
case containsAny(statusLower, completedTerms):
|
|
result.Status = source.StatusCompleted
|
|
default:
|
|
result.Status = source.StatusUnknown
|
|
}
|
|
|
|
authorHeading := s.cfg.AuthorHeading
|
|
result.Author = strings.TrimSpace(doc.Find("div.summary-heading").FilterFunction(func(_ int, el *goquery.Selection) bool {
|
|
return strings.Contains(strings.ToLower(el.Text()), authorHeading)
|
|
}).Next().Text())
|
|
|
|
result.Description = strings.TrimSpace(doc.Find("div.summary__content").Text())
|
|
|
|
var genres []string
|
|
doc.Find("div.genres-content a[rel=tag]").Each(func(_ int, a *goquery.Selection) {
|
|
if t := strings.TrimSpace(a.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
result.Genre = strings.Join(genres, ", ")
|
|
|
|
if img := doc.Find("div.summary_image img").First(); img.Length() > 0 {
|
|
result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, imgAttr(img))
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var chapters []source.SChapter
|
|
doc.Find("li.wp-manga-chapter").Each(func(_ int, el *goquery.Selection) {
|
|
a := el.Find("a").First()
|
|
if a.Length() == 0 {
|
|
return
|
|
}
|
|
ch := source.SChapter{
|
|
URL: a.AttrOr("href", ""),
|
|
Name: strings.TrimSpace(a.Text()),
|
|
}
|
|
if dateEl := el.Find("span.chapter-release-date").First(); dateEl.Length() > 0 {
|
|
ch.DateUpload = util.ParseRelativeDate(strings.TrimSpace(dateEl.Text()))
|
|
}
|
|
if ch.URL != "" {
|
|
chapters = append(chapters, ch)
|
|
}
|
|
})
|
|
return chapters, nil
|
|
}
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var pages []source.Page
|
|
doc.Find("div.page-break img").Each(func(i int, img *goquery.Selection) {
|
|
u := imgAttr(img)
|
|
if u != "" {
|
|
pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(s.cfg.BaseURL, u)})
|
|
}
|
|
})
|
|
return pages, nil
|
|
}
|
|
|
|
// containsAny reports whether s contains at least one of terms as a
// substring. It returns false when terms is empty.
func containsAny(s string, terms []string) bool {
	found := false
	for i := 0; i < len(terms) && !found; i++ {
		found = strings.Contains(s, terms[i])
	}
	return found
}
|
|
|
|
// GetImageURL returns the image URL already stored on page; no extra request
// is made.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	imageURL := page.ImageURL
	return imageURL, nil
}
|
|
// GetFilterList returns nil: this source defines no search filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
|