ca609ccae7
Ports bases from previous session: util (shared helpers), bakkin, fmreader, foolslide, gigaviewer, gmanga, grouple, guya, heancms, hentaihand, kemono, madara, madtheme, mangadventure, mangahub, mangathemesia, mangaworld, mmrcms, senkuro, wpcomics.
194 lines
6.1 KiB
Go
194 lines
6.1 KiB
Go
// Package wpcomics implements the WPComics base.
|
|
// Listings are fetched with GET {base}/{popularPath}?page={n} and parsed by scraping the returned HTML.
|
|
package wpcomics
|
|
|
|
import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"goyomi/internal/httpclient"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
|
|
|
|
// Config describes one WPComics-based site.
type Config struct {
	// Name is the source name; it is returned by Source.Name and is one of
	// the inputs to the generated source ID.
	Name string
	// BaseURL is the site root used to build request URLs and the Referer header.
	BaseURL string
	// Lang is the language tag; it is returned by Source.Lang and is one of
	// the inputs to the generated source ID.
	Lang string
	// PopularPath is the path of the popular listing below BaseURL.
	// New replaces an empty value with "hot".
	PopularPath string
}
|
|
|
|
type Source struct {
|
|
cfg Config
|
|
client *httpclient.Client
|
|
id int64
|
|
}
|
|
|
|
func New(cfg Config) *Source {
|
|
if cfg.PopularPath == "" {
|
|
cfg.PopularPath = "hot"
|
|
}
|
|
c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
|
|
return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
|
|
}
|
|
|
|
// ID returns the identifier stored on the source.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the configured source name.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
|
// Lang returns the configured language tag.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
|
// SupportsLatest always reports true: the source implements GetLatestUpdates.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("wpcomics: HTTP %d", resp.StatusCode)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
|
|
var mangas []source.SManga
|
|
doc.Find("div.items div.item, div.comic-item").Each(func(_ int, el *goquery.Selection) {
|
|
m := source.SManga{}
|
|
el.Find("h3 a, a.cover").First().Each(func(_ int, a *goquery.Selection) {
|
|
if href, ok := a.Attr("href"); ok {
|
|
m.URL = stripDomain(href, s.cfg.BaseURL)
|
|
}
|
|
m.Title = strings.TrimSpace(a.Text())
|
|
})
|
|
el.Find("img").First().Each(func(_ int, img *goquery.Selection) {
|
|
m.ThumbnailURL = imgAttr(img, s.cfg.BaseURL)
|
|
})
|
|
if m.URL != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
hasNext := doc.Find(".pagination .next, a[rel=next]").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
u := fmt.Sprintf("%s/%s", strings.TrimRight(s.cfg.BaseURL, "/"), s.cfg.PopularPath)
|
|
if page > 1 {
|
|
u += fmt.Sprintf("?page=%d", page)
|
|
}
|
|
doc, err := s.get(context.Background(), u)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
u := fmt.Sprintf("%s/new?page=%d", strings.TrimRight(s.cfg.BaseURL, "/"), page)
|
|
doc, err := s.get(context.Background(), u)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
u := fmt.Sprintf("%s/tim-kiem?q=%s&page=%d", strings.TrimRight(s.cfg.BaseURL, "/"), query, page)
|
|
doc, err := s.get(context.Background(), u)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := source.SManga{URL: manga.URL}
|
|
doc.Find("h1").First().Each(func(_ int, el *goquery.Selection) { result.Title = strings.TrimSpace(el.Text()) })
|
|
doc.Find("li.author p.col-xs-8").First().Each(func(_ int, el *goquery.Selection) { result.Author = strings.TrimSpace(el.Text()) })
|
|
doc.Find("li.status p.col-xs-8").First().Each(func(_ int, el *goquery.Selection) { result.Status = util.StatusFromString(el.Text()) })
|
|
doc.Find("div.detail-content p").First().Each(func(_ int, el *goquery.Selection) { result.Description = strings.TrimSpace(el.Text()) })
|
|
doc.Find(".cover img, img.cover").First().Each(func(_ int, img *goquery.Selection) { result.ThumbnailURL = imgAttr(img, s.cfg.BaseURL) })
|
|
var genres []string
|
|
doc.Find("li.kind a").Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
result.Genre = strings.Join(genres, ", ")
|
|
return result, nil
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var chapters []source.SChapter
|
|
doc.Find("div.list-chapter li.row:not(.heading)").Each(func(_ int, el *goquery.Selection) {
|
|
ch := source.SChapter{}
|
|
el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
|
|
if href, ok := a.Attr("href"); ok {
|
|
ch.URL = stripDomain(href, s.cfg.BaseURL)
|
|
}
|
|
ch.Name = strings.TrimSpace(a.Text())
|
|
})
|
|
el.Find("div.col-xs-4").First().Each(func(_ int, e *goquery.Selection) {
|
|
ch.DateUpload = util.ParseRelativeDate(e.Text())
|
|
})
|
|
if ch.URL != "" {
|
|
chapters = append(chapters, ch)
|
|
}
|
|
})
|
|
return chapters, nil
|
|
}
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
rawURL := util.AbsURL(s.cfg.BaseURL, chapter.URL)
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var pages []source.Page
|
|
doc.Find(".reading-detail img, .page-chapter img").Each(func(i int, img *goquery.Selection) {
|
|
if u := imgAttr(img, s.cfg.BaseURL); u != "" {
|
|
pages = append(pages, source.Page{Index: i, URL: rawURL, ImageURL: u})
|
|
}
|
|
})
|
|
return pages, nil
|
|
}
|
|
|
|
// GetImageURL returns the image URL already stored on the page; no request
// is made and the error is always nil.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}
|
|
// GetFilterList returns nil: this base defines no search filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
|
|
|
|
// stripDomain turns an absolute link that points at baseURL into a
// site-relative path.
//
// href is returned unchanged when it does not start with "http" or when it
// does not belong to baseURL. Trailing slashes on baseURL are ignored. An
// href equal to the base yields the empty string.
//
// The base must end on a URL boundary inside href: whatever follows it has
// to be empty or begin with '/', '?' or '#'. Without that check a base of
// "https://example.com" would also match "https://example.community/x" and
// return the mangled path "munity/x".
func stripDomain(href, baseURL string) string {
	if !strings.HasPrefix(href, "http") {
		return href
	}
	base := strings.TrimRight(baseURL, "/")
	if !strings.HasPrefix(href, base) {
		return href
	}
	rest := href[len(base):]
	if rest == "" {
		return rest
	}
	switch rest[0] {
	case '/', '?', '#':
		return rest
	}
	// The prefix matched in the middle of a longer host name or path segment,
	// so href is a different site or page; leave it intact.
	return href
}
|
|
|
|
func imgAttr(img *goquery.Selection, baseURL string) string {
|
|
for _, attr := range []string{"data-lazy-src", "data-src", "data-original", "src"} {
|
|
if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") {
|
|
return util.AbsURL(baseURL, v)
|
|
}
|
|
}
|
|
return ""
|
|
}
|