229 lines
6.5 KiB
Go
Executable File
229 lines
6.5 KiB
Go
Executable File
// Package hotcomics implements the HotComics manga source base.
//
// Content is obtained by scraping HTML; the popular listing is GET {base}/en.
// Every request must carry the cookie hc_vfs=Y.
|
|
package hotcomics
|
|
|
|
import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"

	"goyomi/internal/httpclient"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
|
|
|
|
// Config describes one HotComics site served by a Source.
type Config struct {
	// Name is the display name; it is returned by Source.Name and is one of
	// the inputs used to derive the source ID.
	Name string
	// BaseURL is the site root from which request URLs are built.
	BaseURL string
	// Lang is the language code; it is returned by Source.Lang and is the
	// other input used to derive the source ID.
	Lang string
}
|
|
|
|
type Source struct {
|
|
cfg Config
|
|
client *httpclient.Client
|
|
id int64
|
|
}
|
|
|
|
func New(cfg Config) *Source {
|
|
c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
|
|
return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
|
|
}
|
|
|
|
// ID returns the source identifier computed when the Source was created.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the configured display name.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
|
// Lang returns the configured language code.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
|
// SupportsLatest reports whether a latest-updates listing is available;
// it is always true for this source.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
// base returns the configured BaseURL with every trailing "/" removed, so
// that callers can append paths beginning with "/".
func (s *Source) base() string {
	root := s.cfg.BaseURL
	return strings.TrimRight(root, "/")
}
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
|
// Inject required cookie
|
|
req.AddCookie(&http.Cookie{Name: "hc_vfs", Value: "Y"})
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("hotcomics: HTTP %d", resp.StatusCode)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func imgAttr(img *goquery.Selection) string {
|
|
if v, ok := img.Attr("data-src"); ok && v != "" {
|
|
return v
|
|
}
|
|
v, _ := img.Attr("src")
|
|
return v
|
|
}
|
|
|
|
func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage {
|
|
var mangas []source.SManga
|
|
seen := map[string]bool{}
|
|
doc.Find("li[itemtype*=ComicSeries]:not(.no-comic) > a").Each(func(_ int, a *goquery.Selection) {
|
|
m := source.SManga{}
|
|
m.URL, _ = a.Attr("href")
|
|
if m.URL == "" || seen[m.URL] {
|
|
return
|
|
}
|
|
seen[m.URL] = true
|
|
a.Find("div.visual img").First().Each(func(_ int, img *goquery.Selection) {
|
|
m.ThumbnailURL = imgAttr(img)
|
|
})
|
|
m.Title = strings.TrimSpace(a.Find("div.main-text > h4.title").Text())
|
|
if m.Title != "" {
|
|
mangas = append(mangas, m)
|
|
}
|
|
})
|
|
hasNext := doc.Find("div.pagination a.vnext:not(.disabled)").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.base()+"/en")
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.base()+"/en/new")
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
var u string
|
|
if query != "" {
|
|
u = fmt.Sprintf("%s/en/search?keyword=%s", s.base(), query)
|
|
} else {
|
|
u = fmt.Sprintf("%s/en?page=%d", s.base(), page)
|
|
}
|
|
doc, err := s.get(context.Background(), u)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangaList(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := source.SManga{URL: manga.URL}
|
|
result.Title = strings.TrimSpace(doc.Find("h2.episode-title").Text())
|
|
if result.Title == "" {
|
|
result.Title = manga.Title
|
|
}
|
|
typeBox := doc.Find("p.type_box")
|
|
result.Author = strings.TrimSpace(
|
|
strings.TrimPrefix(typeBox.Find("span.writer").Text(), "ⓒ"))
|
|
var genres []string
|
|
typeBox.Find("span.type").First().Each(func(_ int, el *goquery.Selection) {
|
|
for _, g := range strings.Split(el.Text(), "/") {
|
|
if g = strings.TrimSpace(g); g != "" {
|
|
genres = append(genres, g)
|
|
}
|
|
}
|
|
})
|
|
result.Genre = strings.Join(genres, ", ")
|
|
switch typeBox.Find("span.date").Text() {
|
|
case "End", "Ende":
|
|
result.Status = source.StatusCompleted
|
|
case "":
|
|
result.Status = source.StatusUnknown
|
|
default:
|
|
result.Status = source.StatusOngoing
|
|
}
|
|
var descParts []string
|
|
doc.Find("div.episode-contents header").First().Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
descParts = append(descParts, t)
|
|
}
|
|
})
|
|
doc.Find("div.title_content > h2:not(.episode-title)").First().Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
descParts = append(descParts, t)
|
|
}
|
|
})
|
|
result.Description = strings.Join(descParts, "\n\n")
|
|
return result, nil
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var chapters []source.SChapter
|
|
doc.Find("#tab-chapter a").Each(func(_ int, a *goquery.Selection) {
|
|
onclick := a.AttrOr("onclick", "")
|
|
// onclick="popupLogin('/chapter/url')"
|
|
u := strings.TrimSuffix(strings.TrimPrefix(
|
|
strings.TrimPrefix(onclick, "popupLogin('"), "popupLogin(\""), "'")
|
|
u = strings.TrimSuffix(u, "\")")
|
|
u = strings.TrimSuffix(u, "')")
|
|
if u == "" || u == onclick {
|
|
return
|
|
}
|
|
name := strings.TrimSpace(a.Find(".cell-num").Text())
|
|
dateStr := strings.TrimSpace(a.Find(".cell-time").Text())
|
|
chapters = append(chapters, source.SChapter{
|
|
URL: u,
|
|
Name: name,
|
|
DateUpload: parseDate(dateStr),
|
|
})
|
|
})
|
|
// reverse: oldest first
|
|
for i, j := 0, len(chapters)-1; i < j; i, j = i+1, j-1 {
|
|
chapters[i], chapters[j] = chapters[j], chapters[i]
|
|
}
|
|
return chapters, nil
|
|
}
|
|
|
|
// parseDate converts a date such as "Jan 05, 2024" or "Jan 5, 2024" into
// Unix milliseconds (midnight UTC of that day).
//
// Surrounding whitespace is ignored. Empty or unparseable input yields 0.
func parseDate(s string) int64 {
	s = strings.TrimSpace(s)
	if s == "" {
		return 0
	}
	// The "2" day verb accepts both one- and two-digit days, whereas "02"
	// would reject "Jan 5, 2024".
	t, err := time.Parse("Jan 2, 2006", s)
	if err != nil {
		return 0
	}
	return t.UnixMilli()
}
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var pages []source.Page
|
|
doc.Find("#viewer-img img").Each(func(i int, img *goquery.Selection) {
|
|
u := imgAttr(img)
|
|
if u != "" {
|
|
if !strings.HasPrefix(u, "http") {
|
|
u = util.AbsURL(s.cfg.BaseURL, u)
|
|
}
|
|
pages = append(pages, source.Page{Index: i, ImageURL: u})
|
|
}
|
|
})
|
|
return pages, nil
|
|
}
|
|
|
|
// GetImageURL returns the image URL already stored on page; no request is
// made and the error is always nil.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	imageURL := page.ImageURL
	return imageURL, nil
}
|
|
// GetFilterList returns nil: this source defines no search filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
|