// Package wpcomics implements the WPComics base. // GET {base}/{popularPath}?page={n}; HTML scraping. package wpcomics import ( "context" "fmt" "net/http" "strings" "github.com/PuerkitoBio/goquery" "goyomi/internal/httpclient/flare" "goyomi/internal/source" "goyomi/sources/base/util" ) type Config struct { Name string BaseURL string Lang string PopularPath string // default "hot" } type Source struct { cfg Config client *flare.Client id int64 } func New(cfg Config) *Source { if cfg.PopularPath == "" { cfg.PopularPath = "hot" } c := flare.NewClient(flare.WithRateLimit(1, 2)) return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} } func (s *Source) ID() int64 { return s.id } func (s *Source) Name() string { return s.cfg.Name } func (s *Source) Lang() string { return s.cfg.Lang } func (s *Source) SupportsLatest() bool { return true } func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { resp, err := s.client.Get(ctx, rawURL) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("wpcomics: HTTP %d", resp.StatusCode) } return goquery.NewDocumentFromReader(resp.Body) } func (s *Source) parseMangaList(doc *goquery.Document) source.MangasPage { var mangas []source.SManga doc.Find("div.items div.item, div.comic-item").Each(func(_ int, el *goquery.Selection) { m := source.SManga{} el.Find("h3 a, a.cover").First().Each(func(_ int, a *goquery.Selection) { if href, ok := a.Attr("href"); ok { m.URL = stripDomain(href, s.cfg.BaseURL) } m.Title = strings.TrimSpace(a.Text()) }) el.Find("img").First().Each(func(_ int, img *goquery.Selection) { m.ThumbnailURL = imgAttr(img, s.cfg.BaseURL) }) if m.URL != "" { mangas = append(mangas, m) } }) hasNext := doc.Find(".pagination .next, a[rel=next]").Length() > 0 return source.MangasPage{Mangas: mangas, HasNextPage: hasNext} } func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { u := fmt.Sprintf("%s/%s", strings.TrimRight(s.cfg.BaseURL, "/"), s.cfg.PopularPath) if page > 1 { u += fmt.Sprintf("?page=%d", page) } doc, err := s.get(context.Background(), u) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { u := fmt.Sprintf("%s/new?page=%d", strings.TrimRight(s.cfg.BaseURL, "/"), page) doc, err := s.get(context.Background(), u) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { u := fmt.Sprintf("%s/tim-kiem?q=%s&page=%d", strings.TrimRight(s.cfg.BaseURL, "/"), query, page) doc, err := s.get(context.Background(), u) if err != nil { return source.MangasPage{}, err } return s.parseMangaList(doc), nil } func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) if err != nil { return manga, err } result := source.SManga{URL: manga.URL} doc.Find("h1").First().Each(func(_ int, el *goquery.Selection) { result.Title = strings.TrimSpace(el.Text()) }) doc.Find("li.author p.col-xs-8").First().Each(func(_ int, el *goquery.Selection) { result.Author = strings.TrimSpace(el.Text()) }) doc.Find("li.status p.col-xs-8").First().Each(func(_ int, el *goquery.Selection) { result.Status = util.StatusFromString(el.Text()) }) doc.Find("div.detail-content p").First().Each(func(_ int, el *goquery.Selection) { result.Description = strings.TrimSpace(el.Text()) }) doc.Find(".cover img, img.cover").First().Each(func(_ int, img *goquery.Selection) { result.ThumbnailURL = imgAttr(img, s.cfg.BaseURL) }) var genres []string doc.Find("li.kind a").Each(func(_ int, el *goquery.Selection) { if t := strings.TrimSpace(el.Text()); t != "" { genres = append(genres, t) } }) result.Genre = strings.Join(genres, ", ") return result, nil } func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) if err != nil { return nil, err } var chapters []source.SChapter doc.Find("div.list-chapter li.row:not(.heading)").Each(func(_ int, el *goquery.Selection) { ch := source.SChapter{} el.Find("a").First().Each(func(_ int, a *goquery.Selection) { if href, ok := a.Attr("href"); ok { ch.URL = stripDomain(href, s.cfg.BaseURL) } ch.Name = strings.TrimSpace(a.Text()) }) el.Find("div.col-xs-4").First().Each(func(_ int, e *goquery.Selection) { ch.DateUpload = util.ParseRelativeDate(e.Text()) }) if ch.URL != "" { chapters = append(chapters, ch) } }) return chapters, nil } func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { rawURL := util.AbsURL(s.cfg.BaseURL, chapter.URL) doc, err := s.get(context.Background(), rawURL) if err != nil { return nil, err } var pages []source.Page doc.Find(".reading-detail img, .page-chapter img").Each(func(i int, img *goquery.Selection) { if u := imgAttr(img, s.cfg.BaseURL); u != "" { pages = append(pages, source.Page{Index: i, URL: rawURL, ImageURL: u}) } }) return pages, nil } func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } func (s *Source) GetFilterList() []source.Filter { return nil } func stripDomain(href, baseURL string) string { if !strings.HasPrefix(href, "http") { return href } base := strings.TrimRight(baseURL, "/") if strings.HasPrefix(href, base) { return href[len(base):] } return href } func imgAttr(img *goquery.Selection, baseURL string) string { for _, attr := range []string{"data-lazy-src", "data-src", "data-original", "src"} { if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") { return util.AbsURL(baseURL, v) } } return "" }