// Package fuzzydoodle implements the FuzzyDoodle manga base. // https://github.com/jhin1m/fuzzy-doodle — HTML scraping, CF-protected. package fuzzydoodle import ( "context" "fmt" "net/http" "strings" "github.com/PuerkitoBio/goquery" "goyomi/internal/httpclient/flare" "goyomi/internal/source" "goyomi/sources/base/util" ) type Config struct { Name string BaseURL string Lang string } type Source struct { cfg Config client *flare.Client id int64 } func New(cfg Config) *Source { c := flare.NewClient(flare.WithRateLimit(1, 2)) return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)} } func (s *Source) ID() int64 { return s.id } func (s *Source) Name() string { return s.cfg.Name } func (s *Source) Lang() string { return s.cfg.Lang } func (s *Source) SupportsLatest() bool { return true } func (s *Source) base() string { return strings.TrimRight(s.cfg.BaseURL, "/") } func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) if err != nil { return nil, err } req.Header.Set("Referer", s.cfg.BaseURL+"/") resp, err := s.client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("fuzzydoodle: HTTP %d", resp.StatusCode) } return goquery.NewDocumentFromReader(resp.Body) } func imgAttr(img *goquery.Selection, baseURL string) string { for _, attr := range []string{"data-lazy-src", "data-src", "data-cfsrc", "src"} { if v, ok := img.Attr(attr); ok && v != "" && !strings.HasPrefix(v, "data:") { return util.AbsURL(baseURL, v) } } // srcset: take first entry if v, ok := img.Attr("srcset"); ok && v != "" { return util.AbsURL(baseURL, strings.Fields(v)[0]) } return "" } func mangaFromElement(el *goquery.Selection, baseURL string) source.SManga { m := source.SManga{} el.Find("a").First().Each(func(_ int, a *goquery.Selection) { m.URL, _ = a.Attr("href") }) m.Title = strings.TrimSpace(el.Find("h2.text-sm").Text()) if m.Title == "" { m.Title = strings.TrimSpace(el.Find("h2, h3").First().Text()) } el.Find("img").First().Each(func(_ int, img *goquery.Selection) { m.ThumbnailURL = imgAttr(img, baseURL) }) return m } func hasNextPage(doc *goquery.Document) bool { return doc.Find("ul.pagination > li:last-child:not(.pagination-disabled)").Length() > 0 } func (s *Source) parsePage(doc *goquery.Document) source.MangasPage { var mangas []source.SManga doc.Find("div#card-real").Each(func(_ int, el *goquery.Selection) { m := mangaFromElement(el, s.cfg.BaseURL) if m.URL != "" { mangas = append(mangas, m) } }) return source.MangasPage{Mangas: mangas, HasNextPage: hasNextPage(doc)} } func (s *Source) GetPopularManga(page int) (source.MangasPage, error) { u := fmt.Sprintf("%s/manga?page=%d", s.base(), page) doc, err := s.get(context.Background(), u) if err != nil { return source.MangasPage{}, err } return s.parsePage(doc), nil } func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) { u := fmt.Sprintf("%s/latest?page=%d", s.base(), page) doc, err := s.get(context.Background(), u) if err != nil { return source.MangasPage{}, err } return s.parsePage(doc), nil } func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) { u := fmt.Sprintf("%s/manga?title=%s&page=%d", s.base(), query, page) doc, err := s.get(context.Background(), u) if err != nil { return source.MangasPage{}, err } return s.parsePage(doc), nil } func getInfo(doc *goquery.Selection, label string) string { // p:has(span:containsOwn({label})) span.capitalize var result string doc.Find("p").Each(func(_ int, p *goquery.Selection) { if strings.Contains(p.Find("span").First().Text(), label) { result = strings.TrimSpace(p.Find("span.capitalize").Text()) } }) return result } func parseStatus(s string) int { switch strings.ToLower(strings.TrimSpace(s)) { case "ongoing", "en cours": return source.StatusOngoing case "completed", "completed (season)", "terminé": return source.StatusCompleted case "hiatus", "on hiatus": return source.StatusHiatus default: return source.StatusUnknown } } func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL)) if err != nil { return manga, err } result := source.SManga{URL: manga.URL} result.Title = strings.TrimSpace(doc.Find("h1").First().Text()) if result.Title == "" { result.Title = manga.Title } result.ThumbnailURL = imgAttr(doc.Find("div.relative img").First(), s.cfg.BaseURL) var genres []string doc.Find("div.flex > a.inline-block").Each(func(_ int, el *goquery.Selection) { if t := strings.TrimSpace(el.Text()); t != "" { genres = append(genres, t) } }) // description var descParts []string doc.Find("div:has(> p#description)").First().Each(func(_ int, el *goquery.Selection) { descParts = append(descParts, strings.TrimSpace(el.Text())) }) result.Description = strings.Join(descParts, "\n\n") if t := getInfo(doc.Selection, "Status"); t != "" || getInfo(doc.Selection, "Statut") != "" { st := t if st == "" { st = getInfo(doc.Selection, "Statut") } result.Status = parseStatus(st) } if a := getInfo(doc.Selection, "Artist"); a != "" { result.Artist = a } else if a := getInfo(doc.Selection, "Artiste"); a != "" { result.Artist = a } if a := getInfo(doc.Selection, "Author"); a != "" { result.Author = a } else if a := getInfo(doc.Selection, "Auteur"); a != "" { result.Author = a } if t := getInfo(doc.Selection, "Type"); t != "" { genres = append([]string{t}, genres...) } result.Genre = strings.Join(genres, ", ") return result, nil } func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) { mangaURL := util.AbsURL(s.cfg.BaseURL, manga.URL) var chapters []source.SChapter page := 1 for { u := mangaURL if page > 1 { u = fmt.Sprintf("%s?page=%d", mangaURL, page) } doc, err := s.get(context.Background(), u) if err != nil { if page == 1 { return nil, err } break } doc.Find("div#chapters-list > a[href]").Each(func(_ int, a *goquery.Selection) { ch := source.SChapter{} ch.URL, _ = a.Attr("href") ch.Name = strings.TrimSpace(a.Find("#item-title, span").First().Text()) if ch.Name == "" { ch.Name = strings.TrimSpace(a.Text()) } // date a.Find("p, span").Each(func(_ int, el *goquery.Selection) { if t := strings.TrimSpace(el.Text()); t != "" && ch.DateUpload == 0 { ch.DateUpload = util.ParseRelativeDate(t) } }) if ch.URL != "" { chapters = append(chapters, ch) } }) if doc.Find("ul.pagination > li:last-child:not(.pagination-disabled)").Length() == 0 { break } page++ } return chapters, nil } func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) { doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL)) if err != nil { return nil, err } var pages []source.Page doc.Find("div#chapter-container > img").Each(func(i int, img *goquery.Selection) { if u := imgAttr(img, s.cfg.BaseURL); u != "" { pages = append(pages, source.Page{Index: i, ImageURL: u}) } }) return pages, nil } func (s *Source) GetImageURL(page source.Page) (string, error) { return page.ImageURL, nil } func (s *Source) GetFilterList() []source.Filter { return nil }