316ae2f9db
Add 8 all/ sources (7 Masonry, 1 Madara) and 38 en/ sources spanning Madara, MangaThemesia, MadTheme, Keyoapp, and Guya bases, plus 8 earlier all/ standalone sources from the previous session (ahottie, akuma, allporncomicsco, asmhentai, baobua, beauty3600000, buondua, comicfury, comicgrowl, comicklive, comicsvalley, comikey, commitstrip, coomer). Also annotates phase4-standalone.md with base-class tags for 43 additional unimplemented en/ sources identified in a full scan.
367 lines
11 KiB
Go
367 lines
11 KiB
Go
// Package comicfury implements the Comic Fury webcomic hosting source.
|
|
// Multi-language factory. Popular and latest listings reuse the search endpoint with sort=1 (popularity) and sort=2 (last update).
|
|
// Chapter list scraped from /read/{comicUrl}/archive; supports hierarchical chapters-in-chapters.
|
|
// FlareSolverr used (matches Kotlin cloudflareClient).
|
|
package comicfury
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"goyomi/internal/httpclient/flare"
|
|
"goyomi/internal/registry"
|
|
"goyomi/internal/source"
|
|
)
|
|
|
|
// siteURL is the Comic Fury origin. It is sent (with a trailing slash) as the
// Referer header and is prepended to links that do not start with "http".
const siteURL = "https://comicfury.com"
|
|
|
|
type Source struct {
|
|
name string
|
|
lang string
|
|
siteLang string // used in search query
|
|
client *flare.Client
|
|
id int64
|
|
}
|
|
|
|
func newSource(name, lang, siteLang string) *Source {
|
|
return &Source{
|
|
name: name,
|
|
lang: lang,
|
|
siteLang: siteLang,
|
|
client: flare.NewClient(flare.WithRateLimit(1, 2)),
|
|
id: source.GenerateSourceID(name, lang),
|
|
}
|
|
}
|
|
|
|
// ID returns the identifier that was computed when the source was constructed.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the display name of the source.
func (s *Source) Name() string {
	return s.name
}
|
|
// Lang returns the language code this source instance was registered under.
func (s *Source) Lang() string {
	return s.lang
}
|
|
// SupportsLatest reports that the source implements GetLatestUpdates; it is
// always true.
func (s *Source) SupportsLatest() bool {
	return true
}
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Referer", siteURL+"/")
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("comicfury: HTTP %d", resp.StatusCode)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func (s *Source) searchURL(page int, query, sort string, filters []source.Filter) string {
|
|
params := url.Values{
|
|
"query": {query},
|
|
"page": {strconv.Itoa(page)},
|
|
"language": {s.siteLang},
|
|
"sort": {sort},
|
|
}
|
|
for _, f := range filters {
|
|
switch sf := f.(type) {
|
|
case *source.TextFilter:
|
|
if sf.FilterName == "Tags" && sf.Text != "" {
|
|
params.Set("tags", sf.Text)
|
|
}
|
|
case *source.SelectFilter:
|
|
switch sf.FilterName {
|
|
case "Sort By":
|
|
params.Set("sort", strconv.Itoa(sf.Selected))
|
|
case "Last Updated":
|
|
params.Set("lastupdate", strconv.Itoa(sf.Selected))
|
|
case "Violence":
|
|
params.Set("fv", strconv.Itoa(sf.Selected))
|
|
case "Frontal Nudity":
|
|
params.Set("fn", strconv.Itoa(sf.Selected))
|
|
case "Strong Language":
|
|
params.Set("fl", strconv.Itoa(sf.Selected))
|
|
case "Sexual Content":
|
|
params.Set("fs", strconv.Itoa(sf.Selected))
|
|
}
|
|
case *source.CheckboxFilter:
|
|
if sf.FilterName == "Comic Completed" {
|
|
completed := 1
|
|
if sf.State {
|
|
completed = 0
|
|
}
|
|
params.Set("completed", strconv.Itoa(completed))
|
|
}
|
|
}
|
|
}
|
|
return siteURL + "/search.php?" + params.Encode()
|
|
}
|
|
|
|
func (s *Source) parseSearch(doc *goquery.Document) source.MangasPage {
|
|
var mangas []source.SManga
|
|
doc.Find("div.webcomic-result").Each(func(_ int, el *goquery.Selection) {
|
|
link := el.Find("div.webcomic-result-avatar a").First()
|
|
href := link.AttrOr("href", "")
|
|
title := el.Find("div.webcomic-result-title").First().AttrOr("title", "")
|
|
thumb := el.Find("div.webcomic-result-avatar a img").First().AttrOr("src", "")
|
|
if href == "" || title == "" {
|
|
return
|
|
}
|
|
mangas = append(mangas, source.SManga{URL: href, Title: title, ThumbnailURL: thumb})
|
|
})
|
|
hasNext := doc.Find("div.search-next-page").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.searchURL(page, "", "1", nil))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseSearch(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.searchURL(page, "", "2", nil))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseSearch(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), s.searchURL(page, query, "0", filters))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseSearch(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
rawURL := manga.URL
|
|
if !strings.HasPrefix(rawURL, "http") {
|
|
rawURL = siteURL + rawURL
|
|
}
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := source.SManga{URL: manga.URL}
|
|
desDiv := doc.Find("div.description-tags")
|
|
result.Description = strings.TrimSpace(desDiv.Parent().Clone().Find("*").Remove().End().Text())
|
|
var genres []string
|
|
desDiv.Children().Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
result.Genre = strings.Join(genres, ", ")
|
|
var authors []string
|
|
doc.Find("a.authorname").Each(func(_ int, el *goquery.Selection) {
|
|
if t := strings.TrimSpace(el.Text()); t != "" {
|
|
authors = append(authors, t)
|
|
}
|
|
})
|
|
result.Author = strings.Join(authors, ", ")
|
|
if result.Title == "" {
|
|
result.Title = manga.Title
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
// comicURL extracts the comic slug from a manga URL.
//
// The slug is taken from the "url" query parameter when present and
// non-empty (as in /comicprofile.php?url=slug). Otherwise the last non-empty
// path segment is used. When the URL cannot be parsed, or has neither a slug
// parameter nor any path segment, mangaURL is returned unchanged.
func comicURL(mangaURL string) string {
	parsed, err := url.Parse(mangaURL)
	if err != nil {
		return mangaURL
	}
	if slug := parsed.Query().Get("url"); slug != "" {
		return slug
	}

	// Fallback: use the last path segment. The emptiness check matters:
	// splitting an empty path still yields one (empty) segment, which would
	// otherwise be returned instead of reaching the final fallback.
	path := strings.Trim(parsed.Path, "/")
	if path == "" {
		return mangaURL
	}
	if cut := strings.LastIndexByte(path, '/'); cut >= 0 {
		return path[cut+1:]
	}
	return path
}
|
|
|
|
// dateOrdinalRe matches a run of digits immediately followed by an English
// ordinal suffix ("1st", "2nd", "3rd", "4th"), case-insensitively, so the
// suffix can be dropped before parsing.
var dateOrdinalRe = regexp.MustCompile(`(?i)(\d+)(st|nd|rd|th)`)

// parseDate converts a human-readable date into Unix milliseconds.
//
// Ordinal suffixes and commas are removed and whitespace is collapsed before
// parsing. Accepted shapes are "4 March 2023 12:00 PM", "4 March 2023" and
// "March 4 2023", each with either a full or a three-letter month name. The
// text carries no zone, so it is interpreted as UTC. Zero is returned when no
// layout matches.
func parseDate(s string) int64 {
	s = dateOrdinalRe.ReplaceAllString(s, "$1")
	s = strings.ReplaceAll(s, ",", "")
	// Text pulled out of HTML may contain newlines, tabs or non-breaking
	// spaces; reduce every whitespace run to a single plain space.
	s = strings.Join(strings.Fields(s), " ")

	layouts := [...]string{
		"2 January 2006 3:04 PM",
		"2 Jan 2006 3:04 PM",
		"2 January 2006",
		"2 Jan 2006",
		"January 2 2006",
		"Jan 2 2006",
	}
	for _, layout := range layouts {
		if t, err := time.Parse(layout, s); err == nil {
			return t.UnixMilli()
		}
	}
	return 0
}
|
|
|
|
// collectChapters follows pagination from a given archive page.
|
|
func (s *Source) collectChapters(startDoc *goquery.Document) ([]source.SChapter, error) {
|
|
var chapters []source.SChapter
|
|
doc := startDoc
|
|
for {
|
|
doc.Find("a:has(div.archive-comic)").Each(func(_ int, a *goquery.Selection) {
|
|
href := a.AttrOr("href", "")
|
|
if href == "" {
|
|
return
|
|
}
|
|
parsed, _ := url.Parse(href)
|
|
chURL := parsed.Path
|
|
if parsed.RawQuery != "" {
|
|
chURL += "?" + parsed.RawQuery
|
|
}
|
|
name := strings.TrimSpace(a.Find(".archive-comic-title").Text())
|
|
if name == "" {
|
|
name = "Chapter"
|
|
}
|
|
date := parseDate(strings.TrimSpace(a.Find(".archive-comic-date").Text()))
|
|
chapters = append(chapters, source.SChapter{URL: chURL, Name: name, DateUpload: date})
|
|
})
|
|
nextPage := doc.Find("span.vfpagecurrent + a.vfpage").First()
|
|
nextHref := nextPage.AttrOr("href", "")
|
|
if nextHref == "" {
|
|
break
|
|
}
|
|
nextURL := nextHref
|
|
if !strings.HasPrefix(nextURL, "http") {
|
|
nextURL = siteURL + nextURL
|
|
}
|
|
next, err := s.get(context.Background(), nextURL)
|
|
if err != nil {
|
|
break
|
|
}
|
|
doc = next
|
|
}
|
|
return chapters, nil
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
slug := comicURL(manga.URL)
|
|
archiveURL := fmt.Sprintf("%s/read/%s/archive", siteURL, slug)
|
|
doc, err := s.get(context.Background(), archiveURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var chapters []source.SChapter
|
|
archiveLinks := doc.Find("a:has(div.archive-chapter)")
|
|
if archiveLinks.Length() > 0 {
|
|
// Has parent chapters — fetch each sub-archive.
|
|
var fetchErr error
|
|
archiveLinks.Each(func(_ int, a *goquery.Selection) {
|
|
if fetchErr != nil {
|
|
return
|
|
}
|
|
href := a.AttrOr("href", "")
|
|
if href == "" {
|
|
return
|
|
}
|
|
if !strings.HasPrefix(href, "http") {
|
|
href = siteURL + href
|
|
}
|
|
subDoc, err := s.get(context.Background(), href)
|
|
if err != nil {
|
|
fetchErr = err
|
|
return
|
|
}
|
|
sub, err := s.collectChapters(subDoc)
|
|
if err != nil {
|
|
fetchErr = err
|
|
return
|
|
}
|
|
chapters = append(chapters, sub...)
|
|
})
|
|
if fetchErr != nil {
|
|
return nil, fetchErr
|
|
}
|
|
} else {
|
|
chapters, err = s.collectChapters(doc)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
// Number and reverse (oldest first → chapter_number ascending).
|
|
for i := range chapters {
|
|
chapters[i].ChapterNumber = float32(i)
|
|
}
|
|
// Reverse so newest is first in list.
|
|
for i, j := 0, len(chapters)-1; i < j; i, j = i+1, j-1 {
|
|
chapters[i], chapters[j] = chapters[j], chapters[i]
|
|
}
|
|
return chapters, nil
|
|
}
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
rawURL := chapter.URL
|
|
if !strings.HasPrefix(rawURL, "http") {
|
|
rawURL = siteURL + rawURL
|
|
}
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var pages []source.Page
|
|
doc.Find("div.is--comic-page div.is--image-segment div img").Each(func(i int, img *goquery.Selection) {
|
|
if src := img.AttrOr("src", ""); src != "" {
|
|
pages = append(pages, source.Page{Index: i, URL: rawURL, ImageURL: src})
|
|
}
|
|
})
|
|
return pages, nil
|
|
}
|
|
|
|
// GetImageURL returns the image URL already stored on the page; no request is
// made and the error is always nil.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}
|
|
|
|
func (s *Source) GetFilterList() []source.Filter {
|
|
return []source.Filter{
|
|
&source.TextFilter{FilterName: "Tags"},
|
|
&source.SelectFilter{FilterName: "Sort By", Values: []string{"Relevance", "Popularity", "Last Update"}},
|
|
&source.SelectFilter{FilterName: "Last Updated", Values: []string{"All Time", "This Week", "This Month", "This Year", "Completed Only"}},
|
|
&source.CheckboxFilter{FilterName: "Comic Completed"},
|
|
&source.SelectFilter{FilterName: "Violence", Values: []string{"None / Minimal", "Violent Content", "Gore / Graphic"}, Selected: 2},
|
|
&source.SelectFilter{FilterName: "Frontal Nudity", Values: []string{"None", "Occasional", "Frequent"}, Selected: 2},
|
|
&source.SelectFilter{FilterName: "Strong Language", Values: []string{"None", "Occasional", "Frequent"}, Selected: 2},
|
|
&source.SelectFilter{FilterName: "Sexual Content", Values: []string{"No Sexual Content", "Sexual Situations", "Strong Sexual Themes"}, Selected: 2},
|
|
}
|
|
}
|
|
|
|
func init() {
|
|
instances := []struct{ name, lang, siteLang string }{
|
|
{"Comic Fury", "all", "all"},
|
|
{"Comic Fury", "en", "en"},
|
|
{"Comic Fury", "es", "es"},
|
|
{"Comic Fury", "pt-BR", "pt"},
|
|
{"Comic Fury", "de", "de"},
|
|
{"Comic Fury", "fr", "fr"},
|
|
{"Comic Fury", "it", "it"},
|
|
{"Comic Fury", "pl", "pl"},
|
|
{"Comic Fury", "ja", "ja"},
|
|
{"Comic Fury", "zh", "zh"},
|
|
{"Comic Fury", "ru", "ru"},
|
|
{"Comic Fury", "fi", "fi"},
|
|
{"Comic Fury", "other", "other"},
|
|
{"Comic Fury (No Text)", "other", "notext"},
|
|
}
|
|
for _, inst := range instances {
|
|
registry.Register(newSource(inst.name, inst.lang, inst.siteLang))
|
|
}
|
|
}
|