feat(sourcetest): add -v flag with verbose manga list output

When -v is passed, test-sources.sh passes it through to go test -v.
sourcetest.Run uses testing.Verbose() to print the full manga list
from GetPopularManga and GetLatestUpdates, showing title + URL.
This commit is contained in:
achmad
2026-05-14 13:23:29 +07:00
parent 26063e097b
commit 44b50937d5
7 changed files with 371 additions and 245 deletions
+243 -58
View File
@@ -1,11 +1,14 @@
package httpclient
import (
"bytes"
"context"
"fmt"
"io"
"log"
"net/http"
"net/http/cookiejar"
"net/url"
"strconv"
"sync"
"time"
@@ -15,17 +18,58 @@ import (
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
var verboseLog bool
var (
verboseLog bool
defaultOnce sync.Once
defaultClient *Client
)
func SetVerboseLog(enabled bool) {
verboseLog = enabled
func SetVerboseLog(enabled bool) { verboseLog = enabled }
// DefaultClient hands out the process-wide shared *Client.
//
// The instance is built by newClient the first time DefaultClient is called
// and the same pointer is returned on every later call, so all callers go
// through the same per-host rate limiters and the same cookie jar. newClient
// attaches a FlareSolverr client whenever NewFlareSolverrClient succeeds
// (which requires FLARESOLVERR_URL to be set).
func DefaultClient() *Client {
	initShared := func() { defaultClient = newClient() }
	defaultOnce.Do(initShared)
	return defaultClient
}
// NewClient builds an independent client: it starts from the same defaults as
// the shared client (newClient) and then applies opts in the order given.
//
// Reach for this only when a source needs behaviour that differs from the
// shared client, such as its own rate limit; in every other case prefer
// DefaultClient.
func NewClient(opts ...Option) *Client {
	client := newClient()
	for i := range opts {
		opts[i](client)
	}
	return client
}
// newClient assembles a Client with the package defaults: a 30-second HTTP
// timeout, a fresh cookie jar, a rate limit of 1 with burst 1, the default
// user agent, an empty limiter table, and the current package-level verbose
// setting. A FlareSolverr client is attached only when NewFlareSolverrClient
// returns without error; otherwise fsClient stays nil.
func newClient() *Client {
	// The error from cookiejar.New is deliberately discarded here.
	jar, _ := cookiejar.New(nil)
	httpc := &http.Client{Jar: jar, Timeout: 30 * time.Second}

	client := &Client{
		http:       httpc,
		userAgent:  defaultUserAgent,
		rateLimit:  1,
		burst:      1,
		verboseLog: verboseLog,
		limiters:   make(map[string]*rate.Limiter),
	}

	if fs, err := NewFlareSolverrClient(); err == nil {
		client.fsClient = fs
	}
	return client
}
type Client struct {
http *http.Client
fsClient *FlareSolverrClient
rateLimit float64
burst int
referer string
userAgent string
verboseLog bool
mu sync.Mutex
@@ -45,29 +89,14 @@ func WithTimeout(d time.Duration) Option {
return func(c *Client) { c.http.Timeout = d }
}
func WithReferer(referer string) Option {
return func(c *Client) { c.referer = referer }
func WithUserAgent(ua string) Option {
return func(c *Client) { c.userAgent = ua }
}
// WithVerboseLog returns an Option that sets the client's verboseLog flag to
// enabled, replacing the value the client was constructed with.
func WithVerboseLog(enabled bool) Option {
	apply := func(c *Client) {
		c.verboseLog = enabled
	}
	return apply
}
func NewClient(opts ...Option) *Client {
jar, _ := cookiejar.New(nil)
c := &Client{
http: &http.Client{Timeout: 30 * time.Second, Jar: jar},
rateLimit: 1,
burst: 1,
limiters: map[string]*rate.Limiter{},
verboseLog: verboseLog,
}
for _, o := range opts {
o(c)
}
return c
}
func (c *Client) limiter(host string) *rate.Limiter {
c.mu.Lock()
defer c.mu.Unlock()
@@ -79,23 +108,55 @@ func (c *Client) limiter(host string) *rate.Limiter {
return l
}
// Do sends req on behalf of the client and returns the resulting response.
//
// It performs, in order:
//  1. a wait on the rate limiter for req.URL.Host, returning the limiter's
//     error if the wait fails (for example because req's context ended);
//  2. filling in the client's User-Agent when req does not carry one;
//  3. dispatch: through FlareSolverr (doFS) when a FlareSolverr client is
//     configured, otherwise as a plain HTTP request (doDirect).
//
// FlareSolverr is used for every request rather than only as a fallback.
// The stated reason: Go's TLS fingerprint doesn't match Chrome's, so
// Cloudflare clearance cookies obtained through FlareSolverr are rejected
// when replayed by Go's net/http, and each direct request would be challenged
// again. FlareSolverr's Chrome caches the clearance internally, so repeated
// calls for the same domain are near-instant.
//
// A previous "direct-first" strategy is disabled for that reason. It issued
// the direct request, returned it unless the status was 403 or 503, and only
// then retried via doFS (passing the direct status along), or reported
// "challenge detected but FlareSolverr not configured" when no FlareSolverr
// client was available.
func (c *Client) Do(req *http.Request) (*http.Response, error) {
	hostLimiter := c.limiter(req.URL.Host)
	if waitErr := hostLimiter.Wait(req.Context()); waitErr != nil {
		return nil, waitErr
	}

	if ua := req.Header.Get("User-Agent"); ua == "" {
		req.Header.Set("User-Agent", c.userAgent)
	}

	// No FlareSolverr configured: plain HTTP is the only option.
	if c.fsClient == nil {
		return c.doDirect(req)
	}
	return c.doFS(req, 0)
}
func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
return nil, err
}
if c.referer != "" && req.Header.Get("Referer") == "" {
req.Header.Set("Referer", c.referer)
}
if req.Header.Get("User-Agent") == "" {
req.Header.Set("User-Agent", defaultUserAgent)
}
if c.verboseLog {
log.Printf("[httpclient] DIRECT GET %s", req.URL.String())
log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String())
}
const maxRetries = 3
@@ -124,6 +185,156 @@ func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
panic("unreachable")
}
// doFS fetches req.URL through FlareSolverr (c.fsClient.GetRaw) and converts
// the solution into a synthetic *http.Response whose Request field is req.
//
// directStatus is the status code of an earlier direct attempt, or 0 when no
// direct attempt was made. It is reported when FlareSolverr answers 200 but
// the body still looks like a Cloudflare challenge page; when it is 0,
// http.StatusForbidden is reported instead so callers never observe a zero
// StatusCode.
//
// The returned headers are, in priority order: the headers FlareSolverr
// reported, one Set-Cookie entry per returned cookie, and finally any request
// header whose key is absent from the response headers. The body has the
// Chrome <pre> wrapper removed by stripFSWrapper.
//
// NOTE(review): only the URL is forwarded to GetRaw, which issues a
// "request.get" command. req.Method, req.Body and the request headers are not
// sent to FlareSolverr, so a POST routed here is executed as a GET. Confirm
// this is acceptable for callers of Post.
func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) {
	if c.verboseLog {
		log.Printf("[httpclient] FS FALLBACK %s %s", req.Method, req.URL.String())
	}

	rawURL := req.URL.String()
	rawBody, statusCode, fsHeaders, cookies, fsRespURL, err := c.fsClient.GetRaw(req.Context(), rawURL)
	if err != nil {
		return nil, err
	}
	// Convert once; both the challenge check and the wrapper strip need bytes.
	raw := []byte(rawBody)

	// Prefer the final URL reported by FlareSolverr (after redirects) so the
	// cookies are filed under the domain that actually issued them.
	respURL := rawURL
	if fsRespURL != "" {
		respURL = fsRespURL
	}

	// Store FlareSolverr's cookies in the client's jar so they are available
	// through Cookie and to anything using the underlying *http.Client.
	if len(cookies) > 0 && c.http.Jar != nil {
		if u, uErr := url.Parse(respURL); uErr == nil {
			c.http.Jar.SetCookies(u, cookies)
		}
	}

	// A 200 from FlareSolverr only means Chrome rendered something. If that
	// something is still the challenge page, report a failure status instead.
	if statusCode == http.StatusOK && isCloudflareChallenge(raw) {
		statusCode = directStatus
		if statusCode == 0 {
			// No direct attempt was made, so there is no status to echo;
			// a blocked challenge is reported as 403 rather than 0.
			statusCode = http.StatusForbidden
		}
	}

	// Response headers as reported by FlareSolverr. Values arrive as decoded
	// JSON, i.e. either a single string or a list; other shapes are ignored.
	hdr := make(http.Header)
	for k, v := range fsHeaders {
		switch val := v.(type) {
		case string:
			hdr.Set(k, val)
		case []any:
			for _, sv := range val {
				hdr.Add(k, fmt.Sprint(sv))
			}
		}
	}

	// Make sure every returned cookie is visible as a Set-Cookie header even
	// if FlareSolverr omitted it from the headers map.
	for _, ck := range cookies {
		hdr.Add("Set-Cookie", ck.String())
	}

	// Backfill with request headers for keys the response does not define.
	for k, v := range req.Header {
		if hdr.Get(k) == "" {
			hdr[k] = v
		}
	}

	body := stripFSWrapper(raw)
	return &http.Response{
		Status:        fmt.Sprintf("%d %s", statusCode, http.StatusText(statusCode)),
		StatusCode:    statusCode,
		Header:        hdr,
		Body:          io.NopCloser(bytes.NewReader(body)),
		ContentLength: int64(len(body)),
		Request:       req,
	}, nil
}
// HTTPClient exposes the *http.Client wrapped by this Client, for handing to
// helpers that need a plain HTTP client (graphql etc.).
func (c *Client) HTTPClient() *http.Client {
	return c.http
}
// Cookie looks up a cookie by name among the cookies the jar would send to
// https://<host> (host is e.g. "mangahub.io") and returns its value. The
// first match wins; an empty string means no cookie with that name exists.
func (c *Client) Cookie(name, host string) string {
	target := &url.URL{Scheme: "https", Host: host}
	stored := c.http.Jar.Cookies(target)
	for i := range stored {
		if stored[i].Name == name {
			return stored[i].Value
		}
	}
	return ""
}
// Get issues a GET for urlStr, bound to ctx, through Do. It takes no custom
// headers: callers that need them should construct the *http.Request
// themselves and pass it to Do.
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
	if buildErr != nil {
		return nil, buildErr
	}
	return c.Do(request)
}
// Post issues a POST for urlStr, bound to ctx, through Do. body supplies the
// request payload and bodyType is sent as its Content-Type header.
func (c *Client) Post(ctx context.Context, urlStr string, bodyType string, body io.Reader) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodPost, urlStr, body)
	if buildErr != nil {
		return nil, buildErr
	}
	request.Header.Set("Content-Type", bodyType)
	return c.Do(request)
}
// isCloudflareChallenge reports whether body contains any of the markers
// treated as evidence of a Cloudflare challenge page, meaning the challenge
// was not solved and the challenge page itself was rendered. The match is a
// plain, case-sensitive substring search.
func isCloudflareChallenge(body []byte) bool {
	switch {
	case bytes.Contains(body, []byte("Just a moment...")),
		bytes.Contains(body, []byte("cf_chl_opt")),
		bytes.Contains(body, []byte("challenges.cloudflare.com")),
		bytes.Contains(body, []byte("/cdn-cgi/challenge-platform")),
		bytes.Contains(body, []byte("Enable JavaScript and cookies")):
		return true
	default:
		return false
	}
}
// stripFSWrapper removes the Chrome HTML wrapper that surrounds non-HTML
// responses fetched through FlareSolverr:
//
//	<html><head>...</head><body><pre>actual_body</pre></body></html>
//
// The opening tag may also carry attributes (for example
// <pre style="word-wrap: break-word; white-space: pre-wrap;">), which a
// literal "<pre>" search would miss.
//
// If body (ignoring surrounding whitespace) starts with "<html" and contains
// a <pre ...> element, the bytes between the end of the first opening <pre>
// tag and the last "</pre>" are returned as a sub-slice of body. In every
// other case body is returned unchanged (e.g. real HTML pages, JSON that was
// never wrapped).
//
// NOTE(review): the extracted content is returned as-is; HTML entities such
// as "&amp;" are not decoded. Confirm whether wrapped payloads can contain
// them.
func stripFSWrapper(body []byte) []byte {
	if !bytes.HasPrefix(bytes.TrimSpace(body), []byte("<html")) {
		return body
	}
	start := preContentStart(body)
	if start < 0 {
		return body
	}
	end := bytes.LastIndex(body, []byte("</pre>"))
	if end < start {
		// No closing tag, or it precedes the content start.
		return body
	}
	return body[start:end]
}

// preContentStart returns the index just past the first opening <pre> tag in
// body (with or without attributes), or -1 when there is none. Tags that
// merely begin with "pre", such as <prefix>, are skipped.
func preContentStart(body []byte) int {
	open := []byte("<pre")
	for from := 0; ; {
		rel := bytes.Index(body[from:], open)
		if rel < 0 {
			return -1
		}
		after := from + rel + len(open)
		if after >= len(body) {
			return -1
		}
		switch body[after] {
		case '>':
			return after + 1
		case ' ', '\t', '\n', '\r':
			// Attributes follow; the content starts after the closing '>'.
			gt := bytes.IndexByte(body[after:], '>')
			if gt < 0 {
				return -1
			}
			return after + gt + 1
		}
		// Some other tag name that starts with "pre"; keep searching.
		from = after
	}
}
func retryAfter(resp *http.Response) time.Duration {
ra := resp.Header.Get("Retry-After")
if ra == "" {
@@ -140,29 +351,3 @@ func retryAfter(resp *http.Response) time.Duration {
}
return 5 * time.Second
}
func (c *Client) Get(ctx context.Context, url string, headers map[string]string) (*http.Response, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, err
}
for k, v := range headers {
req.Header.Set(k, v)
}
return c.Do(req)
}
func (c *Client) Post(ctx context.Context, url string, body io.Reader, contentType string, headers map[string]string) (*http.Response, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, body)
if err != nil {
return nil, err
}
req.Header.Set("Content-Type", contentType)
for k, v := range headers {
req.Header.Set(k, v)
}
return c.Do(req)
}
// HTTPClient returns the underlying *http.Client (for passing to graphql helper etc.)
func (c *Client) HTTPClient() *http.Client { return c.http }
+22 -175
View File
@@ -1,184 +1,31 @@
// Package flare provides backward-compatible HTTP client helpers.
//
// All client logic now lives in the parent httpclient package.
// This package re-exports httpclient.Client for sources that already import
// "goyomi/internal/httpclient/flare" and is kept to avoid breaking existing code.
package flare
import (
"context"
"fmt"
"io"
"net/http"
"net/http/cookiejar"
"net/url"
"strings"
"sync"
"time"
import "goyomi/internal/httpclient"
"goyomi/internal/httpclient"
"golang.org/x/time/rate"
)
// Client is an alias for httpclient.Client.
//
// Sources that need the shared singleton should call httpclient.DefaultClient().
// Sources that need custom configuration (e.g., different rate limit) should
// call httpclient.NewClient() directly.
type Client = httpclient.Client
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
type Client struct {
fsClient *httpclient.FlareSolverrClient
rateLimit float64
burst int
referer string
mu sync.Mutex
limiters map[string]*rate.Limiter
// NewClient forwards opts to httpclient.NewClient and returns the client it
// builds.
//
// Deprecated: use httpclient.DefaultClient() to obtain the shared singleton,
// or httpclient.NewClient(...) when a source needs custom configuration.
func NewClient(opts ...httpclient.Option) *Client {
	client := httpclient.NewClient(opts...)
	return client
}
type Response struct {
*http.Response
Body io.ReadCloser
}
func (r *Response) Close() error {
return r.Body.Close()
}
type Option func(*Client)
// Option aliases httpclient.Option.
type Option = httpclient.Option
// WithRateLimit aliases httpclient.WithRateLimit.
func WithRateLimit(rps float64, burst int) Option {
return func(c *Client) {
c.rateLimit = rps
c.burst = burst
}
return httpclient.WithRateLimit(rps, burst)
}
func WithReferer(referer string) Option {
return func(c *Client) { c.referer = referer }
}
func WithTimeout(d time.Duration) Option {
return func(c *Client) {}
}
func NewClient(opts ...Option) *Client {
c := &Client{
limiters: make(map[string]*rate.Limiter),
}
for _, opt := range opts {
opt(c)
}
if c.rateLimit == 0 {
c.rateLimit = 1
}
if c.burst == 0 {
c.burst = 1
}
fsClient, err := httpclient.NewFlareSolverrClient()
if err == nil {
c.fsClient = fsClient
}
return c
}
func (c *Client) SetFlareSolverrClient(fs *httpclient.FlareSolverrClient) {
c.fsClient = fs
}
func (c *Client) doRequest(ctx context.Context, method string, rawURL string, body string) (*Response, error) {
if c.fsClient == nil {
return nil, fmt.Errorf("FlareSolverr client not configured")
}
c.mu.Lock()
limiter, ok := c.limiters[rawURL]
if !ok {
limiter = rate.NewLimiter(rate.Limit(c.rateLimit), c.burst)
c.limiters[rawURL] = limiter
}
c.mu.Unlock()
if err := limiter.Wait(ctx); err != nil {
return nil, err
}
var html string
var cookies []*http.Cookie
var err error
if method == http.MethodGet {
html, cookies, err = c.fsClient.Get(ctx, rawURL)
} else {
html, cookies, err = c.fsClient.Post(ctx, rawURL, body)
}
if err != nil {
return nil, err
}
jar, _ := cookiejar.New(nil)
if len(cookies) > 0 {
httpURL, _ := http.NewRequest(http.MethodGet, rawURL, nil)
jar.SetCookies(httpURL.URL, cookies)
}
fakeResp := &http.Response{
StatusCode: http.StatusOK,
Header: http.Header{},
Body: io.NopCloser(strings.NewReader(html)),
Request: &http.Request{URL: &url.URL{Path: rawURL}},
}
return &Response{Response: fakeResp, Body: fakeResp.Body}, nil
}
func (c *Client) Get(ctx context.Context, rawURL string) (*Response, error) {
return c.doRequest(ctx, http.MethodGet, rawURL, "")
}
func (c *Client) Post(ctx context.Context, rawURL string, bodyType string, body io.Reader) (*Response, error) {
bodyStr, _ := io.ReadAll(body)
return c.doRequest(ctx, http.MethodPost, rawURL, string(bodyStr))
}
func (c *Client) GetHTML(ctx context.Context, url string) (io.ReadCloser, error) {
resp, err := c.Get(ctx, url)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
return nil, fmt.Errorf("HTTP %d for %s", resp.StatusCode, url)
}
return resp.Body, nil
}
func (c *Client) PostHTML(ctx context.Context, url string, body string) (io.ReadCloser, error) {
resp, err := c.Post(ctx, url, "application/x-www-form-urlencoded", strings.NewReader(body))
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
return nil, fmt.Errorf("HTTP %d for %s", resp.StatusCode, url)
}
return resp.Body, nil
}
func (c *Client) GetBytes(ctx context.Context, url string) ([]byte, error) {
resp, err := c.Get(ctx, url)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("HTTP %d for %s", resp.StatusCode, url)
}
return io.ReadAll(resp.Body)
}
func (c *Client) SetProxy(proxyURL string) error {
return nil
}
func (c *Client) Do(req *http.Request) (*http.Response, error) {
url := req.URL.String()
var body string
if req.Body != nil {
b, _ := io.ReadAll(req.Body)
body = string(b)
}
resp, err := c.doRequest(req.Context(), req.Method, url, body)
if err != nil {
return nil, err
}
return resp.Response, nil
}
+72 -4
View File
@@ -10,8 +10,9 @@ import (
)
type FlareSolverrClient struct {
endpoint string
client *http.Client
endpoint string
client *http.Client
sessionID string
}
func NewFlareSolverrClient() (*FlareSolverrClient, error) {
@@ -19,9 +20,14 @@ func NewFlareSolverrClient() (*FlareSolverrClient, error) {
if ep == "" {
return nil, fmt.Errorf("FLARESOLVERR_URL not set")
}
sessionID := os.Getenv("FLARESOLVERR_SESSION")
if sessionID == "" {
sessionID = "goyomi"
}
return &FlareSolverrClient{
endpoint: ep,
client: &http.Client{},
endpoint: ep,
client: &http.Client{},
sessionID: sessionID,
}, nil
}
@@ -58,6 +64,66 @@ func (f *FlareSolverrClient) Get(ctx context.Context, url string) (html string,
return f.request(ctx, "request.get", url, "", nil)
}
// GetRaw fetches url through FlareSolverr using a "request.get" command with
// the raw flag set, the client's session ID, and a 60-second maxTimeout.
//
// On success it returns the solution's response body, its HTTP status code,
// its response headers, the cookies FlareSolverr reported, and the final
// response URL. Unlike Get, the status code, headers and final URL are
// exposed to the caller, which makes it usable for JSON API endpoints behind
// Cloudflare.
//
// An error is returned when the request cannot be encoded or sent, when the
// reply cannot be decoded as JSON, or when FlareSolverr's status is not "ok".
// All other results are zero values in that case.
//
// NOTE(review): how the FlareSolverr instance interprets "raw" (in particular
// whether the body is still wrapped by Chrome) is not visible from this
// function. Confirm against the deployed FlareSolverr build.
func (f *FlareSolverrClient) GetRaw(ctx context.Context, url string) (body string, statusCode int, respHeaders map[string]any, cookies []*http.Cookie, respURL string, err error) {
	type fsRequest struct {
		Cmd        string            `json:"cmd"`
		URL        string            `json:"url"`
		PostData   string            `json:"postData,omitempty"`
		Headers    map[string]string `json:"headers,omitempty"`
		MaxTimeout int               `json:"maxTimeout"`
		Raw        bool              `json:"raw,omitempty"`
		Session    string            `json:"session,omitempty"`
	}

	payload, err := json.Marshal(fsRequest{
		Cmd:        "request.get",
		URL:        url,
		MaxTimeout: 60000,
		Raw:        true,
		Session:    f.sessionID,
	})
	if err != nil {
		return "", 0, nil, nil, "", fmt.Errorf("flaresolverr: encoding request: %w", err)
	}

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, f.endpoint+"/v1", bytes.NewReader(payload))
	if err != nil {
		return "", 0, nil, nil, "", fmt.Errorf("flaresolverr: building request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := f.client.Do(httpReq)
	if err != nil {
		return "", 0, nil, nil, "", fmt.Errorf("flaresolverr: sending request: %w", err)
	}
	defer resp.Body.Close()

	var fsResp FlareSolverrResponse
	if err := json.NewDecoder(resp.Body).Decode(&fsResp); err != nil {
		// Include the HTTP status: a non-JSON reply usually comes from a
		// proxy or a crashed FlareSolverr rather than from the solver itself.
		return "", 0, nil, nil, "", fmt.Errorf("flaresolverr: decoding response (HTTP %d): %w", resp.StatusCode, err)
	}
	if fsResp.Status != "ok" {
		return "", 0, nil, nil, "", fmt.Errorf("flaresolverr: status %q", fsResp.Status)
	}

	if n := len(fsResp.Solution.Cookies); n > 0 {
		cookies = make([]*http.Cookie, 0, n)
	}
	for _, c := range fsResp.Solution.Cookies {
		cookies = append(cookies, &http.Cookie{
			Name:     c.Name,
			Value:    c.Value,
			Domain:   c.Domain,
			Path:     c.Path,
			HttpOnly: c.HTTPOnly,
			Secure:   c.Secure,
		})
	}

	return fsResp.Solution.Response, fsResp.Solution.Status, fsResp.Solution.Headers, cookies, fsResp.Solution.URL, nil
}
// Post sends body to url through FlareSolverr using the "request.post"
// command, declaring the payload as application/x-www-form-urlencoded, and
// returns whatever the shared request helper yields for it.
func (f *FlareSolverrClient) Post(ctx context.Context, url string, body string) (html string, cookies []*http.Cookie, err error) {
	formHeaders := map[string]string{"Content-Type": "application/x-www-form-urlencoded"}
	return f.request(ctx, "request.post", url, body, formHeaders)
}
@@ -69,12 +135,14 @@ func (f *FlareSolverrClient) request(ctx context.Context, cmd, url, body string,
PostData string `json:"postData,omitempty"`
Headers map[string]string `json:"headers,omitempty"`
MaxTimeout int `json:"maxTimeout"`
Session string `json:"session,omitempty"`
}
req := fsRequest{
Cmd: cmd,
URL: url,
MaxTimeout: 60000,
Session: f.sessionID,
}
if body != "" {
req.PostData = body
+12
View File
@@ -39,6 +39,12 @@ func Run(t *testing.T, s source.CatalogueSource, wantName, wantLang string) {
if len(page.Mangas) == 0 {
t.Fatal("GetPopularManga returned 0 results")
}
if testing.Verbose() {
t.Logf("--- GetPopularManga (%d results) ---", len(page.Mangas))
for i, m := range page.Mangas {
t.Logf(" [%d] %-60s %s", i, m.Title, m.URL)
}
}
for i, m := range page.Mangas {
if m.Title == "" {
t.Errorf("manga[%d].Title is empty", i)
@@ -61,6 +67,12 @@ func Run(t *testing.T, s source.CatalogueSource, wantName, wantLang string) {
if len(page.Mangas) == 0 {
t.Fatal("GetLatestUpdates returned 0 results")
}
if testing.Verbose() {
t.Logf("--- GetLatestUpdates (%d results) ---", len(page.Mangas))
for i, m := range page.Mangas {
t.Logf(" [%d] %-60s %s", i, m.Title, m.URL)
}
}
for i, m := range page.Mangas {
if m.Title == "" {
t.Errorf("manga[%d].Title is empty", i)