feat(sourcetest): add -v flag with verbose manga list output

When -v is passed, test-sources.sh passes it through to go test -v.
sourcetest.Run uses testing.Verbose() to print the full manga list
from GetPopularManga and GetLatestUpdates, showing title + URL.
This commit is contained in:
achmad
2026-05-14 13:23:29 +07:00
parent 26063e097b
commit 44b50937d5
7 changed files with 371 additions and 245 deletions
+243 -58
View File
@@ -1,11 +1,14 @@
package httpclient
import (
"bytes"
"context"
"fmt"
"io"
"log"
"net/http"
"net/http/cookiejar"
"net/url"
"strconv"
"sync"
"time"
@@ -15,17 +18,58 @@ import (
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
var verboseLog bool
// Package-level state shared by the constructors in this file.
var (
// verboseLog is the default request-logging setting; it is written by
// SetVerboseLog and copied into each Client when the client is built.
verboseLog bool
// defaultOnce guards the one-time construction of defaultClient.
defaultOnce sync.Once
// defaultClient is the shared instance handed out by DefaultClient.
defaultClient *Client
)
func SetVerboseLog(enabled bool) {
verboseLog = enabled
// SetVerboseLog sets the package-wide default for request logging.
//
// The value is copied into a Client when it is constructed, so calling this
// after a client already exists does not change that client's logging.
func SetVerboseLog(enabled bool) {
	verboseLog = enabled
}
// DefaultClient returns the process-wide shared HTTP client, building it on
// first use. Every caller receives the same *Client, so its per-host rate
// limiters and its cookie jar are shared by all sources that use it.
//
// FlareSolverr is auto-configured if the FLARESOLVERR_URL env var is set.
func DefaultClient() *Client {
	defaultOnce.Do(func() { defaultClient = newClient() })
	return defaultClient
}
// NewClient builds an independent client and applies opts to it in order.
//
// Prefer DefaultClient unless a source genuinely needs its own settings
// (for example a custom rate limit); a standalone client does not share the
// default client's limiters or cookie jar.
func NewClient(opts ...Option) *Client {
	client := newClient()
	for i := range opts {
		opts[i](client)
	}
	return client
}
// newClient builds a Client with the package defaults: a 30 second timeout,
// a fresh cookie jar, a rate limit and burst of 1, the default mobile
// User-Agent, and the current package-level verbose-logging setting.
//
// FlareSolverr support is enabled only when NewFlareSolverrClient succeeds;
// its error is deliberately dropped so the client falls back to direct HTTP.
func newClient() *Client {
	// The error is ignored: cookiejar.New is not expected to fail when called
	// with nil options.
	cookies, _ := cookiejar.New(nil)

	client := &Client{
		http: &http.Client{
			Timeout: 30 * time.Second,
			Jar:     cookies,
		},
		rateLimit:  1,
		burst:      1,
		userAgent:  defaultUserAgent,
		limiters:   map[string]*rate.Limiter{},
		verboseLog: verboseLog,
	}

	if solver, err := NewFlareSolverrClient(); err == nil {
		client.fsClient = solver
	}
	return client
}
type Client struct {
http *http.Client
fsClient *FlareSolverrClient
rateLimit float64
burst int
referer string
userAgent string
verboseLog bool
mu sync.Mutex
@@ -45,29 +89,14 @@ func WithTimeout(d time.Duration) Option {
return func(c *Client) { c.http.Timeout = d }
}
func WithReferer(referer string) Option {
return func(c *Client) { c.referer = referer }
func WithUserAgent(ua string) Option {
return func(c *Client) { c.userAgent = ua }
}
// WithVerboseLog returns an Option that turns request logging on or off for
// the client it is applied to, overriding the package-level default.
func WithVerboseLog(enabled bool) Option {
	apply := func(target *Client) {
		target.verboseLog = enabled
	}
	return apply
}
// NewClient builds a client with a 30 second timeout, a fresh cookie jar, a
// rate limit and burst of 1, and the package-level verbose-logging setting,
// then applies opts in order.
func NewClient(opts ...Option) *Client {
	// The error is ignored: cookiejar.New is not expected to fail when called
	// with nil options.
	cookies, _ := cookiejar.New(nil)

	client := &Client{
		http: &http.Client{
			Timeout: 30 * time.Second,
			Jar:     cookies,
		},
		rateLimit:  1,
		burst:      1,
		limiters:   map[string]*rate.Limiter{},
		verboseLog: verboseLog,
	}
	for i := range opts {
		opts[i](client)
	}
	return client
}
func (c *Client) limiter(host string) *rate.Limiter {
c.mu.Lock()
defer c.mu.Unlock()
@@ -79,23 +108,55 @@ func (c *Client) limiter(host string) *rate.Limiter {
return l
}
// Do sends req, throttled by the per-host rate limiter, and returns the
// response.
//
// When a FlareSolverr client is configured, every request is routed through
// it; otherwise the request is sent directly with net/http. A default
// User-Agent is applied when the request does not already carry one.
//
// When FlareSolverr is used, the Chrome HTML wrapper is stripped from the
// response so that both JSON and HTML callers receive the real server output.
func (c *Client) Do(req *http.Request) (*http.Response, error) {
	if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
		return nil, err
	}

	// A hand-built http.Request may have a nil Header map; Header.Set would
	// panic on it.
	if req.Header == nil {
		req.Header = make(http.Header)
	}
	if req.Header.Get("User-Agent") == "" {
		req.Header.Set("User-Agent", c.userAgent)
	}

	// Always route through FlareSolverr when configured. Go's TLS fingerprint
	// doesn't match Chrome's, so Cloudflare clearance cookies from FS are
	// rejected by Go's net/http — meaning every direct request gets challenged
	// again. FS Chrome caches the clearance internally, so subsequent calls
	// for the same domain are near-instant.
	//
	// The earlier "direct first, FlareSolverr only on 403/503" strategy is
	// disabled for that reason.
	if c.fsClient != nil {
		// No direct attempt was made, so there is no real status to report if
		// FlareSolverr fails to solve the challenge. Pass 403 so callers see a
		// meaningful error status instead of a zero StatusCode.
		//
		// NOTE(review): doFS forwards only the URL to FlareSolverr, so the
		// method, body and headers of non-GET requests are not sent.
		return c.doFS(req, http.StatusForbidden)
	}

	// When FS is not configured, fall back to direct HTTP.
	//
	// NOTE(review): doDirect appears to wait on the same limiter again;
	// confirm direct requests are not throttled twice.
	return c.doDirect(req)
}
func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
return nil, err
}
if c.referer != "" && req.Header.Get("Referer") == "" {
req.Header.Set("Referer", c.referer)
}
if req.Header.Get("User-Agent") == "" {
req.Header.Set("User-Agent", defaultUserAgent)
}
if c.verboseLog {
log.Printf("[httpclient] DIRECT GET %s", req.URL.String())
log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String())
}
const maxRetries = 3
@@ -124,6 +185,156 @@ func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
panic("unreachable")
}
// doFS fetches req.URL through FlareSolverr and adapts the result into an
// *http.Response.
//
// Only the URL is forwarded to FlareSolverr (via GetRaw); the request's
// method, body and headers are not sent.
//
// directStatus is the status code to report when FlareSolverr returns 200 but
// the body is still a Cloudflare challenge page, typically the status of a
// preceding direct attempt. When it is zero (no direct attempt was made),
// 403 Forbidden is reported instead so callers never see a zero StatusCode.
//
// The returned response carries the body with FlareSolverr's Chrome wrapper
// removed, headers built from the FlareSolverr response (topped up with
// Set-Cookie entries and any request headers it lacks), and req as Request.
func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) {
	if c.verboseLog {
		log.Printf("[httpclient] FS FALLBACK %s %s", req.Method, req.URL.String())
	}

	rawURL := req.URL.String()
	rawBody, statusCode, fsHeaders, cookies, fsRespURL, err := c.fsClient.GetRaw(req.Context(), rawURL)
	if err != nil {
		return nil, err
	}
	// Convert once; the bytes are used for both challenge detection and
	// wrapper stripping.
	raw := []byte(rawBody)

	// Use the actual response URL from FlareSolverr (follows redirects
	// through Chrome) so cookies are associated with the right domain.
	respURL := rawURL
	if fsRespURL != "" {
		respURL = fsRespURL
	}

	// Feed FlareSolverr cookies into the client's jar so Cookie and any
	// request made with the underlying http.Client can see them. The jar is
	// checked for nil because HTTPClient exposes the http.Client to callers.
	if jar := c.http.Jar; jar != nil && len(cookies) > 0 {
		if u, uErr := url.Parse(respURL); uErr == nil {
			jar.SetCookies(u, cookies)
		}
	}

	// When FlareSolverr returns status 200, Chrome rendered the page.
	// Check if the body actually contains Cloudflare challenge indicators
	// rather than relying on structural heuristics (<pre> wrapper).
	if statusCode == 200 && isCloudflareChallenge(raw) {
		statusCode = directStatus
		if statusCode == 0 {
			statusCode = http.StatusForbidden
		}
	}

	// Build response headers from the actual FS response headers. Values
	// of any type other than string or []any are skipped.
	hdr := make(http.Header)
	for k, v := range fsHeaders {
		switch val := v.(type) {
		case string:
			hdr.Set(k, val)
		case []any:
			for _, sv := range val {
				hdr.Add(k, fmt.Sprint(sv))
			}
		}
	}

	// Ensure Set-Cookie headers from FS cookies are present even if FS
	// omitted them from the headers map.
	for _, ck := range cookies {
		hdr.Add("Set-Cookie", ck.String())
	}

	// Copy any request headers not present in the FS response. The values
	// are copied so later edits to the response headers cannot modify the
	// caller's request.
	for k, v := range req.Header {
		if hdr.Get(k) == "" {
			hdr[k] = append([]string(nil), v...)
		}
	}

	body := stripFSWrapper(raw)

	return &http.Response{
		Status:        fmt.Sprintf("%d %s", statusCode, http.StatusText(statusCode)),
		StatusCode:    statusCode,
		Header:        hdr,
		Body:          io.NopCloser(bytes.NewReader(body)),
		ContentLength: int64(len(body)),
		Request:       req,
	}, nil
}
// HTTPClient exposes the wrapped *http.Client so it can be handed to code
// that needs a plain client (for passing to graphql etc.). Requests made
// directly through it do not go through Do.
func (c *Client) HTTPClient() *http.Client {
	return c.http
}
// Cookie looks up the cookie called name in the client's jar for the given
// host (e.g. "mangahub.io"), querying the jar with an https URL for that host.
// It returns the cookie's value, or the empty string when no such cookie is
// stored.
func (c *Client) Cookie(name, host string) string {
	target := url.URL{Scheme: "https", Host: host}
	stored := c.http.Jar.Cookies(&target)
	for i := range stored {
		if stored[i].Name == name {
			return stored[i].Value
		}
	}
	return ""
}
// Get builds a GET request for urlStr bound to ctx and sends it with Do.
// Callers that need custom headers should construct the request themselves
// and call Do directly.
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
	if buildErr != nil {
		return nil, buildErr
	}
	return c.Do(request)
}
// Post builds a POST request for urlStr bound to ctx, with body as the
// payload and bodyType as its Content-Type header, and sends it with Do.
func (c *Client) Post(ctx context.Context, urlStr string, bodyType string, body io.Reader) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodPost, urlStr, body)
	if buildErr != nil {
		return nil, buildErr
	}
	request.Header.Set("Content-Type", bodyType)
	return c.Do(request)
}
// isCloudflareChallenge reports whether body looks like a Cloudflare
// challenge page, i.e. FlareSolverr did not solve the challenge and Chrome
// rendered the interstitial instead of the real content.
//
// Detection is a case-sensitive substring search for a fixed set of markers;
// a single match is enough.
func isCloudflareChallenge(body []byte) bool {
	markers := [...][]byte{
		[]byte("Just a moment..."),
		[]byte("cf_chl_opt"),
		[]byte("challenges.cloudflare.com"),
		[]byte("/cdn-cgi/challenge-platform"),
		[]byte("Enable JavaScript and cookies"),
	}
	for i := range markers {
		if bytes.Contains(body, markers[i]) {
			return true
		}
	}
	return false
}
// stripFSWrapper removes the HTML wrapper that Chrome (and therefore
// FlareSolverr) puts around non-HTML responses such as JSON or plain text:
//
//	<html><head>...</head><body><pre ...>actual_body</pre>...</body></html>
//
// The wrapper is recognised only when the document starts with "<html" and a
// <pre> element (with or without attributes) is the first thing inside
// <body>. In that case the bytes between the opening tag and the last
// "</pre>" are returned. Anything else, including genuine HTML pages that
// merely contain a <pre> block somewhere, is returned unchanged.
//
// NOTE(review): the extracted content is returned as-is; if FlareSolverr
// HTML-escapes characters such as & or < inside <pre>, callers still see the
// escaped form. Confirm against real responses.
func stripFSWrapper(body []byte) []byte {
	if !bytes.HasPrefix(bytes.TrimSpace(body), []byte("<html")) {
		return body
	}

	// Locate the end of the opening <body ...> tag.
	bodyTag := bytes.Index(body, []byte("<body"))
	if bodyTag < 0 {
		return body
	}
	bodyTagEnd := bytes.IndexByte(body[bodyTag:], '>')
	if bodyTagEnd < 0 {
		return body
	}

	// The wrapper's <pre> must be the first element of <body>; otherwise
	// this is a real page and must not be truncated.
	inner := bytes.TrimLeft(body[bodyTag+bodyTagEnd+1:], " \t\r\n")
	if !bytes.HasPrefix(inner, []byte("<pre")) {
		return body
	}
	afterName := inner[len("<pre"):]
	if len(afterName) == 0 {
		return body
	}
	// Reject longer tag names that merely start with "pre".
	switch afterName[0] {
	case '>', ' ', '\t', '\r', '\n':
	default:
		return body
	}

	openEnd := bytes.IndexByte(afterName, '>')
	if openEnd < 0 {
		return body
	}
	content := afterName[openEnd+1:]

	// Use the last closing tag so a literal "</pre>" inside the payload
	// does not cut the content short.
	closeStart := bytes.LastIndex(content, []byte("</pre>"))
	if closeStart < 0 {
		return body
	}
	return content[:closeStart]
}
func retryAfter(resp *http.Response) time.Duration {
ra := resp.Header.Get("Retry-After")
if ra == "" {
@@ -140,29 +351,3 @@ func retryAfter(resp *http.Response) time.Duration {
}
return 5 * time.Second
}
// Get builds a GET request for url bound to ctx, sets each entry of headers
// on it (replacing any existing value for that key), and sends it with Do.
func (c *Client) Get(ctx context.Context, url string, headers map[string]string) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if buildErr != nil {
		return nil, buildErr
	}
	for name, value := range headers {
		request.Header.Set(name, value)
	}
	return c.Do(request)
}
// Post builds a POST request for url bound to ctx with body as the payload
// and sends it with Do. Content-Type is set from contentType first, then each
// entry of headers is applied, so a "Content-Type" key in headers overrides it.
func (c *Client) Post(ctx context.Context, url string, body io.Reader, contentType string, headers map[string]string) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodPost, url, body)
	if buildErr != nil {
		return nil, buildErr
	}
	request.Header.Set("Content-Type", contentType)
	for name, value := range headers {
		request.Header.Set(name, value)
	}
	return c.Do(request)
}
// HTTPClient exposes the wrapped *http.Client so it can be handed to code
// that needs a plain client (for passing to graphql helper etc.).
func (c *Client) HTTPClient() *http.Client {
	return c.http
}