feat(sourcetest): add -v flag with verbose manga list output
When -v is passed, test-sources.sh passes it through to go test -v. sourcetest.Run uses testing.Verbose() to print the full manga list from GetPopularManga and GetLatestUpdates, showing title + URL.
This commit is contained in:
+243
-58
@@ -1,11 +1,14 @@
|
||||
package httpclient
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -15,17 +18,58 @@ import (
|
||||
|
||||
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
|
||||
|
||||
var verboseLog bool
|
||||
var (
|
||||
verboseLog bool
|
||||
defaultOnce sync.Once
|
||||
defaultClient *Client
|
||||
)
|
||||
|
||||
func SetVerboseLog(enabled bool) {
|
||||
verboseLog = enabled
|
||||
// SetVerboseLog sets the package-wide default for verbose request logging.
// The flag is read when a client is constructed, so it applies to clients
// created after this call.
func SetVerboseLog(enabled bool) {
	verboseLog = enabled
}
|
||||
|
||||
// DefaultClient returns the shared singleton HTTP client.
|
||||
// FlareSolverr is auto-configured if the FLARESOLVERR_URL env var is set.
|
||||
// All sources share the same rate limiter (+ cookie jar) through this client.
|
||||
func DefaultClient() *Client {
|
||||
defaultOnce.Do(func() {
|
||||
defaultClient = newClient()
|
||||
})
|
||||
return defaultClient
|
||||
}
|
||||
|
||||
// NewClient creates a standalone client with optional per-source overrides.
|
||||
// Only create a new client when the source needs different behaviour
|
||||
// (e.g. a custom rate limit); otherwise use DefaultClient.
|
||||
func NewClient(opts ...Option) *Client {
|
||||
c := newClient()
|
||||
for _, o := range opts {
|
||||
o(c)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func newClient() *Client {
|
||||
jar, _ := cookiejar.New(nil)
|
||||
c := &Client{
|
||||
http: &http.Client{Timeout: 30 * time.Second, Jar: jar},
|
||||
rateLimit: 1,
|
||||
burst: 1,
|
||||
userAgent: defaultUserAgent,
|
||||
limiters: map[string]*rate.Limiter{},
|
||||
verboseLog: verboseLog,
|
||||
}
|
||||
fsClient, err := NewFlareSolverrClient()
|
||||
if err == nil {
|
||||
c.fsClient = fsClient
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
type Client struct {
|
||||
http *http.Client
|
||||
fsClient *FlareSolverrClient
|
||||
rateLimit float64
|
||||
burst int
|
||||
referer string
|
||||
userAgent string
|
||||
verboseLog bool
|
||||
|
||||
mu sync.Mutex
|
||||
@@ -45,29 +89,14 @@ func WithTimeout(d time.Duration) Option {
|
||||
return func(c *Client) { c.http.Timeout = d }
|
||||
}
|
||||
|
||||
func WithReferer(referer string) Option {
|
||||
return func(c *Client) { c.referer = referer }
|
||||
func WithUserAgent(ua string) Option {
|
||||
return func(c *Client) { c.userAgent = ua }
|
||||
}
|
||||
|
||||
func WithVerboseLog(enabled bool) Option {
|
||||
return func(c *Client) { c.verboseLog = enabled }
|
||||
}
|
||||
|
||||
func NewClient(opts ...Option) *Client {
|
||||
jar, _ := cookiejar.New(nil)
|
||||
c := &Client{
|
||||
http: &http.Client{Timeout: 30 * time.Second, Jar: jar},
|
||||
rateLimit: 1,
|
||||
burst: 1,
|
||||
limiters: map[string]*rate.Limiter{},
|
||||
verboseLog: verboseLog,
|
||||
}
|
||||
for _, o := range opts {
|
||||
o(c)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *Client) limiter(host string) *rate.Limiter {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
@@ -79,23 +108,55 @@ func (c *Client) limiter(host string) *rate.Limiter {
|
||||
return l
|
||||
}
|
||||
|
||||
// Do tries a direct HTTP request first. If the server returns 403/503 (a
|
||||
// Cloudflare or DDoS challenge) and FlareSolverr is available, it falls back
|
||||
// to FlareSolverr raw mode to solve the challenge and return the actual body.
|
||||
//
|
||||
// When FlareSolverr is used, the Chrome HTML wrapper is stripped from the
|
||||
// response so that both JSON and HTML callers receive the real server output.
|
||||
func (c *Client) Do(req *http.Request) (*http.Response, error) {
|
||||
if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if req.Header.Get("User-Agent") == "" {
|
||||
req.Header.Set("User-Agent", c.userAgent)
|
||||
}
|
||||
|
||||
// Always route through FlareSolverr when configured. Go's TLS fingerprint
|
||||
// doesn't match Chrome's, so Cloudflare clearance cookies from FS are
|
||||
// rejected by Go's net/http — meaning every direct request gets challenged
|
||||
// again. FS Chrome caches the clearance internally, so subsequent calls
|
||||
// for the same domain are near-instant.
|
||||
//
|
||||
// When FS is not configured, fall back to direct HTTP.
|
||||
if c.fsClient != nil {
|
||||
return c.doFS(req, 0)
|
||||
}
|
||||
|
||||
// --- direct-first path (commented out — see TLS fingerprint issue above) ---
|
||||
// resp, err := c.doDirect(req)
|
||||
// var directStatus int
|
||||
// if err == nil {
|
||||
// directStatus = resp.StatusCode
|
||||
// if resp.StatusCode != http.StatusForbidden && resp.StatusCode != http.StatusServiceUnavailable {
|
||||
// return resp, nil
|
||||
// }
|
||||
// resp.Body.Close()
|
||||
// }
|
||||
// if c.fsClient == nil {
|
||||
// if err != nil {
|
||||
// return nil, err
|
||||
// }
|
||||
// return nil, fmt.Errorf("HTTP %d (challenge detected but FlareSolverr not configured)", resp.StatusCode)
|
||||
// }
|
||||
// return c.doFS(req, directStatus)
|
||||
|
||||
return c.doDirect(req)
|
||||
}
|
||||
|
||||
func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
|
||||
if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if c.referer != "" && req.Header.Get("Referer") == "" {
|
||||
req.Header.Set("Referer", c.referer)
|
||||
}
|
||||
if req.Header.Get("User-Agent") == "" {
|
||||
req.Header.Set("User-Agent", defaultUserAgent)
|
||||
}
|
||||
|
||||
if c.verboseLog {
|
||||
log.Printf("[httpclient] DIRECT GET %s", req.URL.String())
|
||||
log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String())
|
||||
}
|
||||
|
||||
const maxRetries = 3
|
||||
@@ -124,6 +185,156 @@ func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) {
|
||||
if c.verboseLog {
|
||||
log.Printf("[httpclient] FS FALLBACK %s %s", req.Method, req.URL.String())
|
||||
}
|
||||
|
||||
rawURL := req.URL.String()
|
||||
rawBody, statusCode, fsHeaders, cookies, fsRespURL, err := c.fsClient.GetRaw(req.Context(), rawURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Use the actual response URL from FlareSolverr (follows redirects
|
||||
// through Chrome) so cookies are associated with the right domain.
|
||||
respURL := rawURL
|
||||
if fsRespURL != "" {
|
||||
respURL = fsRespURL
|
||||
}
|
||||
|
||||
// Feed FlareSolverr cookies into the shared jar so subsequent direct
|
||||
// requests to the same host skip the challenge.
|
||||
if len(cookies) > 0 {
|
||||
if u, uErr := url.Parse(respURL); uErr == nil {
|
||||
c.http.Jar.SetCookies(u, cookies)
|
||||
}
|
||||
}
|
||||
|
||||
// When FlareSolverr returns status 200, Chrome rendered the page.
|
||||
// Check if the body actually contains Cloudflare challenge indicators
|
||||
// rather than relying on structural heuristics (<pre> wrapper).
|
||||
if statusCode == 200 {
|
||||
if isCloudflareChallenge([]byte(rawBody)) {
|
||||
statusCode = directStatus
|
||||
}
|
||||
}
|
||||
|
||||
// Build response headers from the actual FS response headers,
|
||||
// falling back to the request headers for keys not present in the
|
||||
// FS response (e.g. Content-Type on an empty GET body).
|
||||
hdr := make(http.Header)
|
||||
if len(fsHeaders) > 0 {
|
||||
for k, v := range fsHeaders {
|
||||
switch val := v.(type) {
|
||||
case string:
|
||||
hdr.Set(k, val)
|
||||
case []any:
|
||||
for _, sv := range val {
|
||||
hdr.Add(k, fmt.Sprint(sv))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Ensure Set-Cookie headers from FS cookies are present even if FS
|
||||
// omitted them from the headers map.
|
||||
if len(cookies) > 0 {
|
||||
for _, ck := range cookies {
|
||||
hdr.Add("Set-Cookie", ck.String())
|
||||
}
|
||||
}
|
||||
// Copy any request headers not present in the FS response (e.g. Host).
|
||||
for k, v := range req.Header {
|
||||
if hdr.Get(k) == "" {
|
||||
hdr[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
body := stripFSWrapper([]byte(rawBody))
|
||||
|
||||
return &http.Response{
|
||||
StatusCode: statusCode,
|
||||
Header: hdr,
|
||||
Body: io.NopCloser(bytes.NewReader(body)),
|
||||
Request: req,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// HTTPClient returns the underlying *http.Client (for passing to graphql etc.).
|
||||
func (c *Client) HTTPClient() *http.Client { return c.http }
|
||||
|
||||
// Cookie returns the value of a named cookie stored in the jar for the given
|
||||
// host (e.g. "mangahub.io"). Returns empty string when the cookie is not found.
|
||||
func (c *Client) Cookie(name, host string) string {
|
||||
u := &url.URL{Scheme: "https", Host: host}
|
||||
for _, ck := range c.http.Jar.Cookies(u) {
|
||||
if ck.Name == name {
|
||||
return ck.Value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Get is a convenience wrapper around Do. To add custom headers, build the
|
||||
// request manually and call Do.
|
||||
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return c.Do(req)
|
||||
}
|
||||
|
||||
// Post is a convenience wrapper around Do.
|
||||
func (c *Client) Post(ctx context.Context, urlStr string, bodyType string, body io.Reader) (*http.Response, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, urlStr, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", bodyType)
|
||||
return c.Do(req)
|
||||
}
|
||||
|
||||
// isCloudflareChallenge reports whether body contains any of the known
// marker strings of a Cloudflare challenge page, i.e. the challenge was not
// solved and the challenge page itself was rendered. Matching is
// case-sensitive substring search.
func isCloudflareChallenge(body []byte) bool {
	markers := [...][]byte{
		[]byte("Just a moment..."),
		[]byte("cf_chl_opt"),
		[]byte("challenges.cloudflare.com"),
		[]byte("/cdn-cgi/challenge-platform"),
		[]byte("Enable JavaScript and cookies"),
	}
	for i := 0; i < len(markers); i++ {
		if bytes.Index(body, markers[i]) >= 0 {
			return true
		}
	}
	return false
}
|
||||
|
||||
// stripFSWrapper removes the HTML wrapper that Chrome (and therefore
// FlareSolverr) puts around non-HTML responses such as JSON or plain text:
//
//	<html><head>...</head><body><pre ...>actual_body</pre>...</body></html>
//
// The wrapper is recognised only when the document starts with <html and the
// first element inside <body> is a <pre> tag, with or without attributes
// (Chrome emits e.g. <pre style="word-wrap: break-word; ...">). In that case
// the bytes between the end of that opening tag and the last </pre> are
// returned as a sub-slice of body.
//
// Anything else is returned unchanged. In particular, a genuine HTML page
// that merely contains a <pre> element somewhere in its body is not touched.
//
// NOTE(review): the extracted content is not HTML-unescaped; if the wrapper
// encodes characters such as & as entities, callers receive them as-is.
func stripFSWrapper(body []byte) []byte {
	if !bytes.HasPrefix(bytes.TrimSpace(body), []byte("<html")) {
		return body
	}

	// Locate the end of the opening <body ...> tag.
	bodyTag := bytes.Index(body, []byte("<body"))
	if bodyTag < 0 {
		return body
	}
	bodyTagEnd := bytes.IndexByte(body[bodyTag:], '>')
	if bodyTagEnd < 0 {
		return body
	}

	// The wrapper's <pre> must be the first thing inside <body>.
	inner := bytes.TrimLeft(body[bodyTag+bodyTagEnd+1:], " \t\r\n")
	const preOpen = "<pre"
	if !bytes.HasPrefix(inner, []byte(preOpen)) {
		return body
	}
	afterName := inner[len(preOpen):]
	if len(afterName) == 0 {
		return body
	}
	// Make sure the tag name really is "pre" and not a longer name that
	// merely starts with it.
	switch afterName[0] {
	case '>', ' ', '\t', '\r', '\n':
	default:
		return body
	}

	openEnd := bytes.IndexByte(afterName, '>')
	if openEnd < 0 {
		return body
	}
	content := afterName[openEnd+1:]

	// Use the last </pre> so payloads that themselves contain "</pre>" are
	// kept intact.
	closeStart := bytes.LastIndex(content, []byte("</pre>"))
	if closeStart < 0 {
		return body
	}
	return content[:closeStart]
}
|
||||
|
||||
func retryAfter(resp *http.Response) time.Duration {
|
||||
ra := resp.Header.Get("Retry-After")
|
||||
if ra == "" {
|
||||
@@ -140,29 +351,3 @@ func retryAfter(resp *http.Response) time.Duration {
|
||||
}
|
||||
return 5 * time.Second
|
||||
}
|
||||
|
||||
func (c *Client) Get(ctx context.Context, url string, headers map[string]string) (*http.Response, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k, v := range headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
return c.Do(req)
|
||||
}
|
||||
|
||||
func (c *Client) Post(ctx context.Context, url string, body io.Reader, contentType string, headers map[string]string) (*http.Response, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", contentType)
|
||||
for k, v := range headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
return c.Do(req)
|
||||
}
|
||||
|
||||
// HTTPClient returns the underlying *http.Client (for passing to graphql helper etc.)
|
||||
func (c *Client) HTTPClient() *http.Client { return c.http }
|
||||
|
||||
Reference in New Issue
Block a user