package httpclient import ( "bytes" "context" "fmt" "io" "log" "net/http" "net/http/cookiejar" "net/url" "strconv" "sync" "time" "golang.org/x/time/rate" ) const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36" var ( verboseLog bool defaultOnce sync.Once defaultClient *Client ) func SetVerboseLog(enabled bool) { verboseLog = enabled } // DefaultClient returns the shared singleton HTTP client. // FlareSolverr is auto-configured if the FLARESOLVERR_URL env var is set. // All sources share the same rate limiter (+ cookie jar) through this client. func DefaultClient() *Client { defaultOnce.Do(func() { defaultClient = newClient() }) return defaultClient } // NewClient creates a standalone client with optional per-source overrides. // Only create a new client when the source needs different behaviour // (e.g. a custom rate limit); otherwise use DefaultClient. func NewClient(opts ...Option) *Client { c := newClient() for _, o := range opts { o(c) } return c } func newClient() *Client { jar, _ := cookiejar.New(nil) c := &Client{ http: &http.Client{Timeout: 30 * time.Second, Jar: jar}, rateLimit: 1, burst: 1, userAgent: defaultUserAgent, limiters: map[string]*rate.Limiter{}, verboseLog: verboseLog, } fsClient, err := NewFlareSolverrClient() if err == nil { c.fsClient = fsClient } return c } type Client struct { http *http.Client fsClient *FlareSolverrClient rateLimit float64 burst int userAgent string verboseLog bool mu sync.Mutex limiters map[string]*rate.Limiter } type Option func(*Client) func WithRateLimit(rps float64, burst int) Option { return func(c *Client) { c.rateLimit = rps c.burst = burst } } func WithTimeout(d time.Duration) Option { return func(c *Client) { c.http.Timeout = d } } func WithUserAgent(ua string) Option { return func(c *Client) { c.userAgent = ua } } func WithVerboseLog(enabled bool) Option { return func(c *Client) { c.verboseLog = enabled } } func (c *Client) 
limiter(host string) *rate.Limiter { c.mu.Lock() defer c.mu.Unlock() l, ok := c.limiters[host] if !ok { l = rate.NewLimiter(rate.Limit(c.rateLimit), c.burst) c.limiters[host] = l } return l } // Do tries a direct HTTP request first. If the server returns 403/503 (a // Cloudflare or DDoS challenge) and FlareSolverr is available, it falls back // to FlareSolverr raw mode to solve the challenge and return the actual body. // // When FlareSolverr is used, the Chrome HTML wrapper is stripped from the // response so that both JSON and HTML callers receive the real server output. func (c *Client) Do(req *http.Request) (*http.Response, error) { if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil { return nil, err } if req.Header.Get("User-Agent") == "" { req.Header.Set("User-Agent", c.userAgent) } // Always route through FlareSolverr when configured. Go's TLS fingerprint // doesn't match Chrome's, so Cloudflare clearance cookies from FS are // rejected by Go's net/http — meaning every direct request gets challenged // again. FS Chrome caches the clearance internally, so subsequent calls // for the same domain are near-instant. // // When FS is not configured, fall back to direct HTTP. 
if c.fsClient != nil { return c.doFS(req, 0) } // --- direct-first path (commented out — see TLS fingerprint issue above) --- // resp, err := c.doDirect(req) // var directStatus int // if err == nil { // directStatus = resp.StatusCode // if resp.StatusCode != http.StatusForbidden && resp.StatusCode != http.StatusServiceUnavailable { // return resp, nil // } // resp.Body.Close() // } // if c.fsClient == nil { // if err != nil { // return nil, err // } // return nil, fmt.Errorf("HTTP %d (challenge detected but FlareSolverr not configured)", resp.StatusCode) // } // return c.doFS(req, directStatus) return c.doDirect(req) } func (c *Client) doDirect(req *http.Request) (*http.Response, error) { if c.verboseLog { log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String()) } const maxRetries = 3 for attempt := 0; attempt <= maxRetries; attempt++ { resp, err := c.http.Do(req) if err != nil { return nil, err } if c.verboseLog { log.Printf("[httpclient] DIRECT RESPONSE %s status=%d", req.URL.String(), resp.StatusCode) } if resp.StatusCode != http.StatusTooManyRequests { return resp, nil } resp.Body.Close() if attempt == maxRetries { return resp, nil } sleep := retryAfter(resp) select { case <-req.Context().Done(): return nil, req.Context().Err() case <-time.After(sleep): } } panic("unreachable") } func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) { if c.verboseLog { log.Printf("[httpclient] FS FALLBACK %s %s", req.Method, req.URL.String()) } rawURL := req.URL.String() rawBody, statusCode, fsHeaders, cookies, fsRespURL, err := c.fsClient.GetRaw(req.Context(), rawURL) if err != nil { return nil, err } // Use the actual response URL from FlareSolverr (follows redirects // through Chrome) so cookies are associated with the right domain. respURL := rawURL if fsRespURL != "" { respURL = fsRespURL } // Feed FlareSolverr cookies into the shared jar so subsequent direct // requests to the same host skip the challenge. 
if len(cookies) > 0 { if u, uErr := url.Parse(respURL); uErr == nil { c.http.Jar.SetCookies(u, cookies) } } // If FS returned the challenge page instead of the real content, // reject it (HTTP 0 case when directStatus=0). if statusCode == 200 && isCloudflareChallenge([]byte(rawBody)) { if directStatus >= 400 { statusCode = directStatus } else { return nil, fmt.Errorf("FlareSolverr returned challenge page for %s", rawURL) } } // Build response headers from the actual FS response headers, // falling back to the request headers for keys not present in the // FS response (e.g. Content-Type on an empty GET body). hdr := make(http.Header) if len(fsHeaders) > 0 { for k, v := range fsHeaders { switch val := v.(type) { case string: hdr.Set(k, val) case []any: for _, sv := range val { hdr.Add(k, fmt.Sprint(sv)) } } } } // Ensure Set-Cookie headers from FS cookies are present even if FS // omitted them from the headers map. if len(cookies) > 0 { for _, ck := range cookies { hdr.Add("Set-Cookie", ck.String()) } } // Copy any request headers not present in the FS response (e.g. Host). for k, v := range req.Header { if hdr.Get(k) == "" { hdr[k] = v } } body := stripFSWrapper([]byte(rawBody)) return &http.Response{ StatusCode: statusCode, Header: hdr, Body: io.NopCloser(bytes.NewReader(body)), Request: req, }, nil } // HTTPClient returns the underlying *http.Client (for passing to graphql etc.). func (c *Client) HTTPClient() *http.Client { return c.http } // Cookie returns the value of a named cookie stored in the jar for the given // host (e.g. "mangahub.io"). Returns empty string when the cookie is not found. func (c *Client) Cookie(name, host string) string { u := &url.URL{Scheme: "https", Host: host} for _, ck := range c.http.Jar.Cookies(u) { if ck.Name == name { return ck.Value } } return "" } // Get is a convenience wrapper around Do. To add custom headers, build the // request manually and call Do. 
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil) if err != nil { return nil, err } return c.Do(req) } // Post is a convenience wrapper around Do. func (c *Client) Post(ctx context.Context, urlStr string, bodyType string, body io.Reader) (*http.Response, error) { req, err := http.NewRequestWithContext(ctx, http.MethodPost, urlStr, body) if err != nil { return nil, err } req.Header.Set("Content-Type", bodyType) return c.Do(req) } // isCloudflareChallenge detects whether the response body is a Cloudflare // challenge page (i.e. FS failed to solve it and Chrome rendered the challenge). func isCloudflareChallenge(body []byte) bool { indicators := []string{ "Just a moment...", "cf_chl_opt", "challenges.cloudflare.com", "/cdn-cgi/challenge-platform", "Enable JavaScript and cookies", } for _, ind := range indicators { if bytes.Contains(body, []byte(ind)) { return true } } return false } // stripFSWrapper removes FlareSolverr's Chrome HTML wrapper. // FlareSolverr wraps all responses in: // //
......actual_body// // If a
tag is found inside the wrapper, its content is returned.
// Otherwise the body is returned unchanged (HTML pages rendered by Chrome).
func stripFSWrapper(body []byte) []byte {
	const (
		openTag  = "<pre>"
		closeTag = "</pre>"
	)

	// Only bodies that look like an HTML document can carry the wrapper;
	// anything else (e.g. bare JSON) is passed through untouched.
	if !bytes.HasPrefix(bytes.TrimSpace(body), []byte("<html")) {
		return body
	}

	// The payload sits between the first <pre> and the last </pre>, so a
	// payload that itself contains </pre> is not truncated.
	preStart := bytes.Index(body, []byte(openTag))
	if preStart < 0 {
		return body
	}
	preEnd := bytes.LastIndex(body, []byte(closeTag))
	if preEnd <= preStart {
		return body
	}

	// The result aliases body's backing array; no copy is made.
	return body[preStart+len(openTag) : preEnd]
}
// retryAfter reports how long to wait before retrying, based on the
// Retry-After header of resp.
//
// The header is read first as a number of seconds (fractions allowed) and then
// as an HTTP date. It returns 5 seconds when the header is absent or
// unparseable, names a date that is not in the future, or holds a number that
// is negative, NaN, infinite or too large for a time.Duration.
func retryAfter(resp *http.Response) time.Duration {
	const fallback = 5 * time.Second

	ra := resp.Header.Get("Retry-After")
	if ra == "" {
		return fallback
	}

	if secs, err := strconv.ParseFloat(ra, 64); err == nil {
		// ParseFloat accepts "NaN", "Inf" and negative numbers. Converting
		// those (or anything >= 2^63 ns) to a Duration yields a negative or
		// implementation-defined value, so only in-range values are used.
		// NaN fails both comparisons.
		if d := secs * float64(time.Second); d >= 0 && d < 1<<63 {
			return time.Duration(d)
		}
		return fallback
	}

	if t, err := http.ParseTime(ra); err == nil {
		if d := time.Until(t); d > 0 {
			return d
		}
	}
	return fallback
}