package httpclient

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"log"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"strconv"
	"sync"
	"time"

	"golang.org/x/time/rate"
)

// defaultUserAgent is sent on requests that do not carry their own User-Agent.
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"

var (
	// verboseLog is the package-wide default copied into each client when
	// it is constructed.
	verboseLog bool

	// defaultOnce guards the lazy construction of defaultClient.
	defaultOnce   sync.Once
	defaultClient *Client
)

// SetVerboseLog sets the package-wide verbose-logging default. The value is
// copied into a client when it is constructed, so it only affects clients
// created after the call (including the DefaultClient singleton if it has
// not been built yet). It is not synchronised; call it during start-up.
func SetVerboseLog(enabled bool) { verboseLog = enabled }

// DefaultClient returns the shared singleton HTTP client.
// FlareSolverr is auto-configured if the FLARESOLVERR_URL env var is set.
// All sources share the same rate limiter (+ cookie jar) through this client.
func DefaultClient() *Client {
	defaultOnce.Do(func() {
		defaultClient = newClient()
	})
	return defaultClient
}

// NewClient creates a standalone client with optional per-source overrides.
// Only create a new client when the source needs different behaviour
// (e.g. a custom rate limit); otherwise use DefaultClient.
func NewClient(opts ...Option) *Client {
	c := newClient()
	for _, o := range opts {
		o(c)
	}
	return c
}

// newClient builds a client with the package defaults: a 30 second timeout,
// a fresh cookie jar, one request per second per host with a burst of one,
// and a FlareSolverr client when NewFlareSolverrClient succeeds.
func newClient() *Client {
	// The error is deliberately ignored: cookiejar.New with nil options is
	// not expected to fail (NOTE(review): confirm against the Go version in
	// use). Jar users below still tolerate a nil jar.
	jar, _ := cookiejar.New(nil)
	c := &Client{
		http:       &http.Client{Timeout: 30 * time.Second, Jar: jar},
		rateLimit:  1,
		burst:      1,
		userAgent:  defaultUserAgent,
		limiters:   map[string]*rate.Limiter{},
		verboseLog: verboseLog,
	}
	// FlareSolverr is optional; when it cannot be set up the client simply
	// performs direct HTTP requests.
	fsClient, err := NewFlareSolverrClient()
	if err == nil {
		c.fsClient = fsClient
	}
	return c
}

// Client is a rate-limited HTTP client that routes requests through
// FlareSolverr when one is configured and performs them directly otherwise.
type Client struct {
	http       *http.Client
	fsClient   *FlareSolverrClient // nil when FlareSolverr is not configured
	rateLimit  float64             // requests per second for newly created limiters
	burst      int                 // burst size for newly created limiters
	userAgent  string              // User-Agent applied when a request has none
	verboseLog bool

	mu       sync.Mutex               // guards limiters
	limiters map[string]*rate.Limiter // one limiter per request host
}

// Option customises a Client created by NewClient.
type Option func(*Client)

// WithRateLimit sets the per-host request rate (requests per second) and
// burst size used for limiters created after the option is applied.
func WithRateLimit(rps float64, burst int) Option {
	return func(c *Client) {
		c.rateLimit = rps
		c.burst = burst
	}
}

// WithTimeout overrides the timeout of the underlying http.Client.
func WithTimeout(d time.Duration) Option {
	return func(c *Client) { c.http.Timeout = d }
}

// WithUserAgent overrides the User-Agent applied to requests that do not
// set one themselves.
func WithUserAgent(ua string) Option {
	return func(c *Client) { c.userAgent = ua }
}

// WithVerboseLog enables or disables per-request logging for this client,
// regardless of the package-wide default.
func WithVerboseLog(enabled bool) Option {
	return func(c *Client) { c.verboseLog = enabled }
}

// limiter returns the rate limiter for host, creating it with the client's
// current rate and burst on first use.
func (c *Client) limiter(host string) *rate.Limiter {
	c.mu.Lock()
	defer c.mu.Unlock()
	l, ok := c.limiters[host]
	if !ok {
		l = rate.NewLimiter(rate.Limit(c.rateLimit), c.burst)
		c.limiters[host] = l
	}
	return l
}

// Do waits for the per-host rate limiter, applies the default User-Agent
// when the request has none, and then performs the request.
//
// When FlareSolverr is configured every request is routed through it;
// otherwise the request is sent directly with retries on HTTP 429.
//
// When FlareSolverr is used, the Chrome HTML wrapper is stripped from the
// response so that both JSON and HTML callers receive the real server output.
//
// NOTE(review): the FlareSolverr path only forwards the request URL to
// GetRaw, so the method and body of non-GET requests appear to be dropped
// on that path — confirm against FlareSolverrClient.
func (c *Client) Do(req *http.Request) (*http.Response, error) {
	if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
		return nil, err
	}
	// A hand-built request may have a nil Header map; Set would panic on it.
	if req.Header == nil {
		req.Header = make(http.Header)
	}
	if req.Header.Get("User-Agent") == "" {
		req.Header.Set("User-Agent", c.userAgent)
	}

	// Always route through FlareSolverr when configured. Go's TLS fingerprint
	// doesn't match Chrome's, so Cloudflare clearance cookies from FS are
	// rejected by Go's net/http — meaning every direct request gets challenged
	// again. FS Chrome caches the clearance internally, so subsequent calls
	// for the same domain are near-instant.
	//
	// When FS is not configured, fall back to direct HTTP.
	if c.fsClient != nil {
		return c.doFS(req, 0)
	}

	// --- direct-first path (commented out — see TLS fingerprint issue above) ---
	// resp, err := c.doDirect(req)
	// var directStatus int
	// if err == nil {
	// 	directStatus = resp.StatusCode
	// 	if resp.StatusCode != http.StatusForbidden && resp.StatusCode != http.StatusServiceUnavailable {
	// 		return resp, nil
	// 	}
	// 	resp.Body.Close()
	// }
	// if c.fsClient == nil {
	// 	if err != nil {
	// 		return nil, err
	// 	}
	// 	return nil, fmt.Errorf("HTTP %d (challenge detected but FlareSolverr not configured)", resp.StatusCode)
	// }
	// return c.doFS(req, directStatus)

	return c.doDirect(req)
}

// doDirect sends req with the underlying http.Client. A 429 response is
// retried up to three times, sleeping for the duration given by retryAfter
// between attempts. The last 429 is returned to the caller with its body
// still open. The wait is abandoned with the context's error if the request
// context is cancelled.
func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
	if c.verboseLog {
		log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String())
	}

	const maxRetries = 3
	for attempt := 0; ; attempt++ {
		resp, err := c.http.Do(req)
		if err != nil {
			return nil, err
		}
		if c.verboseLog {
			log.Printf("[httpclient] DIRECT RESPONSE %s status=%d", req.URL.String(), resp.StatusCode)
		}
		// Hand back anything that is not a 429, and the final 429 once the
		// retry budget is spent. The body is left open for the caller.
		if resp.StatusCode != http.StatusTooManyRequests || attempt == maxRetries {
			return resp, nil
		}

		// The previous attempt consumed the request body. Rewind it when the
		// request knows how; otherwise a retry would send a truncated body,
		// so surface the 429 instead.
		if req.Body != nil && req.Body != http.NoBody {
			if req.GetBody == nil {
				return resp, nil
			}
			fresh, err := req.GetBody()
			if err != nil {
				resp.Body.Close()
				return nil, fmt.Errorf("rewinding request body for retry: %w", err)
			}
			req.Body = fresh
		}

		wait := retryAfter(resp)
		resp.Body.Close()

		timer := time.NewTimer(wait)
		select {
		case <-req.Context().Done():
			timer.Stop()
			return nil, req.Context().Err()
		case <-timer.C:
		}
	}
}

// doFS fetches req.URL through FlareSolverr and converts the result into an
// *http.Response.
//
// directStatus is the status code of an earlier direct attempt, or 0 when
// there was none. It is reported when FlareSolverr returns 200 but the body
// is still a Cloudflare challenge page; with no earlier attempt the response
// is reported as 403 Forbidden rather than the invalid status 0.
func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) {
	if c.verboseLog {
		log.Printf("[httpclient] FS FALLBACK %s %s", req.Method, req.URL.String())
	}

	rawURL := req.URL.String()
	rawBody, statusCode, fsHeaders, cookies, fsRespURL, err := c.fsClient.GetRaw(req.Context(), rawURL)
	if err != nil {
		return nil, err
	}

	// Use the actual response URL from FlareSolverr (follows redirects
	// through Chrome) so cookies are associated with the right domain.
	respURL := rawURL
	if fsRespURL != "" {
		respURL = fsRespURL
	}

	// Feed FlareSolverr cookies into the shared jar so they are available to
	// later requests and to Cookie lookups for the same host.
	if len(cookies) > 0 && c.http.Jar != nil {
		if u, uErr := url.Parse(respURL); uErr == nil {
			c.http.Jar.SetCookies(u, cookies)
		}
	}

	// Convert once; both the challenge check and the wrapper strip need bytes.
	raw := []byte(rawBody)

	// When FlareSolverr returns status 200, Chrome rendered the page.
	// Check if the body actually contains Cloudflare challenge indicators
	// rather than relying on structural heuristics (<pre> wrapper).
	if statusCode == http.StatusOK && isCloudflareChallenge(raw) {
		statusCode = directStatus
		if statusCode == 0 {
			// No direct attempt was made, so there is no real status to
			// report; an unsolved challenge is effectively a 403.
			statusCode = http.StatusForbidden
		}
	}

	// Build response headers from the actual FS response headers,
	// falling back to the request headers for keys not present in the
	// FS response (e.g. Content-Type on an empty GET body).
	hdr := make(http.Header)
	for k, v := range fsHeaders {
		switch val := v.(type) {
		case string:
			hdr.Set(k, val)
		case []any:
			for _, sv := range val {
				hdr.Add(k, fmt.Sprint(sv))
			}
		}
	}
	// Ensure Set-Cookie headers from FS cookies are present even if FS
	// omitted them from the headers map.
	for _, ck := range cookies {
		hdr.Add("Set-Cookie", ck.String())
	}
	// Copy any request headers not present in the FS response (e.g. Host).
	// The values are copied so the response never aliases the request's
	// header slices.
	for k, v := range req.Header {
		if hdr.Get(k) == "" {
			hdr[k] = append([]string(nil), v...)
		}
	}

	body := stripFSWrapper(raw)

	return &http.Response{
		Status:        fmt.Sprintf("%d %s", statusCode, http.StatusText(statusCode)),
		StatusCode:    statusCode,
		Header:        hdr,
		Body:          io.NopCloser(bytes.NewReader(body)),
		ContentLength: int64(len(body)),
		Request:       req,
	}, nil
}

// HTTPClient exposes the wrapped *http.Client so it can be handed to code
// that expects a standard client (for passing to graphql etc.).
func (c *Client) HTTPClient() *http.Client {
	underlying := c.http
	return underlying
}

// Cookie looks up a cookie by name in the client's jar, using an https URL
// built from host (e.g. "mangahub.io"). It returns the cookie's value, or the
// empty string when the jar holds no cookie with that name for the host.
func (c *Client) Cookie(name, host string) string {
	target := url.URL{Scheme: "https", Host: host}
	stored := c.http.Jar.Cookies(&target)
	for i := range stored {
		if stored[i].Name != name {
			continue
		}
		return stored[i].Value
	}
	return ""
}

// Get builds a body-less GET request for urlStr bound to ctx and sends it
// through Do. Callers that need custom headers should construct the request
// themselves and call Do directly.
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
	if buildErr == nil {
		return c.Do(request)
	}
	return nil, buildErr
}

// Post builds a POST request for urlStr bound to ctx, with body as the
// payload and bodyType as its Content-Type header, and sends it through Do.
func (c *Client) Post(ctx context.Context, urlStr string, bodyType string, body io.Reader) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodPost, urlStr, body)
	if buildErr == nil {
		request.Header.Set("Content-Type", bodyType)
		return c.Do(request)
	}
	return nil, buildErr
}

// isCloudflareChallenge reports whether body contains any of the marker
// strings that identify a Cloudflare challenge page (i.e. FS failed to solve
// it and Chrome rendered the challenge). The match is a plain,
// case-sensitive substring search.
func isCloudflareChallenge(body []byte) bool {
	markers := [...]string{
		"Just a moment...",
		"cf_chl_opt",
		"challenges.cloudflare.com",
		"/cdn-cgi/challenge-platform",
		"Enable JavaScript and cookies",
	}
	found := false
	for i := 0; i < len(markers) && !found; i++ {
		needle := []byte(markers[i])
		found = bytes.Contains(body, needle)
	}
	return found
}

// stripFSWrapper removes FlareSolverr's Chrome HTML wrapper.
// FlareSolverr wraps all responses in:
//
//	<html><head>...</head><body><pre>actual_body</pre></body></html>
//
// If a <pre> tag is found inside the wrapper, its content is returned.
// Otherwise the body is returned unchanged (HTML pages rendered by Chrome).
//
// The returned slice aliases body; no copy is made.
func stripFSWrapper(body []byte) []byte {
	const (
		wrapperPrefix = "<html"
		preOpen       = "<pre>"
		preClose      = "</pre>"
	)

	// Anything that is not an HTML document cannot be a Chrome wrapper.
	if !bytes.HasPrefix(bytes.TrimSpace(body), []byte(wrapperPrefix)) {
		return body
	}
	preStart := bytes.Index(body, []byte(preOpen))
	if preStart < 0 {
		return body
	}
	// Use the last closing tag so a payload that itself contains "</pre>"
	// is not cut short.
	preEnd := bytes.LastIndex(body, []byte(preClose))
	if preEnd <= preStart {
		return body
	}
	return body[preStart+len(preOpen) : preEnd]
}

// retryAfter returns how long to wait before retrying, based on the
// Retry-After header of resp. The header may hold a number of seconds or an
// HTTP date. A missing header, an unparsable value, a negative or NaN number,
// or a date that is not in the future all yield a 5 second default. Numbers
// too large for a time.Duration are clamped to the maximum duration.
func retryAfter(resp *http.Response) time.Duration {
	const (
		fallback    = 5 * time.Second
		maxDuration = time.Duration(1<<63 - 1)
	)

	ra := resp.Header.Get("Retry-After")
	if ra == "" {
		return fallback
	}
	if secs, err := strconv.ParseFloat(ra, 64); err == nil {
		// The negated comparison also rejects NaN, which a plain "secs < 0"
		// would let through.
		if !(secs >= 0) {
			return fallback
		}
		// Converting an out-of-range float to an integer is not well
		// defined, so clamp before converting.
		if secs >= maxDuration.Seconds() {
			return maxDuration
		}
		return time.Duration(secs * float64(time.Second))
	}
	if t, err := http.ParseTime(ra); err == nil {
		if d := time.Until(t); d > 0 {
			return d
		}
	}
	return fallback
}