package httpclient

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"log"
	"math"
	"net/http"
	"net/url"
	"strconv"
	"sync"
	"time"

	"github.com/sardanioss/httpcloak"
	"golang.org/x/time/rate"
)

// defaultUserAgent is set on direct requests that arrive without a
// User-Agent header.
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"

var (
	// verboseLog is the package-wide default that newClient copies into each
	// Client when it is constructed.
	verboseLog bool

	// defaultOnce guards the lazy construction of defaultClient.
	defaultOnce   sync.Once
	defaultClient *Client
)

// SetVerboseLog sets the verbose-logging default used by clients created
// after the call. Existing clients keep the value they were created with.
// The write is not synchronized.
func SetVerboseLog(enabled bool) {
	verboseLog = enabled
}

// DefaultClient returns the shared Client, creating it with default settings
// on first use.
func DefaultClient() *Client {
	defaultOnce.Do(func() {
		defaultClient = newClient()
	})
	return defaultClient
}

// NewClient returns a Client with default settings, then applies opts in
// order.
func NewClient(opts ...Option) *Client {
	c := newClient()
	for _, o := range opts {
		o(c)
	}
	return c
}

// newClient builds a Client with the defaults: a "chrome-latest" httpcloak
// session with a 30s timeout, a limiter setting of 1 request/second with
// burst 1, and the default User-Agent.
func newClient() *Client {
	hc := httpcloak.NewSession("chrome-latest",
		httpcloak.WithSessionTimeout(30*time.Second),
	)
	c := &Client{
		hc:         hc,
		rateLimit:  1,
		burst:      1,
		userAgent:  defaultUserAgent,
		limiters:   map[string]*rate.Limiter{},
		verboseLog: verboseLog,
	}
	// FlareSolverr is optional: when its client cannot be constructed the
	// Client still works, but Do reports challenged responses as errors.
	if fs, err := NewFlareSolverrClient(); err == nil {
		c.fsClient = fs
	}
	return c
}

// Client sends requests through an httpcloak session, rate-limited per host,
// and falls back to FlareSolverr when the direct request returns 403 or 503.
type Client struct {
	hc         *httpcloak.Session
	fsClient   *FlareSolverrClient // nil when FlareSolverr is unavailable
	rateLimit  float64             // requests per second for newly created limiters
	burst      int                 // burst size for newly created limiters
	userAgent  string
	verboseLog bool

	mu       sync.Mutex // guards limiters
	limiters map[string]*rate.Limiter
}

// Option configures a Client in NewClient.
type Option func(*Client)

// WithRateLimit sets the per-host rate (requests per second) and burst used
// for limiters. Limiters are created lazily on the first request to a host,
// so the values apply to hosts that have not been contacted yet.
func WithRateLimit(rps float64, burst int) Option {
	return func(c *Client) {
		c.rateLimit = rps
		c.burst = burst
	}
}

// WithTimeout replaces the client's httpcloak session with a new
// "chrome-latest" session whose timeout is d. The previous session is
// discarded. The fixed 30s timeout of the client returned by HTTPClient is
// not affected.
func WithTimeout(d time.Duration) Option {
	return func(c *Client) {
		c.hc = httpcloak.NewSession("chrome-latest",
			httpcloak.WithSessionTimeout(d),
		)
	}
}

// WithUserAgent sets the User-Agent added to direct requests that do not
// already carry one.
func WithUserAgent(ua string) Option {
	return func(c *Client) {
		c.userAgent = ua
	}
}

// WithVerboseLog enables or disables request/response logging for this
// client, overriding the package default.
func WithVerboseLog(enabled bool) Option {
	return func(c *Client) {
		c.verboseLog = enabled
	}
}

// limiter returns the rate limiter for host, creating it from the client's
// current rate and burst settings on first use.
func (c *Client) limiter(host string) *rate.Limiter {
	c.mu.Lock()
	defer c.mu.Unlock()

	l, ok := c.limiters[host]
	if !ok {
		l = rate.NewLimiter(rate.Limit(c.rateLimit), c.burst)
		c.limiters[host] = l
	}
	return l
}

// Do tries a direct request via httpcloak (Chrome TLS fingerprint) first.
// httpcloak's TLS fingerprint matches Chrome, so if we already have a
// cf_clearance cookie from a previous FlareSolverr solve, Cloudflare won't
// challenge us. If we do get challenged (403/503), falls back to FlareSolverr.
//
// Do first waits on the per-host rate limiter. The FlareSolverr fallback is
// also taken when the direct request fails with an error. The fallback call
// receives only the request context and URL, so the method, headers and body
// of req are not passed to it. Without a FlareSolverr client, a direct error
// is returned as is and a 403/503 is reported as an error.
func (c *Client) Do(req *http.Request) (*http.Response, error) {
	if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
		return nil, err
	}

	resp, err := c.doDirect(req)
	var directStatus int
	if err == nil {
		directStatus = resp.StatusCode
		if directStatus != http.StatusForbidden && directStatus != http.StatusServiceUnavailable {
			return resp, nil
		}
		// Challenged: the direct response is dropped in favour of the fallback.
		resp.Body.Close()
	}

	if c.fsClient == nil {
		if err != nil {
			return nil, err
		}
		return nil, fmt.Errorf("HTTP %d (challenge detected but FlareSolverr not configured)", directStatus)
	}
	return c.doFS(req, directStatus)
}

// doDirect sends req through the httpcloak session and returns the fully
// buffered response. It sets the client's User-Agent on req when the header
// is missing, which modifies the caller's request.
func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
	if c.verboseLog {
		log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String())
	}
	if req.Header.Get("User-Agent") == "" {
		req.Header.Set("User-Agent", c.userAgent)
	}

	hreq := &httpcloak.Request{
		Method:  req.Method,
		URL:     req.URL.String(),
		Headers: req.Header,
	}
	if req.Body != nil {
		hreq.Body = req.Body
	}

	hresp, err := c.hc.Do(req.Context(), hreq)
	if err != nil {
		return nil, err
	}
	body, err := hresp.Bytes()
	if err != nil {
		return nil, err
	}
	if c.verboseLog {
		log.Printf("[httpclient] DIRECT RESPONSE %s status=%d", req.URL.String(), hresp.StatusCode)
	}

	return &http.Response{
		StatusCode:    hresp.StatusCode,
		Header:        hresp.Headers,
		Body:          io.NopCloser(bytes.NewReader(body)),
		ContentLength: int64(len(body)),
		Request:       req,
	}, nil
}

// doFS fetches req's URL through FlareSolverr and converts the result into an
// *http.Response. Cookies returned by FlareSolverr are stored in the
// httpcloak session so later direct requests can reuse them. directStatus is
// the status of the preceding direct attempt, or 0 if that attempt failed
// with an error.
func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) {
	if c.verboseLog {
		log.Printf("[httpclient] FS FALLBACK %s %s", req.Method, req.URL.String())
	}

	rawURL := req.URL.String()
	rawBody, statusCode, fsHeaders, cookies, fsRespURL, err := c.fsClient.GetRaw(req.Context(), rawURL)
	if err != nil {
		return nil, err
	}

	respURL := rawURL
	if fsRespURL != "" {
		respURL = fsRespURL
	}

	// Feed FS cookies into the httpcloak session for subsequent direct requests.
	if len(cookies) > 0 {
		if parsedRespURL, uErr := url.Parse(respURL); uErr == nil {
			for _, ck := range cookies {
				if ck.Domain == "" {
					// Hostname, not Host: a cookie domain must not carry a port.
					ck.Domain = parsedRespURL.Hostname()
				}
				c.hc.SetCookie(ck.Name, ck.Value)
			}
		}
	}

	// Check if FS returned the challenge page instead of real content. When
	// the direct attempt produced an error status, report that status with
	// the challenge body; otherwise there is nothing meaningful to return.
	if statusCode == 200 && isCloudflareChallenge([]byte(rawBody)) {
		if directStatus < 400 {
			return nil, fmt.Errorf("FlareSolverr returned challenge page for %s", rawURL)
		}
		statusCode = directStatus
	}

	hdr := make(http.Header)
	for k, v := range fsHeaders {
		switch val := v.(type) {
		case string:
			hdr.Set(k, val)
		case []any:
			for _, sv := range val {
				hdr.Add(k, fmt.Sprint(sv))
			}
		}
	}
	for _, ck := range cookies {
		hdr.Add("Set-Cookie", ck.String())
	}
	// Request headers fill in any key the FlareSolverr response did not set.
	for k, v := range req.Header {
		if hdr.Get(k) == "" {
			hdr[k] = v
		}
	}

	body := stripFSWrapper([]byte(rawBody))
	return &http.Response{
		StatusCode:    statusCode,
		Header:        hdr,
		Body:          io.NopCloser(bytes.NewReader(body)),
		ContentLength: int64(len(body)), // consistent with doDirect
		Request:       req,
	}, nil
}

// HTTPClient returns a new *http.Client whose transport sends requests
// through the client's httpcloak session. It bypasses the rate limiter and
// the FlareSolverr fallback, and always uses a 30s timeout.
func (c *Client) HTTPClient() *http.Client {
	return &http.Client{
		Transport: hcTransport{c.hc},
		Timeout:   30 * time.Second,
	}
}

// hcTransport wraps httpcloak.Session as an http.RoundTripper
type hcTransport struct {
	hc *httpcloak.Session
}

var _ http.RoundTripper = hcTransport{}

// RoundTrip sends req through the httpcloak session and returns the fully
// buffered response.
func (t hcTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	var body io.Reader
	if req.Body != nil {
		// The http.RoundTripper contract requires the request body to be
		// closed, including on errors.
		defer req.Body.Close()
		body = req.Body
	}

	hreq := &httpcloak.Request{
		Method:  req.Method,
		URL:     req.URL.String(),
		Headers: req.Header,
		Body:    body,
	}
	hresp, err := t.hc.Do(req.Context(), hreq)
	if err != nil {
		return nil, err
	}
	bodyBytes, err := hresp.Bytes()
	if err != nil {
		return nil, err
	}

	return &http.Response{
		StatusCode:    hresp.StatusCode,
		Header:        hresp.Headers,
		Body:          io.NopCloser(bytes.NewReader(bodyBytes)),
		ContentLength: int64(len(bodyBytes)),
		Request:       req,
	}, nil
}

// Cookie returns the value of the session cookie called name, or "" when the
// session has no such cookie. The host argument is currently ignored: the
// lookup is by name only.
func (c *Client) Cookie(name, host string) string {
	cks := c.hc.GetCookies()
	if v, ok := cks[name]; ok {
		return v
	}
	return ""
}

// Get is a convenience wrapper around Do.
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
	if err != nil {
		return nil, err
	}
	return c.Do(req)
}

// Post is a convenience wrapper around Do. bodyType is sent as the
// Content-Type header.
func (c *Client) Post(ctx context.Context, urlStr string, bodyType string, body io.Reader) (*http.Response, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, urlStr, body)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", bodyType)
	return c.Do(req)
}

// isCloudflareChallenge detects whether the response body is a Cloudflare
// challenge page (i.e. FS failed to solve it and Chrome rendered the challenge).
// It reports true when body contains any of the known marker strings.
func isCloudflareChallenge(body []byte) bool {
	indicators := []string{
		"Just a moment...",
		"cf_chl_opt",
		"challenges.cloudflare.com",
		"/cdn-cgi/challenge-platform",
		"Enable JavaScript and cookies",
	}
	for _, ind := range indicators {
		if bytes.Contains(body, []byte(ind)) {
			return true
		}
	}
	return false
}

// stripFSWrapper removes FlareSolverr's Chrome HTML wrapper.
//
// When body (ignoring leading whitespace) starts with an HTML document and
// contains a <pre>...</pre> element, the bytes between the first "<pre>" and
// the last "</pre>" are returned. In every other case body is returned
// unchanged.
//
// NOTE(review): the three marker literals were empty in the reviewed copy of
// this file (the markup appears to have been stripped, leaving code that did
// not compile). They are reconstructed here from the surrounding logic and
// the original 5-byte offset; confirm "<html", "<pre>" and "</pre>" against
// version control.
func stripFSWrapper(body []byte) []byte {
	var (
		htmlPrefix = []byte("<html")
		preOpen    = []byte("<pre>")
		preClose   = []byte("</pre>")
	)

	if !bytes.HasPrefix(bytes.TrimSpace(body), htmlPrefix) {
		return body
	}
	preStart := bytes.Index(body, preOpen)
	if preStart < 0 {
		return body
	}
	preEnd := bytes.LastIndex(body, preClose)
	if preEnd <= preStart {
		return body
	}
	return body[preStart+len(preOpen) : preEnd]
}

// retryAfter returns how long to wait according to resp's Retry-After header.
// The header may be a number of seconds or an HTTP date. A missing,
// unparsable, negative, non-finite or out-of-range value, and a date that is
// not in the future, all yield the 5s default.
func retryAfter(resp *http.Response) time.Duration {
	const fallback = 5 * time.Second

	ra := resp.Header.Get("Retry-After")
	if ra == "" {
		return fallback
	}

	if secs, err := strconv.ParseFloat(ra, 64); err == nil {
		// Largest whole number of seconds a time.Duration can represent.
		// NaN fails both comparisons below and so falls back as well.
		const maxSecs = float64(math.MaxInt64 / int64(time.Second))
		if secs >= 0 && secs <= maxSecs {
			return time.Duration(secs * float64(time.Second))
		}
		return fallback
	}

	if t, err := http.ParseTime(ra); err == nil {
		if d := time.Until(t); d > 0 {
			return d
		}
	}
	return fallback
}