8c642905b7
- Use httpcloak.Session (Chrome JA3/JA4 fingerprint) as primary transport - Adaptive: direct request via httpcloak first; FlareSolverr fallback on 403/503 - FS cookies fed into httpcloak session so subsequent requests reuse cf_clearance (Chrome fingerprint + cookie = no re-challenge) - FlareSolverr timeout increased to 120s for slow challenges - Sanitize FS cookie values (strip quotes/newlines to avoid Go cookie warnings) - Remove go-cfscraper dependency (pure JS solver was fragile)
367 lines
8.1 KiB
Go
Executable File
367 lines
8.1 KiB
Go
Executable File
package httpclient
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"net/url"
|
|
"strconv"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/sardanioss/httpcloak"
|
|
"golang.org/x/time/rate"
|
|
)
|
|
|
|
// defaultUserAgent is the User-Agent value a new Client starts with; doDirect
// applies it to requests that do not already carry a User-Agent header.
// NOTE(review): this is a mobile (Android) Chrome string while the session is
// created with the "chrome-latest" preset — confirm the two are meant to differ.
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
|
|
|
|
var (
|
|
verboseLog bool
|
|
defaultOnce sync.Once
|
|
defaultClient *Client
|
|
)
|
|
|
|
// SetVerboseLog sets the package-level verbose logging default. The value is
// read when a Client is constructed, so clients that already exist keep the
// setting they were created with.
func SetVerboseLog(enabled bool) {
	verboseLog = enabled
}
|
|
|
|
func DefaultClient() *Client {
|
|
defaultOnce.Do(func() {
|
|
defaultClient = newClient()
|
|
})
|
|
return defaultClient
|
|
}
|
|
|
|
func NewClient(opts ...Option) *Client {
|
|
c := newClient()
|
|
for _, o := range opts {
|
|
o(c)
|
|
}
|
|
return c
|
|
}
|
|
|
|
func newClient() *Client {
|
|
hc := httpcloak.NewSession("chrome-latest",
|
|
httpcloak.WithSessionTimeout(30*time.Second),
|
|
)
|
|
c := &Client{
|
|
hc: hc,
|
|
fsClient: nil,
|
|
rateLimit: 1,
|
|
burst: 1,
|
|
userAgent: defaultUserAgent,
|
|
limiters: map[string]*rate.Limiter{},
|
|
verboseLog: verboseLog,
|
|
}
|
|
fsClient, err := NewFlareSolverrClient()
|
|
if err == nil {
|
|
c.fsClient = fsClient
|
|
}
|
|
return c
|
|
}
|
|
|
|
type Client struct {
|
|
hc *httpcloak.Session
|
|
fsClient *FlareSolverrClient
|
|
rateLimit float64
|
|
burst int
|
|
userAgent string
|
|
verboseLog bool
|
|
|
|
mu sync.Mutex
|
|
limiters map[string]*rate.Limiter
|
|
}
|
|
|
|
// Option mutates a Client during construction; NewClient applies options in
// the order they are given.
type Option func(*Client)
|
|
|
|
func WithRateLimit(rps float64, burst int) Option {
|
|
return func(c *Client) {
|
|
c.rateLimit = rps
|
|
c.burst = burst
|
|
}
|
|
}
|
|
|
|
func WithTimeout(d time.Duration) Option {
|
|
return func(c *Client) { c.hc = httpcloak.NewSession("chrome-latest",
|
|
httpcloak.WithSessionTimeout(d),
|
|
) }
|
|
}
|
|
|
|
func WithUserAgent(ua string) Option {
|
|
return func(c *Client) { c.userAgent = ua }
|
|
}
|
|
|
|
func WithVerboseLog(enabled bool) Option {
|
|
return func(c *Client) { c.verboseLog = enabled }
|
|
}
|
|
|
|
func (c *Client) limiter(host string) *rate.Limiter {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
l, ok := c.limiters[host]
|
|
if !ok {
|
|
l = rate.NewLimiter(rate.Limit(c.rateLimit), c.burst)
|
|
c.limiters[host] = l
|
|
}
|
|
return l
|
|
}
|
|
|
|
// Do tries a direct request via httpcloak (Chrome TLS fingerprint) first.
|
|
// httpcloak's TLS fingerprint matches Chrome, so if we already have a
|
|
// cf_clearance cookie from a previous FlareSolverr solve, Cloudflare won't
|
|
// challenge us. If we do get challenged (403/503), falls back to FlareSolverr.
|
|
func (c *Client) Do(req *http.Request) (*http.Response, error) {
|
|
if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp, err := c.doDirect(req)
|
|
var directStatus int
|
|
if err == nil {
|
|
directStatus = resp.StatusCode
|
|
if resp.StatusCode != http.StatusForbidden && resp.StatusCode != http.StatusServiceUnavailable {
|
|
return resp, nil
|
|
}
|
|
resp.Body.Close()
|
|
}
|
|
|
|
if c.fsClient == nil {
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return nil, fmt.Errorf("HTTP %d (challenge detected but FlareSolverr not configured)", resp.StatusCode)
|
|
}
|
|
|
|
return c.doFS(req, directStatus)
|
|
}
|
|
|
|
func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
|
|
if c.verboseLog {
|
|
log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String())
|
|
}
|
|
if req.Header.Get("User-Agent") == "" {
|
|
req.Header.Set("User-Agent", c.userAgent)
|
|
}
|
|
|
|
hreq := &httpcloak.Request{
|
|
Method: req.Method,
|
|
URL: req.URL.String(),
|
|
Headers: req.Header,
|
|
}
|
|
if req.Body != nil {
|
|
hreq.Body = req.Body
|
|
}
|
|
|
|
hresp, err := c.hc.Do(req.Context(), hreq)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
body, err := hresp.Bytes()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if c.verboseLog {
|
|
log.Printf("[httpclient] DIRECT RESPONSE %s status=%d", req.URL.String(), hresp.StatusCode)
|
|
}
|
|
|
|
return &http.Response{
|
|
StatusCode: hresp.StatusCode,
|
|
Header: hresp.Headers,
|
|
Body: io.NopCloser(bytes.NewReader(body)),
|
|
ContentLength: int64(len(body)),
|
|
Request: req,
|
|
}, nil
|
|
}
|
|
|
|
func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) {
|
|
if c.verboseLog {
|
|
log.Printf("[httpclient] FS FALLBACK %s %s", req.Method, req.URL.String())
|
|
}
|
|
|
|
rawURL := req.URL.String()
|
|
rawBody, statusCode, fsHeaders, cookies, fsRespURL, err := c.fsClient.GetRaw(req.Context(), rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
respURL := rawURL
|
|
if fsRespURL != "" {
|
|
respURL = fsRespURL
|
|
}
|
|
|
|
// Feed FS cookies into the httpcloak session for subsequent direct requests
|
|
if len(cookies) > 0 {
|
|
if parsedRespURL, uErr := url.Parse(respURL); uErr == nil {
|
|
for _, ck := range cookies {
|
|
if ck.Domain == "" {
|
|
ck.Domain = parsedRespURL.Host
|
|
}
|
|
c.hc.SetCookie(ck.Name, ck.Value)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check if FS returned challenge page instead of real content
|
|
if statusCode == 200 && isCloudflareChallenge([]byte(rawBody)) {
|
|
if directStatus >= 400 {
|
|
statusCode = directStatus
|
|
} else {
|
|
return nil, fmt.Errorf("FlareSolverr returned challenge page for %s", rawURL)
|
|
}
|
|
}
|
|
|
|
hdr := make(http.Header)
|
|
if len(fsHeaders) > 0 {
|
|
for k, v := range fsHeaders {
|
|
switch val := v.(type) {
|
|
case string:
|
|
hdr.Set(k, val)
|
|
case []any:
|
|
for _, sv := range val {
|
|
hdr.Add(k, fmt.Sprint(sv))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if len(cookies) > 0 {
|
|
for _, ck := range cookies {
|
|
hdr.Add("Set-Cookie", ck.String())
|
|
}
|
|
}
|
|
for k, v := range req.Header {
|
|
if hdr.Get(k) == "" {
|
|
hdr[k] = v
|
|
}
|
|
}
|
|
|
|
body := stripFSWrapper([]byte(rawBody))
|
|
|
|
return &http.Response{
|
|
StatusCode: statusCode,
|
|
Header: hdr,
|
|
Body: io.NopCloser(bytes.NewReader(body)),
|
|
Request: req,
|
|
}, nil
|
|
}
|
|
|
|
func (c *Client) HTTPClient() *http.Client {
|
|
return &http.Client{
|
|
Transport: hcTransport{c.hc},
|
|
Timeout: 30 * time.Second,
|
|
}
|
|
}
|
|
|
|
// hcTransport wraps httpcloak.Session as an http.RoundTripper
|
|
type hcTransport struct {
|
|
hc *httpcloak.Session
|
|
}
|
|
|
|
func (t hcTransport) RoundTrip(req *http.Request) (*http.Response, error) {
|
|
var body io.Reader
|
|
if req.Body != nil {
|
|
body = req.Body
|
|
}
|
|
hreq := &httpcloak.Request{
|
|
Method: req.Method,
|
|
URL: req.URL.String(),
|
|
Headers: req.Header,
|
|
Body: body,
|
|
}
|
|
hresp, err := t.hc.Do(req.Context(), hreq)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
bodyBytes, err := hresp.Bytes()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &http.Response{
|
|
StatusCode: hresp.StatusCode,
|
|
Header: hresp.Headers,
|
|
Body: io.NopCloser(bytes.NewReader(bodyBytes)),
|
|
ContentLength: int64(len(bodyBytes)),
|
|
Request: req,
|
|
}, nil
|
|
}
|
|
|
|
func (c *Client) Cookie(name, host string) string {
|
|
cks := c.hc.GetCookies()
|
|
if v, ok := cks[name]; ok {
|
|
return v
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// Get is a convenience wrapper around Do.
|
|
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return c.Do(req)
|
|
}
|
|
|
|
// Post is a convenience wrapper around Do.
|
|
func (c *Client) Post(ctx context.Context, urlStr string, bodyType string, body io.Reader) (*http.Response, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, urlStr, body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Content-Type", bodyType)
|
|
return c.Do(req)
|
|
}
|
|
|
|
// isCloudflareChallenge reports whether body contains any of the marker
// strings that appear on a Cloudflare challenge page, which indicates that the
// challenge was rendered rather than solved.
func isCloudflareChallenge(body []byte) bool {
	switch {
	case bytes.Contains(body, []byte("Just a moment...")),
		bytes.Contains(body, []byte("cf_chl_opt")),
		bytes.Contains(body, []byte("challenges.cloudflare.com")),
		bytes.Contains(body, []byte("/cdn-cgi/challenge-platform")),
		bytes.Contains(body, []byte("Enable JavaScript and cookies")):
		return true
	}
	return false
}
|
|
|
|
// stripFSWrapper removes the HTML wrapper a browser puts around a non-HTML
// payload and returns the bytes between the first opening <pre ...> tag and the
// last </pre>. The opening tag may carry attributes (for example a style
// attribute), which a literal "<pre>" match would miss.
//
// body is returned unchanged when it does not start with "<html" (ignoring
// surrounding whitespace), when no <pre> element is found, or when the last
// </pre> precedes the element's content. The result aliases body.
//
// NOTE(review): HTML entities inside the <pre> content are not decoded here.
func stripFSWrapper(body []byte) []byte {
	if !bytes.HasPrefix(bytes.TrimSpace(body), []byte("<html")) {
		return body
	}

	openTag := []byte("<pre")
	contentStart := -1
	for from := 0; contentStart < 0; {
		i := bytes.Index(body[from:], openTag)
		if i < 0 {
			return body
		}
		after := from + i + len(openTag)
		if after >= len(body) {
			return body
		}
		// Only accept a real <pre> tag: the name must be followed by '>' or
		// whitespace, so that tags such as <prefix> are skipped.
		switch body[after] {
		case '>':
			contentStart = after + 1
		case ' ', '\t', '\n', '\r', '\f':
			if gt := bytes.IndexByte(body[after:], '>'); gt >= 0 {
				contentStart = after + gt + 1
			}
		}
		from = after
	}

	contentEnd := bytes.LastIndex(body, []byte("</pre>"))
	if contentEnd < contentStart {
		return body
	}
	return body[contentStart:contentEnd]
}
|
|
|
|
// retryAfter returns how long to wait before retrying, based on the response's
// Retry-After header. The header may be a number of seconds (fractions are
// accepted) or an HTTP date.
//
// A 5 second default is returned when the header is absent, unparseable, a
// date that is not in the future, or a number that is negative, NaN, infinite
// or too large to represent as a time.Duration.
func retryAfter(resp *http.Response) time.Duration {
	const fallback = 5 * time.Second

	ra := resp.Header.Get("Retry-After")
	if ra == "" {
		return fallback
	}

	if secs, err := strconv.ParseFloat(ra, 64); err == nil {
		ns := secs * float64(time.Second)
		// The range check rejects negatives and overflow; NaN fails both
		// comparisons. Converting such values to int64 is not well defined.
		if ns >= 0 && ns < 1<<63 {
			return time.Duration(ns)
		}
		return fallback
	}

	if t, err := http.ParseTime(ra); err == nil {
		if d := time.Until(t); d > 0 {
			return d
		}
	}
	return fallback
}
|