feat: replace net/http with httpcloak for Chrome TLS fingerprint

- Use httpcloak.Session (Chrome JA3/JA4 fingerprint) as primary transport
- Adaptive: direct request via httpcloak first; FlareSolverr fallback on 403/503
- FlareSolverr cookies are fed into the httpcloak session so subsequent requests reuse
  cf_clearance (Chrome fingerprint + cookie = no re-challenge)
- FlareSolverr timeout increased to 120s for slow challenges
- Sanitize FlareSolverr cookie values (strip quotes/newlines to avoid Go cookie warnings)
- Remove go-cfscraper dependency (pure JS solver was fragile)
This commit is contained in:
achmad
2026-05-14 22:31:09 +07:00
parent bfa66d8102
commit 8c642905b7
4 changed files with 160 additions and 88 deletions
+18
View File
@@ -10,13 +10,31 @@ require (
)
require (
	github.com/andybalholm/brotli v1.2.0 // indirect
	github.com/andybalholm/cascadia v1.3.3 // indirect
	github.com/dlclark/regexp2 v1.11.4 // indirect
	github.com/dop251/goja v0.0.0-20251008123653-cf18d89f3cf6 // indirect
	github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
	github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect
	github.com/jackc/pgerrcode v0.0.0-20220416144525-469b46aa5efa // indirect
	github.com/jackc/pgpassfile v1.0.0 // indirect
	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
	github.com/jackc/puddle/v2 v2.2.2 // indirect
	github.com/klauspost/compress v1.18.2 // indirect
	github.com/miekg/dns v1.1.69 // indirect
	github.com/sardanioss/http v1.2.0 // indirect
	// httpcloak is imported directly by the HTTP client code, so it must not
	// carry the "indirect" marker.
	github.com/sardanioss/httpcloak v1.6.0
	github.com/sardanioss/net v1.2.1 // indirect
	github.com/sardanioss/qpack v0.6.2 // indirect
	github.com/sardanioss/quic-go v1.2.18 // indirect
	github.com/sardanioss/udpbara v1.1.0 // indirect
	github.com/sardanioss/utls v1.10.2 // indirect
	github.com/sriharsha-y/go-cfscraper v1.0.0 // indirect
	golang.org/x/crypto v0.51.0 // indirect
	golang.org/x/mod v0.35.0 // indirect
	golang.org/x/net v0.53.0 // indirect
	golang.org/x/sync v0.20.0 // indirect
	golang.org/x/sys v0.44.0 // indirect
	golang.org/x/text v0.37.0 // indirect
	golang.org/x/tools v0.44.0 // indirect
)
+35
View File
@@ -4,6 +4,8 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/PuerkitoBio/goquery v1.12.0 h1:pAcL4g3WRXekcB9AU/y1mbKez2dbY2AajVhtkO8RIBo=
github.com/PuerkitoBio/goquery v1.12.0/go.mod h1:802ej+gV2y7bbIhOIoPY5sT183ZW0YFofScC4q/hIpQ=
github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
@@ -17,23 +19,31 @@ github.com/dhui/dktest v0.4.6 h1:+DPKyScKSEp3VLtbMDHcUq6V5Lm5zfZZVb0Sk7Ahom4=
github.com/dhui/dktest v0.4.6/go.mod h1:JHTSYDtKkvFNFHJKqCzVzqXecyv+tKt8EzceOmQOgbU=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI=
github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/dop251/goja v0.0.0-20251008123653-cf18d89f3cf6 h1:6dE1TmjqkY6tehR4A67gDNhvDtuZ54ocu7ab4K9o540=
github.com/dop251/goja v0.0.0-20251008123653-cf18d89f3cf6/go.mod h1:MxLav0peU43GgvwVgNbLAj1s/bSGboKkhuULvq/7hx4=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU=
github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA=
github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20230207041349-798e818bf904 h1:4/hN5RUoecvl+RmJRE2YxKWtnnQls6rQjjW5oV7qg2U=
github.com/google/pprof v0.0.0-20230207041349-798e818bf904/go.mod h1:uglQLonpP8qtYCYyzA+8c/9qtqgA3qsXGYqCPKARAFg=
github.com/jackc/pgerrcode v0.0.0-20220416144525-469b46aa5efa h1:s+4MhCQ6YrzisK6hFJUX53drDT4UsSW3DEhKn0ifuHw=
github.com/jackc/pgerrcode v0.0.0-20220416144525-469b46aa5efa/go.mod h1:a/s9Lp5W7n/DD0VrVoyJ00FbP2ytTPDVOivvn2bMlds=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
@@ -44,8 +54,12 @@ github.com/jackc/pgx/v5 v5.9.2 h1:3ZhOzMWnR4yJ+RW1XImIPsD1aNSz4T4fyP7zlQb56hw=
github.com/jackc/pgx/v5 v5.9.2/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM0Jn4=
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk=
github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/miekg/dns v1.1.69 h1:Kb7Y/1Jo+SG+a2GtfoFUfDkG//csdRPwRLkCsxDG9Sc=
github.com/miekg/dns v1.1.69/go.mod h1:7OyjD9nEba5OkqQ/hB4fy3PIoxafSZJtducccIelz3g=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
@@ -61,6 +75,22 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sardanioss/http v1.2.0 h1:Zq3uxnYDLeugTvaYaCzR3Tp1qDERbA2pHgkkFWRuETs=
github.com/sardanioss/http v1.2.0/go.mod h1:Bn2qBFItWB9mLCxWW+tnwDe0stlrxhXaVZKtrLn4dc0=
github.com/sardanioss/httpcloak v1.6.0 h1:KrPhi2Ju8+7hJT8Gk7SSNqX6KsUtjmkNkQrDbmYn1tc=
github.com/sardanioss/httpcloak v1.6.0/go.mod h1:Ivfv+zWaVEDJDftqwXzDPTMOY3F/YD724cSGjGoHw1o=
github.com/sardanioss/net v1.2.1 h1:AzzECGrjYMwcGIUAd1h52SuwISrNjKiZKSrQL7UH+fg=
github.com/sardanioss/net v1.2.1/go.mod h1:jfBAWR1FCMNBh3Pl6kVPOjrKtvBvmKEO300GkAAwj3s=
github.com/sardanioss/qpack v0.6.2 h1:ZVMyheNFfHRUIH3vyJy/bXBJSZVFgffFTwBWy42tRvo=
github.com/sardanioss/qpack v0.6.2/go.mod h1:RSs0PpIh6d66DzAdANPGs9eHV/AbROwpW/Egpy0kIvQ=
github.com/sardanioss/quic-go v1.2.18 h1:OKgOwLjImu+u1xShNJHRGVbkU6tz7fSH9Oz7p9uPlCY=
github.com/sardanioss/quic-go v1.2.18/go.mod h1:SoE0McVgyPOPJYaStntVdJXsCh8hGwaf/Dr/2aP1w1U=
github.com/sardanioss/udpbara v1.1.0 h1:fe71FKnCD/c9J27gY7IyjPM1Zt1gbklDsqJBVpW9Usk=
github.com/sardanioss/udpbara v1.1.0/go.mod h1:aNCe+94AMrx1FiSPusvGPQnsJ6TPEc1RaL/8H7BCwl4=
github.com/sardanioss/utls v1.10.2 h1:cS4PVVsVpBrxNFimvena6IHt+8oiIREW3wBz4tAqTbM=
github.com/sardanioss/utls v1.10.2/go.mod h1:3sXK05Ir31HiMGINYV1uMDFRv/z9JL+QY9ITi/WEbV0=
github.com/sriharsha-y/go-cfscraper v1.0.0 h1:QoN84Rgtq1guKolXpkSlx5rqrgdJuj4VEgmk4ZOMYSI=
github.com/sriharsha-y/go-cfscraper v1.0.0/go.mod h1:mI78PDHfCzD4uq6SzYcXCyp7ZG7I6LynvgU3ENfxYOA=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
@@ -90,6 +120,8 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM=
golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
@@ -126,6 +158,7 @@ golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -156,6 +189,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/tools v0.44.0 h1:UP4ajHPIcuMjT1GqzDWRlalUEoY+uzoZKnhOjbIPD2c=
golang.org/x/tools v0.44.0/go.mod h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+105 -86
View File
@@ -7,28 +7,25 @@ import (
"io"
"log"
"net/http"
"net/http/cookiejar"
"net/url"
"strconv"
"sync"
"time"
"github.com/sardanioss/httpcloak"
"golang.org/x/time/rate"
)
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
var (
verboseLog bool
defaultOnce sync.Once
verboseLog bool
defaultOnce sync.Once
defaultClient *Client
)
// SetVerboseLog sets the package-level verboseLog flag to enabled.
func SetVerboseLog(enabled bool) {
	verboseLog = enabled
}
// DefaultClient returns the shared singleton HTTP client.
// FlareSolverr is auto-configured if the FLARESOLVERR_URL env var is set.
// All sources share the same rate limiter (+ cookie jar) through this client.
func DefaultClient() *Client {
defaultOnce.Do(func() {
defaultClient = newClient()
@@ -36,9 +33,6 @@ func DefaultClient() *Client {
return defaultClient
}
// NewClient creates a standalone client with optional per-source overrides.
// Only create a new client when the source needs different behaviour
// (e.g. a custom rate limit); otherwise use DefaultClient.
func NewClient(opts ...Option) *Client {
c := newClient()
for _, o := range opts {
@@ -48,9 +42,12 @@ func NewClient(opts ...Option) *Client {
}
func newClient() *Client {
jar, _ := cookiejar.New(nil)
hc := httpcloak.NewSession("chrome-latest",
httpcloak.WithSessionTimeout(30*time.Second),
)
c := &Client{
http: &http.Client{Timeout: 30 * time.Second, Jar: jar},
hc: hc,
fsClient: nil,
rateLimit: 1,
burst: 1,
userAgent: defaultUserAgent,
@@ -65,11 +62,11 @@ func newClient() *Client {
}
type Client struct {
http *http.Client
fsClient *FlareSolverrClient
rateLimit float64
burst int
userAgent string
hc *httpcloak.Session
fsClient *FlareSolverrClient
rateLimit float64
burst int
userAgent string
verboseLog bool
mu sync.Mutex
@@ -86,7 +83,9 @@ func WithRateLimit(rps float64, burst int) Option {
}
func WithTimeout(d time.Duration) Option {
return func(c *Client) { c.http.Timeout = d }
return func(c *Client) { c.hc = httpcloak.NewSession("chrome-latest",
httpcloak.WithSessionTimeout(d),
) }
}
func WithUserAgent(ua string) Option {
@@ -108,25 +107,15 @@ func (c *Client) limiter(host string) *rate.Limiter {
return l
}
// Do tries a direct HTTP request first. If the server returns 403/503 (a
// Cloudflare or DDoS challenge) and FlareSolverr is available, it falls back
// to FlareSolverr raw mode to solve the challenge and return the actual body.
//
// When FlareSolverr is used, the Chrome HTML wrapper is stripped from the
// response so that both JSON and HTML callers receive the real server output.
// Do tries a direct request via httpcloak (Chrome TLS fingerprint) first.
// httpcloak's TLS fingerprint matches Chrome, so if we already have a
// cf_clearance cookie from a previous FlareSolverr solve, Cloudflare won't
// challenge us. If we do get challenged (403/503), falls back to FlareSolverr.
func (c *Client) Do(req *http.Request) (*http.Response, error) {
if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
return nil, err
}
if req.Header.Get("User-Agent") == "" {
req.Header.Set("User-Agent", c.userAgent)
}
// Try direct first. Most sites don't have Cloudflare, so this is fast.
// For Cloudflare sites, Go's TLS fingerprint doesn't match Chrome's, so
// the direct request gets 403 — fall back to FlareSolverr. FS uses a
// persistent session (Chrome caches cf_clearance), so only the first
// FS request per domain solves the challenge; subsequent ones are fast.
resp, err := c.doDirect(req)
var directStatus int
if err == nil {
@@ -151,31 +140,40 @@ func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
if c.verboseLog {
log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String())
}
const maxRetries = 3
for attempt := 0; attempt <= maxRetries; attempt++ {
resp, err := c.http.Do(req)
if err != nil {
return nil, err
}
if c.verboseLog {
log.Printf("[httpclient] DIRECT RESPONSE %s status=%d", req.URL.String(), resp.StatusCode)
}
if resp.StatusCode != http.StatusTooManyRequests {
return resp, nil
}
resp.Body.Close()
if attempt == maxRetries {
return resp, nil
}
sleep := retryAfter(resp)
select {
case <-req.Context().Done():
return nil, req.Context().Err()
case <-time.After(sleep):
}
if req.Header.Get("User-Agent") == "" {
req.Header.Set("User-Agent", c.userAgent)
}
panic("unreachable")
hreq := &httpcloak.Request{
Method: req.Method,
URL: req.URL.String(),
Headers: req.Header,
}
if req.Body != nil {
hreq.Body = req.Body
}
hresp, err := c.hc.Do(req.Context(), hreq)
if err != nil {
return nil, err
}
body, err := hresp.Bytes()
if err != nil {
return nil, err
}
if c.verboseLog {
log.Printf("[httpclient] DIRECT RESPONSE %s status=%d", req.URL.String(), hresp.StatusCode)
}
return &http.Response{
StatusCode: hresp.StatusCode,
Header: hresp.Headers,
Body: io.NopCloser(bytes.NewReader(body)),
ContentLength: int64(len(body)),
Request: req,
}, nil
}
func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) {
@@ -189,23 +187,24 @@ func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, erro
return nil, err
}
// Use the actual response URL from FlareSolverr (follows redirects
// through Chrome) so cookies are associated with the right domain.
respURL := rawURL
if fsRespURL != "" {
respURL = fsRespURL
}
// Feed FlareSolverr cookies into the shared jar so subsequent direct
// requests to the same host skip the challenge.
// Feed FS cookies into the httpcloak session for subsequent direct requests
if len(cookies) > 0 {
if u, uErr := url.Parse(respURL); uErr == nil {
c.http.Jar.SetCookies(u, cookies)
if parsedRespURL, uErr := url.Parse(respURL); uErr == nil {
for _, ck := range cookies {
if ck.Domain == "" {
ck.Domain = parsedRespURL.Host
}
c.hc.SetCookie(ck.Name, ck.Value)
}
}
}
// If FS returned the challenge page instead of the real content,
// reject it (HTTP 0 case when directStatus=0).
// Check if FS returned challenge page instead of real content
if statusCode == 200 && isCloudflareChallenge([]byte(rawBody)) {
if directStatus >= 400 {
statusCode = directStatus
@@ -214,9 +213,6 @@ func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, erro
}
}
// Build response headers from the actual FS response headers,
// falling back to the request headers for keys not present in the
// FS response (e.g. Content-Type on an empty GET body).
hdr := make(http.Header)
if len(fsHeaders) > 0 {
for k, v := range fsHeaders {
@@ -230,14 +226,11 @@ func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, erro
}
}
}
// Ensure Set-Cookie headers from FS cookies are present even if FS
// omitted them from the headers map.
if len(cookies) > 0 {
for _, ck := range cookies {
hdr.Add("Set-Cookie", ck.String())
}
}
// Copy any request headers not present in the FS response (e.g. Host).
for k, v := range req.Header {
if hdr.Get(k) == "" {
hdr[k] = v
@@ -254,23 +247,55 @@ func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, erro
}, nil
}
// HTTPClient returns an *http.Client (for passing to graphql etc.) whose
// requests are sent through this client's httpcloak session.
func (c *Client) HTTPClient() *http.Client { return c.http }
func (c *Client) HTTPClient() *http.Client {
	// Route every request of the returned client through the httpcloak
	// session by installing it as the transport.
	transport := hcTransport{c.hc}

	// A new http.Client is built on each call with a fixed 30-second timeout.
	client := &http.Client{Timeout: 30 * time.Second}
	client.Transport = transport
	return client
}
// hcTransport adapts an httpcloak.Session to the http.RoundTripper interface
// so the session can serve as the Transport of a standard *http.Client.
type hcTransport struct {
// hc is the session that actually performs each request.
hc *httpcloak.Session
}
// RoundTrip implements http.RoundTripper. It sends req through the wrapped
// httpcloak session and converts the result into an *http.Response.
//
// The response body is read fully into memory before returning, so the
// returned Body is an in-memory reader and ContentLength is its exact length.
// The request body, if present, is always closed before RoundTrip returns,
// as the http.RoundTripper contract requires (including on error paths).
func (t hcTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	// Keep body a nil interface when the request has no body; assigning a nil
	// io.ReadCloser directly would produce a non-nil io.Reader wrapping nil.
	var body io.Reader
	if req.Body != nil {
		body = req.Body
		defer req.Body.Close()
	}

	hreq := &httpcloak.Request{
		Method:  req.Method,
		URL:     req.URL.String(),
		Headers: req.Header,
		Body:    body,
	}
	hresp, err := t.hc.Do(req.Context(), hreq)
	if err != nil {
		return nil, err
	}
	bodyBytes, err := hresp.Bytes()
	if err != nil {
		return nil, err
	}

	// Rebuild the headers through http.Header.Add so keys are stored in
	// canonical form; http.Header.Get only finds canonical keys, and the
	// session may report them in another case (e.g. lower-case for HTTP/2 —
	// TODO confirm against httpcloak).
	hdr := make(http.Header, len(hresp.Headers))
	for k, vals := range hresp.Headers {
		for _, v := range vals {
			hdr.Add(k, v)
		}
	}

	return &http.Response{
		StatusCode:    hresp.StatusCode,
		Header:        hdr,
		Body:          io.NopCloser(bytes.NewReader(bodyBytes)),
		ContentLength: int64(len(bodyBytes)),
		Request:       req,
	}, nil
}
// Cookie returns the value of a named cookie stored in the jar for the given
// host (e.g. "mangahub.io"). Returns empty string when the cookie is not found.
func (c *Client) Cookie(name, host string) string {
u := &url.URL{Scheme: "https", Host: host}
for _, ck := range c.http.Jar.Cookies(u) {
if ck.Name == name {
return ck.Value
}
cks := c.hc.GetCookies()
if v, ok := cks[name]; ok {
return v
}
return ""
}
// Get is a convenience wrapper around Do. To add custom headers, build the
// request manually and call Do.
// Get is a convenience wrapper around Do.
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
if err != nil {
@@ -308,12 +333,6 @@ func isCloudflareChallenge(body []byte) bool {
}
// stripFSWrapper removes FlareSolverr's Chrome HTML wrapper.
// FlareSolverr wraps all responses in:
//
// <html><head>...<meta charset...>...</head><body><pre>actual_body</pre></body></html>
//
// If a <pre> tag is found inside the wrapper, its content is returned.
// Otherwise the body is returned unchanged (HTML pages rendered by Chrome).
func stripFSWrapper(body []byte) []byte {
if !bytes.HasPrefix(bytes.TrimSpace(body), []byte("<html")) {
return body
+2 -2
View File
@@ -84,7 +84,7 @@ func (f *FlareSolverrClient) GetRaw(ctx context.Context, url string) (body strin
reqData := fsRequest{
Cmd: "request.get",
URL: url,
MaxTimeout: 60000,
MaxTimeout: 120000,
Raw: true,
Session: f.sessionID,
}
@@ -146,7 +146,7 @@ func (f *FlareSolverrClient) request(ctx context.Context, cmd, url, body string,
req := fsRequest{
Cmd: cmd,
URL: url,
MaxTimeout: 60000,
MaxTimeout: 120000,
Session: f.sessionID,
}
if body != "" {