feat: replace net/http with httpcloak for Chrome TLS fingerprint
- Use httpcloak.Session (Chrome JA3/JA4 fingerprint) as primary transport
- Adaptive: direct request via httpcloak first; FlareSolverr fallback on 403/503
- FlareSolverr (FS) cookies fed into httpcloak session so subsequent requests reuse cf_clearance (Chrome fingerprint + cookie = no re-challenge)
- FlareSolverr timeout increased to 120s for slow challenges
- Sanitize FS cookie values (strip quotes/newlines to avoid Go cookie warnings)
- Remove go-cfscraper dependency (pure JS solver was fragile)
This commit is contained in:
@@ -10,13 +10,31 @@ require (
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/andybalholm/brotli v1.2.0 // indirect
|
||||
github.com/andybalholm/cascadia v1.3.3 // indirect
|
||||
github.com/dlclark/regexp2 v1.11.4 // indirect
|
||||
github.com/dop251/goja v0.0.0-20251008123653-cf18d89f3cf6 // indirect
|
||||
github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
|
||||
github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect
|
||||
github.com/jackc/pgerrcode v0.0.0-20220416144525-469b46aa5efa // indirect
|
||||
github.com/jackc/pgpassfile v1.0.0 // indirect
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
|
||||
github.com/jackc/puddle/v2 v2.2.2 // indirect
|
||||
github.com/klauspost/compress v1.18.2 // indirect
|
||||
github.com/miekg/dns v1.1.69 // indirect
|
||||
github.com/sardanioss/http v1.2.0 // indirect
|
||||
github.com/sardanioss/httpcloak v1.6.0 // indirect
|
||||
github.com/sardanioss/net v1.2.1 // indirect
|
||||
github.com/sardanioss/qpack v0.6.2 // indirect
|
||||
github.com/sardanioss/quic-go v1.2.18 // indirect
|
||||
github.com/sardanioss/udpbara v1.1.0 // indirect
|
||||
github.com/sardanioss/utls v1.10.2 // indirect
|
||||
github.com/sriharsha-y/go-cfscraper v1.0.0 // indirect
|
||||
golang.org/x/crypto v0.51.0 // indirect
|
||||
golang.org/x/mod v0.35.0 // indirect
|
||||
golang.org/x/net v0.53.0 // indirect
|
||||
golang.org/x/sync v0.20.0 // indirect
|
||||
golang.org/x/sys v0.44.0 // indirect
|
||||
golang.org/x/text v0.37.0 // indirect
|
||||
golang.org/x/tools v0.44.0 // indirect
|
||||
)
|
||||
|
||||
@@ -4,6 +4,8 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/PuerkitoBio/goquery v1.12.0 h1:pAcL4g3WRXekcB9AU/y1mbKez2dbY2AajVhtkO8RIBo=
|
||||
github.com/PuerkitoBio/goquery v1.12.0/go.mod h1:802ej+gV2y7bbIhOIoPY5sT183ZW0YFofScC4q/hIpQ=
|
||||
github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
|
||||
github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
|
||||
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
|
||||
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
|
||||
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
|
||||
@@ -17,23 +19,31 @@ github.com/dhui/dktest v0.4.6 h1:+DPKyScKSEp3VLtbMDHcUq6V5Lm5zfZZVb0Sk7Ahom4=
|
||||
github.com/dhui/dktest v0.4.6/go.mod h1:JHTSYDtKkvFNFHJKqCzVzqXecyv+tKt8EzceOmQOgbU=
|
||||
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
|
||||
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
|
||||
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
|
||||
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||
github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI=
|
||||
github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
|
||||
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
|
||||
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
|
||||
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
|
||||
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
|
||||
github.com/dop251/goja v0.0.0-20251008123653-cf18d89f3cf6 h1:6dE1TmjqkY6tehR4A67gDNhvDtuZ54ocu7ab4K9o540=
|
||||
github.com/dop251/goja v0.0.0-20251008123653-cf18d89f3cf6/go.mod h1:MxLav0peU43GgvwVgNbLAj1s/bSGboKkhuULvq/7hx4=
|
||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
||||
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU=
|
||||
github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg=
|
||||
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
||||
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
||||
github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA=
|
||||
github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/pprof v0.0.0-20230207041349-798e818bf904 h1:4/hN5RUoecvl+RmJRE2YxKWtnnQls6rQjjW5oV7qg2U=
|
||||
github.com/google/pprof v0.0.0-20230207041349-798e818bf904/go.mod h1:uglQLonpP8qtYCYyzA+8c/9qtqgA3qsXGYqCPKARAFg=
|
||||
github.com/jackc/pgerrcode v0.0.0-20220416144525-469b46aa5efa h1:s+4MhCQ6YrzisK6hFJUX53drDT4UsSW3DEhKn0ifuHw=
|
||||
github.com/jackc/pgerrcode v0.0.0-20220416144525-469b46aa5efa/go.mod h1:a/s9Lp5W7n/DD0VrVoyJ00FbP2ytTPDVOivvn2bMlds=
|
||||
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
||||
@@ -44,8 +54,12 @@ github.com/jackc/pgx/v5 v5.9.2 h1:3ZhOzMWnR4yJ+RW1XImIPsD1aNSz4T4fyP7zlQb56hw=
|
||||
github.com/jackc/pgx/v5 v5.9.2/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM0Jn4=
|
||||
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
|
||||
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||
github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk=
|
||||
github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
|
||||
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
||||
github.com/miekg/dns v1.1.69 h1:Kb7Y/1Jo+SG+a2GtfoFUfDkG//csdRPwRLkCsxDG9Sc=
|
||||
github.com/miekg/dns v1.1.69/go.mod h1:7OyjD9nEba5OkqQ/hB4fy3PIoxafSZJtducccIelz3g=
|
||||
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
|
||||
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
|
||||
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
|
||||
@@ -61,6 +75,22 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/sardanioss/http v1.2.0 h1:Zq3uxnYDLeugTvaYaCzR3Tp1qDERbA2pHgkkFWRuETs=
|
||||
github.com/sardanioss/http v1.2.0/go.mod h1:Bn2qBFItWB9mLCxWW+tnwDe0stlrxhXaVZKtrLn4dc0=
|
||||
github.com/sardanioss/httpcloak v1.6.0 h1:KrPhi2Ju8+7hJT8Gk7SSNqX6KsUtjmkNkQrDbmYn1tc=
|
||||
github.com/sardanioss/httpcloak v1.6.0/go.mod h1:Ivfv+zWaVEDJDftqwXzDPTMOY3F/YD724cSGjGoHw1o=
|
||||
github.com/sardanioss/net v1.2.1 h1:AzzECGrjYMwcGIUAd1h52SuwISrNjKiZKSrQL7UH+fg=
|
||||
github.com/sardanioss/net v1.2.1/go.mod h1:jfBAWR1FCMNBh3Pl6kVPOjrKtvBvmKEO300GkAAwj3s=
|
||||
github.com/sardanioss/qpack v0.6.2 h1:ZVMyheNFfHRUIH3vyJy/bXBJSZVFgffFTwBWy42tRvo=
|
||||
github.com/sardanioss/qpack v0.6.2/go.mod h1:RSs0PpIh6d66DzAdANPGs9eHV/AbROwpW/Egpy0kIvQ=
|
||||
github.com/sardanioss/quic-go v1.2.18 h1:OKgOwLjImu+u1xShNJHRGVbkU6tz7fSH9Oz7p9uPlCY=
|
||||
github.com/sardanioss/quic-go v1.2.18/go.mod h1:SoE0McVgyPOPJYaStntVdJXsCh8hGwaf/Dr/2aP1w1U=
|
||||
github.com/sardanioss/udpbara v1.1.0 h1:fe71FKnCD/c9J27gY7IyjPM1Zt1gbklDsqJBVpW9Usk=
|
||||
github.com/sardanioss/udpbara v1.1.0/go.mod h1:aNCe+94AMrx1FiSPusvGPQnsJ6TPEc1RaL/8H7BCwl4=
|
||||
github.com/sardanioss/utls v1.10.2 h1:cS4PVVsVpBrxNFimvena6IHt+8oiIREW3wBz4tAqTbM=
|
||||
github.com/sardanioss/utls v1.10.2/go.mod h1:3sXK05Ir31HiMGINYV1uMDFRv/z9JL+QY9ITi/WEbV0=
|
||||
github.com/sriharsha-y/go-cfscraper v1.0.0 h1:QoN84Rgtq1guKolXpkSlx5rqrgdJuj4VEgmk4ZOMYSI=
|
||||
github.com/sriharsha-y/go-cfscraper v1.0.0/go.mod h1:mI78PDHfCzD4uq6SzYcXCyp7ZG7I6LynvgU3ENfxYOA=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
@@ -90,6 +120,8 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM=
|
||||
golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
@@ -126,6 +158,7 @@ golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
|
||||
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
golang.org/x/sys v0.44.0 h1:ildZl3J4uzeKP07r2F++Op7E9B29JRUy+a27EibtBTQ=
|
||||
golang.org/x/sys v0.44.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
@@ -156,6 +189,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
|
||||
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
|
||||
golang.org/x/tools v0.44.0 h1:UP4ajHPIcuMjT1GqzDWRlalUEoY+uzoZKnhOjbIPD2c=
|
||||
golang.org/x/tools v0.44.0/go.mod h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
||||
+105
-86
@@ -7,28 +7,25 @@ import (
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/sardanioss/httpcloak"
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
|
||||
|
||||
var (
|
||||
verboseLog bool
|
||||
defaultOnce sync.Once
|
||||
verboseLog bool
|
||||
defaultOnce sync.Once
|
||||
defaultClient *Client
|
||||
)
|
||||
|
||||
// SetVerboseLog sets the package-level verboseLog flag to enabled.
func SetVerboseLog(enabled bool) {
	verboseLog = enabled
}
|
||||
|
||||
// DefaultClient returns the shared singleton HTTP client.
|
||||
// FlareSolverr is auto-configured if the FLARESOLVERR_URL env var is set.
|
||||
// All sources share the same rate limiter (+ cookie jar) through this client.
|
||||
func DefaultClient() *Client {
|
||||
defaultOnce.Do(func() {
|
||||
defaultClient = newClient()
|
||||
@@ -36,9 +33,6 @@ func DefaultClient() *Client {
|
||||
return defaultClient
|
||||
}
|
||||
|
||||
// NewClient creates a standalone client with optional per-source overrides.
|
||||
// Only create a new client when the source needs different behaviour
|
||||
// (e.g. a custom rate limit); otherwise use DefaultClient.
|
||||
func NewClient(opts ...Option) *Client {
|
||||
c := newClient()
|
||||
for _, o := range opts {
|
||||
@@ -48,9 +42,12 @@ func NewClient(opts ...Option) *Client {
|
||||
}
|
||||
|
||||
func newClient() *Client {
|
||||
jar, _ := cookiejar.New(nil)
|
||||
hc := httpcloak.NewSession("chrome-latest",
|
||||
httpcloak.WithSessionTimeout(30*time.Second),
|
||||
)
|
||||
c := &Client{
|
||||
http: &http.Client{Timeout: 30 * time.Second, Jar: jar},
|
||||
hc: hc,
|
||||
fsClient: nil,
|
||||
rateLimit: 1,
|
||||
burst: 1,
|
||||
userAgent: defaultUserAgent,
|
||||
@@ -65,11 +62,11 @@ func newClient() *Client {
|
||||
}
|
||||
|
||||
type Client struct {
|
||||
http *http.Client
|
||||
fsClient *FlareSolverrClient
|
||||
rateLimit float64
|
||||
burst int
|
||||
userAgent string
|
||||
hc *httpcloak.Session
|
||||
fsClient *FlareSolverrClient
|
||||
rateLimit float64
|
||||
burst int
|
||||
userAgent string
|
||||
verboseLog bool
|
||||
|
||||
mu sync.Mutex
|
||||
@@ -86,7 +83,9 @@ func WithRateLimit(rps float64, burst int) Option {
|
||||
}
|
||||
|
||||
func WithTimeout(d time.Duration) Option {
|
||||
return func(c *Client) { c.http.Timeout = d }
|
||||
return func(c *Client) { c.hc = httpcloak.NewSession("chrome-latest",
|
||||
httpcloak.WithSessionTimeout(d),
|
||||
) }
|
||||
}
|
||||
|
||||
func WithUserAgent(ua string) Option {
|
||||
@@ -108,25 +107,15 @@ func (c *Client) limiter(host string) *rate.Limiter {
|
||||
return l
|
||||
}
|
||||
|
||||
// Do tries a direct HTTP request first. If the server returns 403/503 (a
|
||||
// Cloudflare or DDoS challenge) and FlareSolverr is available, it falls back
|
||||
// to FlareSolverr raw mode to solve the challenge and return the actual body.
|
||||
//
|
||||
// When FlareSolverr is used, the Chrome HTML wrapper is stripped from the
|
||||
// response so that both JSON and HTML callers receive the real server output.
|
||||
// Do tries a direct request via httpcloak (Chrome TLS fingerprint) first.
|
||||
// httpcloak's TLS fingerprint matches Chrome, so if we already have a
|
||||
// cf_clearance cookie from a previous FlareSolverr solve, Cloudflare won't
|
||||
// challenge us. If we do get challenged (403/503), falls back to FlareSolverr.
|
||||
func (c *Client) Do(req *http.Request) (*http.Response, error) {
|
||||
if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if req.Header.Get("User-Agent") == "" {
|
||||
req.Header.Set("User-Agent", c.userAgent)
|
||||
}
|
||||
|
||||
// Try direct first. Most sites don't have Cloudflare, so this is fast.
|
||||
// For Cloudflare sites, Go's TLS fingerprint doesn't match Chrome's, so
|
||||
// the direct request gets 403 — fall back to FlareSolverr. FS uses a
|
||||
// persistent session (Chrome caches cf_clearance), so only the first
|
||||
// FS request per domain solves the challenge; subsequent ones are fast.
|
||||
resp, err := c.doDirect(req)
|
||||
var directStatus int
|
||||
if err == nil {
|
||||
@@ -151,31 +140,40 @@ func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
|
||||
if c.verboseLog {
|
||||
log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String())
|
||||
}
|
||||
|
||||
const maxRetries = 3
|
||||
for attempt := 0; attempt <= maxRetries; attempt++ {
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if c.verboseLog {
|
||||
log.Printf("[httpclient] DIRECT RESPONSE %s status=%d", req.URL.String(), resp.StatusCode)
|
||||
}
|
||||
if resp.StatusCode != http.StatusTooManyRequests {
|
||||
return resp, nil
|
||||
}
|
||||
resp.Body.Close()
|
||||
if attempt == maxRetries {
|
||||
return resp, nil
|
||||
}
|
||||
sleep := retryAfter(resp)
|
||||
select {
|
||||
case <-req.Context().Done():
|
||||
return nil, req.Context().Err()
|
||||
case <-time.After(sleep):
|
||||
}
|
||||
if req.Header.Get("User-Agent") == "" {
|
||||
req.Header.Set("User-Agent", c.userAgent)
|
||||
}
|
||||
panic("unreachable")
|
||||
|
||||
hreq := &httpcloak.Request{
|
||||
Method: req.Method,
|
||||
URL: req.URL.String(),
|
||||
Headers: req.Header,
|
||||
}
|
||||
if req.Body != nil {
|
||||
hreq.Body = req.Body
|
||||
}
|
||||
|
||||
hresp, err := c.hc.Do(req.Context(), hreq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
body, err := hresp.Bytes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if c.verboseLog {
|
||||
log.Printf("[httpclient] DIRECT RESPONSE %s status=%d", req.URL.String(), hresp.StatusCode)
|
||||
}
|
||||
|
||||
return &http.Response{
|
||||
StatusCode: hresp.StatusCode,
|
||||
Header: hresp.Headers,
|
||||
Body: io.NopCloser(bytes.NewReader(body)),
|
||||
ContentLength: int64(len(body)),
|
||||
Request: req,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) {
|
||||
@@ -189,23 +187,24 @@ func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, erro
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Use the actual response URL from FlareSolverr (follows redirects
|
||||
// through Chrome) so cookies are associated with the right domain.
|
||||
respURL := rawURL
|
||||
if fsRespURL != "" {
|
||||
respURL = fsRespURL
|
||||
}
|
||||
|
||||
// Feed FlareSolverr cookies into the shared jar so subsequent direct
|
||||
// requests to the same host skip the challenge.
|
||||
// Feed FS cookies into the httpcloak session for subsequent direct requests
|
||||
if len(cookies) > 0 {
|
||||
if u, uErr := url.Parse(respURL); uErr == nil {
|
||||
c.http.Jar.SetCookies(u, cookies)
|
||||
if parsedRespURL, uErr := url.Parse(respURL); uErr == nil {
|
||||
for _, ck := range cookies {
|
||||
if ck.Domain == "" {
|
||||
ck.Domain = parsedRespURL.Host
|
||||
}
|
||||
c.hc.SetCookie(ck.Name, ck.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If FS returned the challenge page instead of the real content,
|
||||
// reject it (HTTP 0 case when directStatus=0).
|
||||
// Check if FS returned challenge page instead of real content
|
||||
if statusCode == 200 && isCloudflareChallenge([]byte(rawBody)) {
|
||||
if directStatus >= 400 {
|
||||
statusCode = directStatus
|
||||
@@ -214,9 +213,6 @@ func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, erro
|
||||
}
|
||||
}
|
||||
|
||||
// Build response headers from the actual FS response headers,
|
||||
// falling back to the request headers for keys not present in the
|
||||
// FS response (e.g. Content-Type on an empty GET body).
|
||||
hdr := make(http.Header)
|
||||
if len(fsHeaders) > 0 {
|
||||
for k, v := range fsHeaders {
|
||||
@@ -230,14 +226,11 @@ func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, erro
|
||||
}
|
||||
}
|
||||
}
|
||||
// Ensure Set-Cookie headers from FS cookies are present even if FS
|
||||
// omitted them from the headers map.
|
||||
if len(cookies) > 0 {
|
||||
for _, ck := range cookies {
|
||||
hdr.Add("Set-Cookie", ck.String())
|
||||
}
|
||||
}
|
||||
// Copy any request headers not present in the FS response (e.g. Host).
|
||||
for k, v := range req.Header {
|
||||
if hdr.Get(k) == "" {
|
||||
hdr[k] = v
|
||||
@@ -254,23 +247,55 @@ func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, erro
|
||||
}, nil
|
||||
}
|
||||
|
||||
// HTTPClient returns the underlying *http.Client (for passing to graphql etc.).
|
||||
func (c *Client) HTTPClient() *http.Client { return c.http }
|
||||
func (c *Client) HTTPClient() *http.Client {
|
||||
return &http.Client{
|
||||
Transport: hcTransport{c.hc},
|
||||
Timeout: 30 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
// hcTransport wraps httpcloak.Session as an http.RoundTripper
|
||||
type hcTransport struct {
|
||||
hc *httpcloak.Session
|
||||
}
|
||||
|
||||
func (t hcTransport) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
var body io.Reader
|
||||
if req.Body != nil {
|
||||
body = req.Body
|
||||
}
|
||||
hreq := &httpcloak.Request{
|
||||
Method: req.Method,
|
||||
URL: req.URL.String(),
|
||||
Headers: req.Header,
|
||||
Body: body,
|
||||
}
|
||||
hresp, err := t.hc.Do(req.Context(), hreq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
bodyBytes, err := hresp.Bytes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &http.Response{
|
||||
StatusCode: hresp.StatusCode,
|
||||
Header: hresp.Headers,
|
||||
Body: io.NopCloser(bytes.NewReader(bodyBytes)),
|
||||
ContentLength: int64(len(bodyBytes)),
|
||||
Request: req,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Cookie returns the value of a named cookie stored in the jar for the given
|
||||
// host (e.g. "mangahub.io"). Returns empty string when the cookie is not found.
|
||||
func (c *Client) Cookie(name, host string) string {
|
||||
u := &url.URL{Scheme: "https", Host: host}
|
||||
for _, ck := range c.http.Jar.Cookies(u) {
|
||||
if ck.Name == name {
|
||||
return ck.Value
|
||||
}
|
||||
cks := c.hc.GetCookies()
|
||||
if v, ok := cks[name]; ok {
|
||||
return v
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Get is a convenience wrapper around Do. To add custom headers, build the
|
||||
// request manually and call Do.
|
||||
// Get is a convenience wrapper around Do.
|
||||
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
|
||||
if err != nil {
|
||||
@@ -308,12 +333,6 @@ func isCloudflareChallenge(body []byte) bool {
|
||||
}
|
||||
|
||||
// stripFSWrapper removes FlareSolverr's Chrome HTML wrapper.
|
||||
// FlareSolverr wraps all responses in:
|
||||
//
|
||||
// <html><head>...<meta charset...>...</head><body><pre>actual_body</pre></body></html>
|
||||
//
|
||||
// If a <pre> tag is found inside the wrapper, its content is returned.
|
||||
// Otherwise the body is returned unchanged (HTML pages rendered by Chrome).
|
||||
func stripFSWrapper(body []byte) []byte {
|
||||
if !bytes.HasPrefix(bytes.TrimSpace(body), []byte("<html")) {
|
||||
return body
|
||||
|
||||
@@ -84,7 +84,7 @@ func (f *FlareSolverrClient) GetRaw(ctx context.Context, url string) (body strin
|
||||
reqData := fsRequest{
|
||||
Cmd: "request.get",
|
||||
URL: url,
|
||||
MaxTimeout: 60000,
|
||||
MaxTimeout: 120000,
|
||||
Raw: true,
|
||||
Session: f.sessionID,
|
||||
}
|
||||
@@ -146,7 +146,7 @@ func (f *FlareSolverrClient) request(ctx context.Context, cmd, url, body string,
|
||||
req := fsRequest{
|
||||
Cmd: cmd,
|
||||
URL: url,
|
||||
MaxTimeout: 60000,
|
||||
MaxTimeout: 120000,
|
||||
Session: f.sessionID,
|
||||
}
|
||||
if body != "" {
|
||||
|
||||
Reference in New Issue
Block a user