Files
goyomi/internal/httpclient/client.go
T
achmad 6d45576790 fix(httpclient): fix HTTP 0 status and restore FS session default
- Guard isCloudflareChallenge with directStatus >= 400 to prevent
  overriding status to 0 when no direct request was made
- When FS returns challenge page without a prior direct status,
  return an error instead of silently passing HTTP 0
- Restore default FS session ID to 'goyomi' — without a session,
  each request spawns a new Chrome, causing timeouts under load
- Add Message field to FlareSolverrResponse for better error reporting
- Document FLARESOLVERR_SESSION env var: shared session = fast after
  1st request, but serializes. Set empty for parallel (resource-heavy).
2026-05-14 13:54:11 +07:00

355 lines
9.4 KiB
Go
Executable File

package httpclient
import (
"bytes"
"context"
"fmt"
"io"
"log"
"net/http"
"net/http/cookiejar"
"net/url"
"strconv"
"sync"
"time"
"golang.org/x/time/rate"
)
// defaultUserAgent is the User-Agent string newClient installs on every new
// Client. Do only applies it to requests that do not already carry a
// User-Agent header.
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
// Package-level state shared by the constructors in this file.
var (
// verboseLog is the package-wide default for request logging. newClient
// copies it into each Client at construction time.
verboseLog bool
// defaultOnce ensures the shared client is built exactly once.
defaultOnce sync.Once
// defaultClient is the singleton handed out by DefaultClient.
defaultClient *Client
)
// SetVerboseLog sets the package-wide verbose-logging default.
//
// The flag is copied into a Client when that client is constructed, so calling
// this only influences clients created afterwards. Use WithVerboseLog to
// configure a single client explicitly.
func SetVerboseLog(enabled bool) {
	verboseLog = enabled
}
// DefaultClient returns the process-wide shared HTTP client, building it on
// first use.
//
// Construction goes through newClient, so FlareSolverr is attached whenever
// NewFlareSolverrClient succeeds. Every caller of DefaultClient shares the same
// per-host rate limiters and the same cookie jar.
func DefaultClient() *Client {
	build := func() {
		defaultClient = newClient()
	}
	defaultOnce.Do(build)
	return defaultClient
}
// NewClient builds an independent client and applies opts to it in the order
// given.
//
// The returned client has its own cookie jar and rate limiters. Prefer
// DefaultClient unless a source genuinely needs different behaviour, such as a
// custom rate limit.
func NewClient(opts ...Option) *Client {
	client := newClient()
	for i := range opts {
		opts[i](client)
	}
	return client
}
// newClient assembles a Client with the package defaults: a 30-second HTTP
// timeout, a fresh cookie jar, one request per second with a burst of one per
// host, the default User-Agent and the current package-wide verbose flag.
//
// FlareSolverr is attached only when NewFlareSolverrClient succeeds; a
// construction error is dropped on purpose and leaves the client direct-only.
func newClient() *Client {
	// The error is discarded: with nil options the standard-library
	// implementation of cookiejar.New does not report one.
	jar, _ := cookiejar.New(nil)

	transport := &http.Client{
		Jar:     jar,
		Timeout: 30 * time.Second,
	}
	client := &Client{
		http:       transport,
		userAgent:  defaultUserAgent,
		verboseLog: verboseLog,
		rateLimit:  1,
		burst:      1,
		limiters:   make(map[string]*rate.Limiter),
	}

	if solver, err := NewFlareSolverrClient(); err == nil {
		client.fsClient = solver
	}
	return client
}
// Client is a rate-limited HTTP client that can route requests through
// FlareSolverr. Build one with NewClient or obtain the shared instance from
// DefaultClient.
type Client struct {
// http performs direct requests and owns the cookie jar.
http *http.Client
// fsClient is the FlareSolverr client; when non-nil, Do sends every
// request through it instead of making a direct request.
fsClient *FlareSolverrClient
// rateLimit and burst are the parameters used for each per-host limiter
// at the moment that limiter is first created.
rateLimit float64
burst int
// userAgent is applied by Do to requests without a User-Agent header.
userAgent string
// verboseLog enables log.Printf tracing of requests and responses.
verboseLog bool
// mu guards limiters.
mu sync.Mutex
// limiters holds one rate limiter per request host, created lazily.
limiters map[string]*rate.Limiter
}
// Option customises a Client. NewClient applies options in the order given,
// after the defaults have been set.
type Option func(*Client)
// WithRateLimit overrides the per-host rate limit: rps is the sustained number
// of requests per second and burst the maximum burst size.
//
// Limiters are created lazily per host, so the values apply to every limiter
// created after the option has run.
func WithRateLimit(rps float64, burst int) Option {
	apply := func(c *Client) {
		c.rateLimit, c.burst = rps, burst
	}
	return apply
}
// WithTimeout replaces the timeout of the client's underlying *http.Client
// with d.
func WithTimeout(d time.Duration) Option {
	apply := func(c *Client) {
		c.http.Timeout = d
	}
	return apply
}
// WithUserAgent sets the User-Agent that Do adds to requests which do not
// already specify one.
func WithUserAgent(ua string) Option {
	apply := func(c *Client) {
		c.userAgent = ua
	}
	return apply
}
// WithVerboseLog turns request/response logging on or off for this client
// only, overriding the package-wide default it was created with.
func WithVerboseLog(enabled bool) Option {
	apply := func(c *Client) {
		c.verboseLog = enabled
	}
	return apply
}
// limiter returns the rate limiter for host, creating and caching one from the
// client's current rateLimit and burst on first use. The limiter map is
// accessed under c.mu.
func (c *Client) limiter(host string) *rate.Limiter {
	c.mu.Lock()
	defer c.mu.Unlock()

	if existing, found := c.limiters[host]; found {
		return existing
	}
	created := rate.NewLimiter(rate.Limit(c.rateLimit), c.burst)
	c.limiters[host] = created
	return created
}
// Do sends req after waiting for the per-host rate limiter.
//
// Routing:
//   - When FlareSolverr is configured, every request goes through it. Go's
//     TLS fingerprint doesn't match Chrome's, so Cloudflare clearance cookies
//     obtained by FlareSolverr are rejected for requests made by net/http and
//     every direct request would be challenged again. FlareSolverr's Chrome
//     caches the clearance internally, so subsequent calls for the same
//     domain are near-instant. The Chrome HTML wrapper is stripped from the
//     response so that JSON and HTML callers receive the server's output.
//   - When FlareSolverr is not configured, the request is sent directly.
//
// A previous "direct first, FlareSolverr on 403/503" strategy is disabled for
// the TLS-fingerprint reason above; doFS still accepts the status of a prior
// direct attempt, and Do passes 0 to signal that none was made.
//
// The client's default User-Agent is added to req when it has none (this
// mutates req.Header). Do returns the limiter's error if the request context
// ends while waiting, and an error if req or req.URL is nil.
func (c *Client) Do(req *http.Request) (*http.Response, error) {
	// Guard against hand-built requests: the limiter lookup below would
	// otherwise panic on a nil URL.
	if req == nil || req.URL == nil {
		return nil, fmt.Errorf("httpclient: nil request or request URL")
	}
	if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
		return nil, err
	}

	// Writing to a nil header map panics; http.NewRequest always allocates
	// one, but a request literal may not.
	if req.Header == nil {
		req.Header = make(http.Header)
	}
	if req.Header.Get("User-Agent") == "" {
		req.Header.Set("User-Agent", c.userAgent)
	}

	if c.fsClient != nil {
		return c.doFS(req, 0)
	}
	return c.doDirect(req)
}
// doDirect sends req with the underlying *http.Client, retrying up to three
// times when the server answers 429 Too Many Requests.
//
// Between attempts it sleeps for the duration given by retryAfter, aborting
// with the context's error if req's context ends first. Any other status, or
// a transport error, is returned immediately.
//
// When the retries are exhausted the last 429 response is returned with its
// body still open, so the caller can read and must close it like any other
// response. A request that carries a body is only retried when it can be
// replayed through req.GetBody; otherwise the 429 response is returned as is.
func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
	if c.verboseLog {
		log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String())
	}

	const maxRetries = 3
	for attempt := 0; ; attempt++ {
		resp, err := c.http.Do(req)
		if err != nil {
			return nil, err
		}
		if c.verboseLog {
			log.Printf("[httpclient] DIRECT RESPONSE %s status=%d", req.URL.String(), resp.StatusCode)
		}
		if resp.StatusCode != http.StatusTooManyRequests || attempt == maxRetries {
			return resp, nil
		}

		// The transport has consumed req.Body; rewind it before resending,
		// or give up retrying when the body cannot be replayed.
		if req.Body != nil && req.Body != http.NoBody {
			if req.GetBody == nil {
				return resp, nil
			}
			fresh, bodyErr := req.GetBody()
			if bodyErr != nil {
				resp.Body.Close()
				return nil, bodyErr
			}
			req.Body = fresh
		}

		wait := retryAfter(resp)

		// Drain a bounded amount so the connection can be reused, then
		// release the response we are discarding.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 4<<10))
		resp.Body.Close()

		timer := time.NewTimer(wait)
		select {
		case <-req.Context().Done():
			timer.Stop()
			return nil, req.Context().Err()
		case <-timer.C:
		}
	}
}
// doFS fetches req's URL through FlareSolverr and adapts the result into an
// *http.Response.
//
// directStatus is the status code of a preceding direct attempt, or 0 when
// none was made. If FlareSolverr reports 200 but the body is still a
// Cloudflare challenge page, the response carries directStatus when it is an
// error status (>= 400); otherwise an error is returned instead of passing a
// challenge page to the caller as a success.
//
// The returned response has:
//   - headers from the FlareSolverr response, plus a Set-Cookie entry for
//     every cookie FlareSolverr returned, plus any request header whose key
//     is not already present;
//   - a body with FlareSolverr's Chrome wrapper stripped (see
//     stripFSWrapper), with Status and ContentLength describing that body.
//
// Content-Length, Content-Encoding and Transfer-Encoding are removed from the
// headers because they describe the origin's wire format, not the decoded and
// unwrapped body returned here.
//
// NOTE(review): only the request's context and URL are handed to GetRaw in
// this function; whether method, headers or a request body are honoured is
// not visible here. Confirm that non-GET callers (e.g. Post) behave as
// intended when FlareSolverr is enabled.
func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) {
	if c.verboseLog {
		// The "FALLBACK" label is kept for log compatibility even though Do
		// may route here without a prior direct attempt.
		log.Printf("[httpclient] FS FALLBACK %s %s", req.Method, req.URL.String())
	}

	rawURL := req.URL.String()
	rawBody, statusCode, fsHeaders, cookies, fsRespURL, err := c.fsClient.GetRaw(req.Context(), rawURL)
	if err != nil {
		return nil, err
	}
	payload := []byte(rawBody)

	// Prefer the final URL reported by FlareSolverr (after redirects in
	// Chrome) so cookies are associated with the right domain.
	respURL := rawURL
	if fsRespURL != "" {
		respURL = fsRespURL
	}

	// Mirror FlareSolverr's cookies into the jar so Cookie() and any direct
	// request made with the underlying client can see them. The jar may have
	// been removed through HTTPClient(), hence the nil check.
	if len(cookies) > 0 && c.http.Jar != nil {
		if u, uErr := url.Parse(respURL); uErr == nil {
			c.http.Jar.SetCookies(u, cookies)
		}
	}

	// FlareSolverr handed back the challenge page instead of real content.
	if statusCode == 200 && isCloudflareChallenge(payload) {
		if directStatus < 400 {
			return nil, fmt.Errorf("FlareSolverr returned challenge page for %s", rawURL)
		}
		statusCode = directStatus
	}

	// Start from the headers FlareSolverr reported; values arrive either as
	// a single string or as a list.
	hdr := make(http.Header)
	for k, v := range fsHeaders {
		switch val := v.(type) {
		case string:
			hdr.Set(k, val)
		case []any:
			for _, sv := range val {
				hdr.Add(k, fmt.Sprint(sv))
			}
		}
	}
	// Make sure every cookie is visible as Set-Cookie even if FlareSolverr
	// left it out of the header map.
	for _, ck := range cookies {
		hdr.Add("Set-Cookie", ck.String())
	}
	// Fill in request headers for keys the FlareSolverr response lacks.
	for k, v := range req.Header {
		if hdr.Get(k) == "" {
			hdr[k] = v
		}
	}

	body := stripFSWrapper(payload)

	// These describe the origin's encoded response, not the body built here.
	hdr.Del("Content-Length")
	hdr.Del("Content-Encoding")
	hdr.Del("Transfer-Encoding")

	status := strconv.Itoa(statusCode)
	if text := http.StatusText(statusCode); text != "" {
		status += " " + text
	}

	return &http.Response{
		Status:        status,
		StatusCode:    statusCode,
		Header:        hdr,
		Body:          io.NopCloser(bytes.NewReader(body)),
		ContentLength: int64(len(body)),
		Request:       req,
	}, nil
}
// HTTPClient exposes the underlying *http.Client, e.g. for handing to a
// GraphQL library. Requests made through it bypass the rate limiting and
// FlareSolverr routing that Do performs.
func (c *Client) HTTPClient() *http.Client {
	return c.http
}
// Cookie looks up the cookie called name that the jar would send to
// https://host (for example host "mangahub.io") and returns its value.
// It returns the empty string when no such cookie is stored.
func (c *Client) Cookie(name, host string) string {
	target := url.URL{Scheme: "https", Host: host}
	stored := c.http.Jar.Cookies(&target)
	for i := range stored {
		if stored[i].Name == name {
			return stored[i].Value
		}
	}
	return ""
}
// Get issues a GET request for urlStr via Do, bound to ctx.
//
// It returns the error from building the request when urlStr is invalid.
// Callers that need custom headers should construct the request themselves
// and call Do.
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
	if buildErr != nil {
		return nil, buildErr
	}
	return c.Do(request)
}
// Post issues a POST request to urlStr via Do, bound to ctx, sending body with
// the Content-Type header set to bodyType.
//
// It returns the error from building the request when urlStr is invalid.
func (c *Client) Post(ctx context.Context, urlStr string, bodyType string, body io.Reader) (*http.Response, error) {
	request, buildErr := http.NewRequestWithContext(ctx, http.MethodPost, urlStr, body)
	if buildErr != nil {
		return nil, buildErr
	}
	request.Header.Set("Content-Type", bodyType)
	return c.Do(request)
}
// isCloudflareChallenge reports whether body looks like a Cloudflare challenge
// page, i.e. FlareSolverr did not solve the challenge and Chrome rendered the
// interstitial instead of the real content.
//
// Detection is a case-sensitive substring search for a fixed set of markers;
// a single hit is enough.
func isCloudflareChallenge(body []byte) bool {
	has := func(marker string) bool {
		return bytes.Contains(body, []byte(marker))
	}
	return has("Just a moment...") ||
		has("cf_chl_opt") ||
		has("challenges.cloudflare.com") ||
		has("/cdn-cgi/challenge-platform") ||
		has("Enable JavaScript and cookies")
}
// stripFSWrapper removes the HTML wrapper Chrome puts around non-HTML
// responses (JSON, plain text) that FlareSolverr returns, e.g.
//
//	<html><head>...</head><body><pre>actual_body</pre></body></html>
//	<html><head>...</head><body><pre style="...">actual_body</pre></body></html>
//
// If body starts with "<html" and contains a <pre> element (with or without
// attributes), the text between the first opening <pre> tag and the last
// </pre> is returned. The entities that HTML serialisation introduces in text
// content (&amp;, &lt;, &gt; and &nbsp;) are decoded so callers receive the
// payload as the server sent it. In every other case body is returned
// unchanged (for example real HTML pages rendered by Chrome).
//
// The result aliases body when no entity had to be decoded.
func stripFSWrapper(body []byte) []byte {
	if !bytes.HasPrefix(bytes.TrimSpace(body), []byte("<html")) {
		return body
	}
	start := preContentStart(body)
	if start < 0 {
		return body
	}
	end := bytes.LastIndex(body, []byte("</pre>"))
	if end < start {
		return body
	}

	inner := body[start:end]
	if bytes.IndexByte(inner, '&') < 0 {
		return inner
	}
	// Every '&' in serialised text starts one of these four escapes, so
	// decoding &amp; last restores the original text exactly (e.g.
	// "&amp;lt;" becomes "&lt;", not "<").
	inner = bytes.ReplaceAll(inner, []byte("&lt;"), []byte("<"))
	inner = bytes.ReplaceAll(inner, []byte("&gt;"), []byte(">"))
	inner = bytes.ReplaceAll(inner, []byte("&nbsp;"), []byte("\u00a0"))
	inner = bytes.ReplaceAll(inner, []byte("&amp;"), []byte("&"))
	return inner
}

// preContentStart returns the index just past the first opening <pre> tag in
// body (attributes allowed), or -1 when there is none. Tags that merely begin
// with "<pre", such as <preview>, are skipped.
func preContentStart(body []byte) int {
	open := []byte("<pre")
	for from := 0; ; {
		i := bytes.Index(body[from:], open)
		if i < 0 {
			return -1
		}
		after := from + i + len(open)
		if after >= len(body) {
			return -1
		}
		switch body[after] {
		case '>':
			return after + 1
		case ' ', '\t', '\n', '\r':
			gt := bytes.IndexByte(body[after:], '>')
			if gt < 0 {
				return -1
			}
			return after + gt + 1
		}
		from = after
	}
}
// retryAfter returns how long to wait before retrying, based on resp's
// Retry-After header.
//
// The header may be a number of seconds (fractions are accepted) or an HTTP
// date. A default of five seconds is returned when the header is missing,
// unparsable, negative, not a number, or a date that is not in the future.
// Values too large for a time.Duration (including +Inf) are clamped to the
// maximum duration rather than overflowing.
func retryAfter(resp *http.Response) time.Duration {
	const (
		fallback = 5 * time.Second
		maxDelay = time.Duration(1<<63 - 1)
	)

	ra := resp.Header.Get("Retry-After")
	if ra == "" {
		return fallback
	}

	if secs, err := strconv.ParseFloat(ra, 64); err == nil {
		switch {
		case !(secs >= 0):
			// Written this way so NaN (for which every comparison is
			// false) is rejected together with negative values.
			return fallback
		case secs >= maxDelay.Seconds():
			return maxDelay
		}
		return time.Duration(secs * float64(time.Second))
	}

	if when, err := http.ParseTime(ra); err == nil {
		if wait := time.Until(when); wait > 0 {
			return wait
		}
	}
	return fallback
}