44b50937d5
When -v is passed, test-sources.sh passes it through to go test -v. sourcetest.Run uses testing.Verbose() to print the full manga list from GetPopularManga and GetLatestUpdates, showing title + URL.
354 lines
9.3 KiB
Go
Executable File
354 lines
9.3 KiB
Go
Executable File
package httpclient
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"net/http"
|
|
"net/http/cookiejar"
|
|
"net/url"
|
|
"strconv"
|
|
"sync"
|
|
"time"
|
|
|
|
"golang.org/x/time/rate"
|
|
)
|
|
|
|
// defaultUserAgent is the User-Agent string given to new clients; it is a
// Chrome-on-Android browser string and is sent on requests that carry no
// User-Agent header of their own.
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
|
|
|
|
var (
|
|
verboseLog bool
|
|
defaultOnce sync.Once
|
|
defaultClient *Client
|
|
)
|
|
|
|
// SetVerboseLog sets the package-wide verbose-logging default. The value is
// copied into a client when it is constructed, so clients that already exist
// keep the setting they were built with.
func SetVerboseLog(enabled bool) {
	verboseLog = enabled
}
|
|
|
|
// DefaultClient returns the shared singleton HTTP client.
|
|
// FlareSolverr is auto-configured if the FLARESOLVERR_URL env var is set.
|
|
// All sources share the same rate limiter (+ cookie jar) through this client.
|
|
func DefaultClient() *Client {
|
|
defaultOnce.Do(func() {
|
|
defaultClient = newClient()
|
|
})
|
|
return defaultClient
|
|
}
|
|
|
|
// NewClient creates a standalone client with optional per-source overrides.
|
|
// Only create a new client when the source needs different behaviour
|
|
// (e.g. a custom rate limit); otherwise use DefaultClient.
|
|
func NewClient(opts ...Option) *Client {
|
|
c := newClient()
|
|
for _, o := range opts {
|
|
o(c)
|
|
}
|
|
return c
|
|
}
|
|
|
|
func newClient() *Client {
|
|
jar, _ := cookiejar.New(nil)
|
|
c := &Client{
|
|
http: &http.Client{Timeout: 30 * time.Second, Jar: jar},
|
|
rateLimit: 1,
|
|
burst: 1,
|
|
userAgent: defaultUserAgent,
|
|
limiters: map[string]*rate.Limiter{},
|
|
verboseLog: verboseLog,
|
|
}
|
|
fsClient, err := NewFlareSolverrClient()
|
|
if err == nil {
|
|
c.fsClient = fsClient
|
|
}
|
|
return c
|
|
}
|
|
|
|
type Client struct {
|
|
http *http.Client
|
|
fsClient *FlareSolverrClient
|
|
rateLimit float64
|
|
burst int
|
|
userAgent string
|
|
verboseLog bool
|
|
|
|
mu sync.Mutex
|
|
limiters map[string]*rate.Limiter
|
|
}
|
|
|
|
// Option customises a Client. NewClient applies each option, in order, to the
// client it has just built.
type Option func(*Client)
|
|
|
|
func WithRateLimit(rps float64, burst int) Option {
|
|
return func(c *Client) {
|
|
c.rateLimit = rps
|
|
c.burst = burst
|
|
}
|
|
}
|
|
|
|
func WithTimeout(d time.Duration) Option {
|
|
return func(c *Client) { c.http.Timeout = d }
|
|
}
|
|
|
|
func WithUserAgent(ua string) Option {
|
|
return func(c *Client) { c.userAgent = ua }
|
|
}
|
|
|
|
func WithVerboseLog(enabled bool) Option {
|
|
return func(c *Client) { c.verboseLog = enabled }
|
|
}
|
|
|
|
func (c *Client) limiter(host string) *rate.Limiter {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
l, ok := c.limiters[host]
|
|
if !ok {
|
|
l = rate.NewLimiter(rate.Limit(c.rateLimit), c.burst)
|
|
c.limiters[host] = l
|
|
}
|
|
return l
|
|
}
|
|
|
|
// Do tries a direct HTTP request first. If the server returns 403/503 (a
|
|
// Cloudflare or DDoS challenge) and FlareSolverr is available, it falls back
|
|
// to FlareSolverr raw mode to solve the challenge and return the actual body.
|
|
//
|
|
// When FlareSolverr is used, the Chrome HTML wrapper is stripped from the
|
|
// response so that both JSON and HTML callers receive the real server output.
|
|
func (c *Client) Do(req *http.Request) (*http.Response, error) {
|
|
if err := c.limiter(req.URL.Host).Wait(req.Context()); err != nil {
|
|
return nil, err
|
|
}
|
|
if req.Header.Get("User-Agent") == "" {
|
|
req.Header.Set("User-Agent", c.userAgent)
|
|
}
|
|
|
|
// Always route through FlareSolverr when configured. Go's TLS fingerprint
|
|
// doesn't match Chrome's, so Cloudflare clearance cookies from FS are
|
|
// rejected by Go's net/http — meaning every direct request gets challenged
|
|
// again. FS Chrome caches the clearance internally, so subsequent calls
|
|
// for the same domain are near-instant.
|
|
//
|
|
// When FS is not configured, fall back to direct HTTP.
|
|
if c.fsClient != nil {
|
|
return c.doFS(req, 0)
|
|
}
|
|
|
|
// --- direct-first path (commented out — see TLS fingerprint issue above) ---
|
|
// resp, err := c.doDirect(req)
|
|
// var directStatus int
|
|
// if err == nil {
|
|
// directStatus = resp.StatusCode
|
|
// if resp.StatusCode != http.StatusForbidden && resp.StatusCode != http.StatusServiceUnavailable {
|
|
// return resp, nil
|
|
// }
|
|
// resp.Body.Close()
|
|
// }
|
|
// if c.fsClient == nil {
|
|
// if err != nil {
|
|
// return nil, err
|
|
// }
|
|
// return nil, fmt.Errorf("HTTP %d (challenge detected but FlareSolverr not configured)", resp.StatusCode)
|
|
// }
|
|
// return c.doFS(req, directStatus)
|
|
|
|
return c.doDirect(req)
|
|
}
|
|
|
|
func (c *Client) doDirect(req *http.Request) (*http.Response, error) {
|
|
if c.verboseLog {
|
|
log.Printf("[httpclient] DIRECT %s %s", req.Method, req.URL.String())
|
|
}
|
|
|
|
const maxRetries = 3
|
|
for attempt := 0; attempt <= maxRetries; attempt++ {
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if c.verboseLog {
|
|
log.Printf("[httpclient] DIRECT RESPONSE %s status=%d", req.URL.String(), resp.StatusCode)
|
|
}
|
|
if resp.StatusCode != http.StatusTooManyRequests {
|
|
return resp, nil
|
|
}
|
|
resp.Body.Close()
|
|
if attempt == maxRetries {
|
|
return resp, nil
|
|
}
|
|
sleep := retryAfter(resp)
|
|
select {
|
|
case <-req.Context().Done():
|
|
return nil, req.Context().Err()
|
|
case <-time.After(sleep):
|
|
}
|
|
}
|
|
panic("unreachable")
|
|
}
|
|
|
|
func (c *Client) doFS(req *http.Request, directStatus int) (*http.Response, error) {
|
|
if c.verboseLog {
|
|
log.Printf("[httpclient] FS FALLBACK %s %s", req.Method, req.URL.String())
|
|
}
|
|
|
|
rawURL := req.URL.String()
|
|
rawBody, statusCode, fsHeaders, cookies, fsRespURL, err := c.fsClient.GetRaw(req.Context(), rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Use the actual response URL from FlareSolverr (follows redirects
|
|
// through Chrome) so cookies are associated with the right domain.
|
|
respURL := rawURL
|
|
if fsRespURL != "" {
|
|
respURL = fsRespURL
|
|
}
|
|
|
|
// Feed FlareSolverr cookies into the shared jar so subsequent direct
|
|
// requests to the same host skip the challenge.
|
|
if len(cookies) > 0 {
|
|
if u, uErr := url.Parse(respURL); uErr == nil {
|
|
c.http.Jar.SetCookies(u, cookies)
|
|
}
|
|
}
|
|
|
|
// When FlareSolverr returns status 200, Chrome rendered the page.
|
|
// Check if the body actually contains Cloudflare challenge indicators
|
|
// rather than relying on structural heuristics (<pre> wrapper).
|
|
if statusCode == 200 {
|
|
if isCloudflareChallenge([]byte(rawBody)) {
|
|
statusCode = directStatus
|
|
}
|
|
}
|
|
|
|
// Build response headers from the actual FS response headers,
|
|
// falling back to the request headers for keys not present in the
|
|
// FS response (e.g. Content-Type on an empty GET body).
|
|
hdr := make(http.Header)
|
|
if len(fsHeaders) > 0 {
|
|
for k, v := range fsHeaders {
|
|
switch val := v.(type) {
|
|
case string:
|
|
hdr.Set(k, val)
|
|
case []any:
|
|
for _, sv := range val {
|
|
hdr.Add(k, fmt.Sprint(sv))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Ensure Set-Cookie headers from FS cookies are present even if FS
|
|
// omitted them from the headers map.
|
|
if len(cookies) > 0 {
|
|
for _, ck := range cookies {
|
|
hdr.Add("Set-Cookie", ck.String())
|
|
}
|
|
}
|
|
// Copy any request headers not present in the FS response (e.g. Host).
|
|
for k, v := range req.Header {
|
|
if hdr.Get(k) == "" {
|
|
hdr[k] = v
|
|
}
|
|
}
|
|
|
|
body := stripFSWrapper([]byte(rawBody))
|
|
|
|
return &http.Response{
|
|
StatusCode: statusCode,
|
|
Header: hdr,
|
|
Body: io.NopCloser(bytes.NewReader(body)),
|
|
Request: req,
|
|
}, nil
|
|
}
|
|
|
|
// HTTPClient returns the underlying *http.Client (for passing to graphql etc.).
|
|
func (c *Client) HTTPClient() *http.Client { return c.http }
|
|
|
|
// Cookie returns the value of a named cookie stored in the jar for the given
|
|
// host (e.g. "mangahub.io"). Returns empty string when the cookie is not found.
|
|
func (c *Client) Cookie(name, host string) string {
|
|
u := &url.URL{Scheme: "https", Host: host}
|
|
for _, ck := range c.http.Jar.Cookies(u) {
|
|
if ck.Name == name {
|
|
return ck.Value
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// Get is a convenience wrapper around Do. To add custom headers, build the
|
|
// request manually and call Do.
|
|
func (c *Client) Get(ctx context.Context, urlStr string) (*http.Response, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return c.Do(req)
|
|
}
|
|
|
|
// Post is a convenience wrapper around Do.
|
|
func (c *Client) Post(ctx context.Context, urlStr string, bodyType string, body io.Reader) (*http.Response, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, urlStr, body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Content-Type", bodyType)
|
|
return c.Do(req)
|
|
}
|
|
|
|
// isCloudflareChallenge reports whether body looks like a Cloudflare challenge
// page, i.e. FlareSolverr did not solve the challenge and Chrome rendered the
// challenge itself. The check is a case-sensitive substring search for a fixed
// set of marker strings.
func isCloudflareChallenge(body []byte) bool {
	markers := [...]string{
		"Just a moment...",
		"cf_chl_opt",
		"challenges.cloudflare.com",
		"/cdn-cgi/challenge-platform",
		"Enable JavaScript and cookies",
	}
	for i := 0; i < len(markers); i++ {
		if bytes.Index(body, []byte(markers[i])) >= 0 {
			return true
		}
	}
	return false
}
|
|
|
|
// stripFSWrapper removes the Chrome HTML wrapper that FlareSolverr puts around
// responses:
//
//	<html><head>...<meta charset...>...</head><body><pre>actual_body</pre></body></html>
//
// When body (ignoring leading whitespace) starts with "<html" and contains a
// "<pre>" tag followed later by a "</pre>" tag, everything between the first
// "<pre>" and the last "</pre>" is returned. In every other case, such as
// plain JSON or an HTML page without a <pre> element, body is returned
// unchanged.
func stripFSWrapper(body []byte) []byte {
	var (
		htmlOpen = []byte("<html")
		preOpen  = []byte("<pre>")
		preClose = []byte("</pre>")
	)

	if trimmed := bytes.TrimSpace(body); !bytes.HasPrefix(trimmed, htmlOpen) {
		return body
	}

	start := bytes.Index(body, preOpen)
	end := bytes.LastIndex(body, preClose)
	if start < 0 || end <= start {
		return body
	}
	return body[start+len(preOpen) : end]
}
|
|
|
|
// retryAfter returns how long to wait before retrying, based on the response's
// Retry-After header.
//
// The header may be a number of seconds (fractions are accepted) or an HTTP
// date. A 5-second default is returned when the header is missing, cannot be
// parsed, names a time that is not in the future, or carries a numeric value
// that is negative, NaN, infinite or too large to fit in a time.Duration.
func retryAfter(resp *http.Response) time.Duration {
	const (
		fallback = 5 * time.Second
		// maxSecs is a bound under which seconds-to-nanoseconds conversion
		// cannot overflow time.Duration (int64 nanoseconds, ~9.22e9 seconds).
		maxSecs = 9e9
	)

	ra := resp.Header.Get("Retry-After")
	if ra == "" {
		return fallback
	}

	if secs, err := strconv.ParseFloat(ra, 64); err == nil {
		// NaN fails both comparisons, so it falls through to the default.
		if secs >= 0 && secs <= maxSecs {
			return time.Duration(secs * float64(time.Second))
		}
		return fallback
	}

	if t, err := http.ParseTime(ra); err == nil {
		if d := time.Until(t); d > 0 {
			return d
		}
	}
	return fallback
}
|