feat: initial Phase 1 implementation — core framework + Docker

- Data types (SManga, SChapter, Page, MangasPage, all Filter variants)
- Source interfaces (Source, CatalogueSource) with MD5-based ID generation matching Tachiyomi/Suwayomi
- HTTP client with per-host rate limiting, cookie jar, and 429 retry
- FlareSolverr v1 client (FLARESOLVERR_URL env)
- Generic GraphQL POST helper
- goquery HTML parser wrappers
- Source registry (panics on duplicate ID)
- Multi-stage Dockerfile (golang:1.26-alpine + distroless) and compose.yml (postgres, flaresolverr, app)
This commit is contained in:
achmad
2026-05-10 21:23:24 +07:00
commit 85d2ea6143
23 changed files with 2864 additions and 0 deletions
+147
View File
@@ -0,0 +1,147 @@
package httpclient
import (
"context"
"io"
"net/http"
"net/http/cookiejar"
"strconv"
"sync"
"time"
"golang.org/x/time/rate"
)
// defaultUserAgent is the User-Agent that Client.Do sets on requests that do
// not already carry one. It is the same string as androidUA in this package.
const defaultUserAgent = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
// Client wraps an *http.Client and adds a per-host rate limiter, an optional
// default Referer header, a default User-Agent, and retry on HTTP 429.
type Client struct {
// http is the underlying client; NewClient gives it a cookie jar and a 30s timeout.
http *http.Client
// rateLimit is the events-per-second rate given to each new per-host limiter.
rateLimit float64
// burst is the burst size given to each new per-host limiter.
burst int
// referer, when non-empty, is set by Do on requests that have no Referer header.
referer string
// mu guards limiters.
mu sync.Mutex
// limiters holds one rate limiter per request host, created on first use.
limiters map[string]*rate.Limiter
}
// Option mutates a Client during construction; NewClient applies the options
// it receives in the order given.
type Option func(*Client)
// WithRateLimit returns an Option that sets the requests-per-second rate and
// the burst size used for every per-host limiter the Client creates.
func WithRateLimit(rps float64, burst int) Option {
	apply := func(cl *Client) {
		cl.burst = burst
		cl.rateLimit = rps
	}
	return apply
}
// WithTimeout returns an Option that replaces the timeout of the Client's
// underlying *http.Client with d.
func WithTimeout(d time.Duration) Option {
	setTimeout := func(cl *Client) {
		cl.http.Timeout = d
	}
	return setTimeout
}
// WithReferer returns an Option that stores referer as the default Referer
// header; Do only applies it to requests that do not set one themselves.
func WithReferer(referer string) Option {
	setReferer := func(cl *Client) {
		cl.referer = referer
	}
	return setReferer
}
// NewClient builds a Client with a cookie jar, a 30-second timeout and a
// per-host limit of one request per second (burst 1), then applies opts in
// the order given so later options override earlier ones.
func NewClient(opts ...Option) *Client {
	// The error result of cookiejar.New is ignored here.
	jar, _ := cookiejar.New(nil)

	underlying := &http.Client{Jar: jar, Timeout: 30 * time.Second}
	client := &Client{
		http:      underlying,
		limiters:  map[string]*rate.Limiter{},
		rateLimit: 1,
		burst:     1,
	}
	for i := range opts {
		opts[i](client)
	}
	return client
}
// limiter returns the rate limiter for host, creating it on first use with
// the Client's configured rate and burst. The limiter map is only touched
// while c.mu is held.
func (c *Client) limiter(host string) *rate.Limiter {
	c.mu.Lock()
	defer c.mu.Unlock()

	if existing, found := c.limiters[host]; found {
		return existing
	}
	created := rate.NewLimiter(rate.Limit(c.rateLimit), c.burst)
	c.limiters[host] = created
	return created
}
// Do sends req through the underlying client after waiting on the per-host
// rate limiter. It fills in the default Referer (if configured) and
// User-Agent when the request does not set them.
//
// A 429 Too Many Requests response is retried up to three times, sleeping for
// the duration reported by retryAfter between attempts. If the final attempt
// is still a 429, that response is returned with its body open so the caller
// can read and close it, like any other response.
//
// Requests with a body are only retried when req.GetBody is set (which
// http.NewRequest does for bytes.Buffer, bytes.Reader and strings.Reader
// bodies); otherwise the first 429 is returned as-is, because the body has
// already been consumed and cannot be replayed.
//
// It returns the transport error, the limiter error, or the context error if
// the context ends while waiting between retries.
func (c *Client) Do(req *http.Request) (*http.Response, error) {
	ctx := req.Context()
	if err := c.limiter(req.URL.Host).Wait(ctx); err != nil {
		return nil, err
	}
	if c.referer != "" && req.Header.Get("Referer") == "" {
		req.Header.Set("Referer", c.referer)
	}
	if req.Header.Get("User-Agent") == "" {
		req.Header.Set("User-Agent", defaultUserAgent)
	}

	const maxRetries = 3
	for attempt := 0; ; attempt++ {
		resp, err := c.http.Do(req)
		if err != nil {
			return nil, err
		}
		// Anything other than 429, or a 429 once retries are exhausted, goes
		// back to the caller untouched (body still open).
		if resp.StatusCode != http.StatusTooManyRequests || attempt >= maxRetries {
			return resp, nil
		}

		hasBody := req.Body != nil && req.Body != http.NoBody
		if hasBody && req.GetBody == nil {
			// The body was consumed by this attempt and cannot be rewound.
			return resp, nil
		}

		wait := retryAfter(resp)

		// Drain a bounded amount so the connection can be reused, then close
		// the response before the request is touched again.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 4<<10))
		resp.Body.Close()

		if hasBody {
			body, err := req.GetBody()
			if err != nil {
				return nil, err
			}
			req.Body = body
		}

		timer := time.NewTimer(wait)
		select {
		case <-ctx.Done():
			timer.Stop()
			return nil, ctx.Err()
		case <-timer.C:
		}
	}
}
// retryAfter converts the Retry-After header of resp into a wait duration.
//
// The header may be a number of seconds (fractions are accepted) or an HTTP
// date. It returns 5 seconds when the header is missing, unparseable, names a
// date that is not in the future, or is a number that is negative, NaN, or
// too large to represent as a time.Duration.
func retryAfter(resp *http.Response) time.Duration {
	const fallback = 5 * time.Second
	// Upper bound in seconds, kept well inside the int64 nanosecond range so
	// the float-to-Duration conversion below can never overflow.
	const maxSeconds = float64(1<<62) / float64(time.Second)

	ra := resp.Header.Get("Retry-After")
	if ra == "" {
		return fallback
	}
	if secs, err := strconv.ParseFloat(ra, 64); err == nil {
		// NaN fails both comparisons; ±Inf and negatives fail one of them.
		if secs >= 0 && secs <= maxSeconds {
			return time.Duration(secs * float64(time.Second))
		}
		return fallback
	}
	if t, err := http.ParseTime(ra); err == nil {
		if d := time.Until(t); d > 0 {
			return d
		}
	}
	return fallback
}
// Get builds a GET request for url bound to ctx, copies headers onto it and
// sends it through Do. It returns the request-construction error or whatever
// Do returns.
func (c *Client) Get(ctx context.Context, url string, headers map[string]string) (*http.Response, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	for name := range headers {
		request.Header.Set(name, headers[name])
	}
	return c.Do(request)
}
// Post builds a POST request for url with the given body, bound to ctx, and
// sends it through Do. Content-Type is set from contentType first, so an
// entry in headers with the same key overrides it.
func (c *Client) Post(ctx context.Context, url string, body io.Reader, contentType string, headers map[string]string) (*http.Response, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodPost, url, body)
	if err != nil {
		return nil, err
	}
	hdr := request.Header
	hdr.Set("Content-Type", contentType)
	for name := range headers {
		hdr.Set(name, headers[name])
	}
	return c.Do(request)
}
// HTTPClient exposes the wrapped *http.Client (for passing to the graphql
// helper etc.). Requests issued directly through it do not go through
// Client.Do, so they skip its rate limiting, default headers and 429 retry.
func (c *Client) HTTPClient() *http.Client {
	underlying := c.http
	return underlying
}
+95
View File
@@ -0,0 +1,95 @@
package httpclient
import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"os"
)
// FlareSolverrClient sends requests to a FlareSolverr instance.
type FlareSolverrClient struct {
// endpoint is the FlareSolverr base URL; Get appends "/v1" to it.
endpoint string
// client is the HTTP client used to reach the FlareSolverr endpoint.
client *http.Client
}
// NewFlareSolverrClient builds a client from the FLARESOLVERR_URL environment
// variable and returns an error when it is unset or empty.
//
// Trailing slashes are stripped from the configured URL so that the "/v1"
// suffix appended by Get never produces a "//v1" path.
//
// The HTTP client is created without a timeout; callers should bound each
// call with the context they pass to Get.
func NewFlareSolverrClient() (*FlareSolverrClient, error) {
	ep := os.Getenv("FLARESOLVERR_URL")
	for len(ep) > 0 && ep[len(ep)-1] == '/' {
		ep = ep[:len(ep)-1]
	}
	if ep == "" {
		return nil, fmt.Errorf("FLARESOLVERR_URL not set")
	}
	return &FlareSolverrClient{
		endpoint: ep,
		client:   &http.Client{},
	}, nil
}
// flareSolverrRequest is the JSON body that Get posts to FlareSolverr.
type flareSolverrRequest struct {
// Cmd is the FlareSolverr command; Get sends "request.get".
Cmd string `json:"cmd"`
// URL is the target page to fetch.
URL string `json:"url"`
// MaxTimeout is sent as 60000 by Get — presumably milliseconds; confirm against the FlareSolverr API.
MaxTimeout int `json:"maxTimeout"`
}
// FlareSolverrResponse is the JSON envelope decoded from a FlareSolverr reply.
type FlareSolverrResponse struct {
// Status is the FlareSolverr outcome; Get treats anything other than "ok" as a failure.
Status string `json:"status"`
// Solution carries the fetched page.
Solution struct {
// Response is the page body; Get returns it as the HTML result.
Response string `json:"response"`
// Cookies are the cookies reported by FlareSolverr; Get converts them to []*http.Cookie.
Cookies []fsCookie `json:"cookies"`
// Headers are decoded but not used by Get.
Headers map[string]any `json:"headers"`
// URL is decoded but not used by Get — presumably the final URL after redirects; confirm.
URL string `json:"url"`
// Status is decoded but not used by Get — presumably the target site's HTTP status; confirm.
Status int `json:"status"`
} `json:"solution"`
}
// fsCookie is one cookie entry in a FlareSolverr solution.
// Get copies every field except Expires into an http.Cookie.
type fsCookie struct {
Name string `json:"name"`
Value string `json:"value"`
Domain string `json:"domain"`
Path string `json:"path"`
// Expires is decoded but currently dropped by Get — presumably a Unix timestamp; confirm.
Expires float64 `json:"expires"`
HTTPOnly bool `json:"httpOnly"`
Secure bool `json:"secure"`
}
// Get fetches a Cloudflare-protected URL via FlareSolverr.
//
// It posts a "request.get" command for url to <endpoint>/v1 and returns the
// page body from the solution together with the solution's cookies converted
// to []*http.Cookie (cookie expiry is not carried over). cookies is nil when
// the solution contains none.
//
// An error is returned when the request cannot be built or sent, when the
// reply is not valid JSON, or when FlareSolverr reports a status other than
// "ok"; in the last case the error includes FlareSolverr's own message.
func (f *FlareSolverrClient) Get(ctx context.Context, url string) (html string, cookies []*http.Cookie, err error) {
	payload, err := json.Marshal(flareSolverrRequest{
		Cmd:        "request.get",
		URL:        url,
		MaxTimeout: 60000,
	})
	if err != nil {
		return "", nil, fmt.Errorf("flaresolverr: encoding request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, f.endpoint+"/v1", bytes.NewReader(payload))
	if err != nil {
		return "", nil, fmt.Errorf("flaresolverr: building request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := f.client.Do(req)
	if err != nil {
		return "", nil, fmt.Errorf("flaresolverr: %w", err)
	}
	defer resp.Body.Close()

	// Decode the regular envelope plus the top-level "message" field, which
	// FlareSolverr fills with the reason when a request fails.
	var fsResp struct {
		FlareSolverrResponse
		Message string `json:"message"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&fsResp); err != nil {
		return "", nil, fmt.Errorf("flaresolverr: decoding response (HTTP %d): %w", resp.StatusCode, err)
	}
	if fsResp.Status != "ok" {
		if fsResp.Message != "" {
			return "", nil, fmt.Errorf("flaresolverr: status %q: %s", fsResp.Status, fsResp.Message)
		}
		return "", nil, fmt.Errorf("flaresolverr: status %q", fsResp.Status)
	}

	for _, c := range fsResp.Solution.Cookies {
		cookies = append(cookies, &http.Cookie{
			Name:     c.Name,
			Value:    c.Value,
			Domain:   c.Domain,
			Path:     c.Path,
			HttpOnly: c.HTTPOnly,
			Secure:   c.Secure,
		})
	}
	return fsResp.Solution.Response, cookies, nil
}
+55
View File
@@ -0,0 +1,55 @@
package httpclient
import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
)
// GraphQLRequest is the JSON body sent by Post.
type GraphQLRequest struct {
// Query is the GraphQL document text.
Query string `json:"query"`
// Variables is marshalled as the "variables" member and omitted when nil.
Variables any `json:"variables,omitempty"`
}
// graphQLResponse is the response envelope decoded by Post; T is the shape
// the caller expects under "data".
type graphQLResponse[T any] struct {
// Data is the payload returned to the caller when Errors is empty.
Data T `json:"data"`
// Errors lists the errors reported by the server; Post surfaces the first message.
Errors []struct {
Message string `json:"message"`
} `json:"errors"`
}
// Post sends a GraphQL request and unmarshals the `data` field into T.
//
// req is JSON-encoded and posted to url with JSON Content-Type and Accept
// headers; entries in headers are applied afterwards and may override them.
//
// It returns the zero T and an error when the request cannot be encoded,
// built or sent, when the response cannot be decoded, when the response
// carries GraphQL errors (the first message is reported), or when the server
// answers with a non-2xx status. The body is decoded before the status is
// judged, so GraphQL errors delivered with a 4xx/5xx status are still
// reported by message, while a non-JSON error page is reported by its HTTP
// status instead of as a JSON syntax error.
func Post[T any](ctx context.Context, client *http.Client, url string, req GraphQLRequest, headers map[string]string) (T, error) {
	var zero T

	body, err := json.Marshal(req)
	if err != nil {
		return zero, fmt.Errorf("graphql: encoding request: %w", err)
	}
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
	if err != nil {
		return zero, err
	}
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Accept", "application/json")
	for k, v := range headers {
		httpReq.Header.Set(k, v)
	}

	resp, err := client.Do(httpReq)
	if err != nil {
		return zero, err
	}
	defer resp.Body.Close()

	statusOK := resp.StatusCode >= 200 && resp.StatusCode < 300

	var gqlResp graphQLResponse[T]
	if err := json.NewDecoder(resp.Body).Decode(&gqlResp); err != nil {
		if !statusOK {
			return zero, fmt.Errorf("graphql: unexpected HTTP status %s", resp.Status)
		}
		return zero, fmt.Errorf("graphql: decoding response: %w", err)
	}
	if len(gqlResp.Errors) > 0 {
		return zero, fmt.Errorf("graphql: %s", gqlResp.Errors[0].Message)
	}
	if !statusOK {
		return zero, fmt.Errorf("graphql: unexpected HTTP status %s", resp.Status)
	}
	return gqlResp.Data, nil
}
+42
View File
@@ -0,0 +1,42 @@
package httpclient
// User-Agent strings returned by AndroidUA and DesktopUA.
const (
// androidUA identifies as Chrome 124 on Android 10.
androidUA = "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Mobile Safari/537.36"
// desktopUA identifies as Chrome 124 on 64-bit Windows 10.
desktopUA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
)
// AndroidUA returns the package's Android Chrome User-Agent string.
func AndroidUA() string {
	return androidUA
}
// DesktopUA returns the package's desktop (Windows) Chrome User-Agent string.
func DesktopUA() string {
	return desktopUA
}
// JSONHeaders returns a new header map that sets both Content-Type and Accept
// to application/json. Each call allocates a fresh map the caller may modify.
func JSONHeaders() map[string]string {
	const mime = "application/json"
	hdrs := make(map[string]string, 2)
	hdrs["Accept"] = mime
	hdrs["Content-Type"] = mime
	return hdrs
}
// FormHeaders returns a new header map whose only entry sets Content-Type to
// application/x-www-form-urlencoded.
func FormHeaders() map[string]string {
	hdrs := make(map[string]string, 1)
	hdrs["Content-Type"] = "application/x-www-form-urlencoded"
	return hdrs
}
// WithRefererHeader returns a copy of headers with the Referer entry set to
// referer. The input map is left untouched; a nil input yields a one-entry map.
func WithRefererHeader(headers map[string]string, referer string) map[string]string {
	merged := clone(headers)
	merged["Referer"] = referer
	return merged
}
// WithOrigin returns a copy of headers with the Origin entry set to origin.
// The input map is left untouched; a nil input yields a one-entry map.
func WithOrigin(headers map[string]string, origin string) map[string]string {
	merged := clone(headers)
	merged["Origin"] = origin
	return merged
}
// clone returns a new map holding every entry of m. The result is never nil
// (even for a nil m) and is sized with room for one extra entry, since the
// callers in this file add exactly one key afterwards.
func clone(m map[string]string) map[string]string {
	size := len(m) + 1
	dup := make(map[string]string, size)
	for key := range m {
		dup[key] = m[key]
	}
	return dup
}
+67
View File
@@ -0,0 +1,67 @@
package parser
import (
"net/http"
"net/url"
"strings"
"github.com/PuerkitoBio/goquery"
)
// Parse builds a goquery document from an HTML string.
func Parse(html string) (*goquery.Document, error) {
	reader := strings.NewReader(html)
	return goquery.NewDocumentFromReader(reader)
}
// ParseResponse builds a goquery document from resp's body and closes the
// body before returning, whether or not parsing succeeded.
func ParseResponse(resp *http.Response) (*goquery.Document, error) {
	body := resp.Body
	defer body.Close()
	return goquery.NewDocumentFromReader(body)
}
// Select runs the CSS selector css against the whole document.
func Select(doc *goquery.Document, css string) *goquery.Selection {
	matches := doc.Find(css)
	return matches
}
// SelectFrom runs the CSS selector css against the given selection.
func SelectFrom(sel *goquery.Selection, css string) *goquery.Selection {
	matches := sel.Find(css)
	return matches
}
// Attr returns the value of attribute name on sel. The "exists" flag from
// goquery is discarded, so a missing attribute and an empty one look the same
// to the caller.
func Attr(sel *goquery.Selection, name string) string {
	value, _ := sel.Attr(name)
	return value
}
// AbsURL resolves a relative URL attribute against baseURL.
//
// The attribute value is trimmed of surrounding whitespace first, since HTML
// attributes often carry stray spaces or newlines that would otherwise be
// resolved as part of the path. It returns "" when the attribute is missing
// or blank, and the trimmed attribute value unchanged when either baseURL or
// the value cannot be parsed as a URL.
func AbsURL(sel *goquery.Selection, attr string, baseURL string) string {
	val := strings.TrimSpace(Attr(sel, attr))
	if val == "" {
		return ""
	}
	base, err := url.Parse(baseURL)
	if err != nil {
		return val
	}
	ref, err := url.Parse(val)
	if err != nil {
		return val
	}
	return base.ResolveReference(ref).String()
}
// OwnText returns the whitespace-trimmed text of sel with its child elements
// left out. It works on a clone, so sel itself is not modified.
func OwnText(sel *goquery.Selection) string {
	detached := sel.Clone()
	detached.Children().Remove()
	text := detached.Text()
	return strings.TrimSpace(text)
}
// TextTrim returns the text of sel with leading and trailing whitespace removed.
func TextTrim(sel *goquery.Selection) string {
	text := sel.Text()
	return strings.TrimSpace(text)
}
// First narrows sel to its first element.
func First(sel *goquery.Selection) *goquery.Selection {
	head := sel.First()
	return head
}
// Each invokes fn for every element of sel, passing the element's index and a
// selection containing only that element. The selection returned by goquery's
// Each is discarded.
func Each(sel *goquery.Selection, fn func(i int, s *goquery.Selection)) {
	_ = sel.Each(fn)
}
+43
View File
@@ -0,0 +1,43 @@
package registry
import (
"fmt"
"sort"
"sync"
"goyomi/internal/source"
)
// Package-level registry state shared by Register, Get and All.
var (
// mu guards sources.
mu sync.RWMutex
// sources maps a source ID to its registered implementation.
sources = map[int64]source.CatalogueSource{}
)
// Register adds s to the registry under s.ID().
// It panics when that ID is already taken, so a duplicate is caught at startup.
func Register(s source.CatalogueSource) {
	mu.Lock()
	defer mu.Unlock()

	_, taken := sources[s.ID()]
	if taken {
		msg := fmt.Sprintf("registry: duplicate source ID %d (%s/%s)", s.ID(), s.Name(), s.Lang())
		panic(msg)
	}
	sources[s.ID()] = s
}
// Get looks up the source registered under id. The boolean is false when no
// source has that ID.
func Get(id int64) (source.CatalogueSource, bool) {
	mu.RLock()
	src, found := sources[id]
	mu.RUnlock()
	return src, found
}
// All returns a new slice holding every registered source, ordered by
// ascending ID. The slice is empty (not nil) when nothing is registered.
func All() []source.CatalogueSource {
	mu.RLock()
	defer mu.RUnlock()

	list := make([]source.CatalogueSource, len(sources))
	next := 0
	for key := range sources {
		list[next] = sources[key]
		next++
	}
	byID := func(a, b int) bool { return list[a].ID() < list[b].ID() }
	sort.Slice(list, byID)
	return list
}
+39
View File
@@ -0,0 +1,39 @@
package registry_test
import (
"testing"
"goyomi/internal/registry"
"goyomi/internal/source"
)
// mockSource is a minimal source.CatalogueSource used by the registry tests.
// Only the identity methods return configured values; every catalogue method
// returns an empty result and a nil error.
type mockSource struct {
	id   int64
	name string
	lang string
}

// ID returns the configured source ID.
func (ms *mockSource) ID() int64 {
	return ms.id
}

// Name returns the configured source name.
func (ms *mockSource) Name() string {
	return ms.name
}

// Lang returns the configured language code.
func (ms *mockSource) Lang() string {
	return ms.lang
}

// SupportsLatest always reports false.
func (ms *mockSource) SupportsLatest() bool {
	return false
}

// GetPopularManga returns an empty page.
func (ms *mockSource) GetPopularManga(page int) (source.MangasPage, error) {
	var empty source.MangasPage
	return empty, nil
}

// GetLatestUpdates returns an empty page.
func (ms *mockSource) GetLatestUpdates(page int) (source.MangasPage, error) {
	var empty source.MangasPage
	return empty, nil
}

// GetSearchManga returns an empty page regardless of query and filters.
func (ms *mockSource) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	var empty source.MangasPage
	return empty, nil
}

// GetMangaDetails echoes its argument back.
func (ms *mockSource) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	return manga, nil
}

// GetChapterList returns no chapters.
func (ms *mockSource) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	return nil, nil
}

// GetPageList returns no pages.
func (ms *mockSource) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	return nil, nil
}

// GetImageURL returns the page's ImageURL unchanged.
func (ms *mockSource) GetImageURL(page source.Page) (string, error) {
	imageURL := page.ImageURL
	return imageURL, nil
}

// GetFilterList returns no filters.
func (ms *mockSource) GetFilterList() []source.Filter {
	return nil
}
func TestDuplicateIDPanics(t *testing.T) {
defer func() {
if r := recover(); r == nil {
t.Error("expected panic on duplicate source ID, got none")
}
}()
registry.Register(&mockSource{id: 9999, name: "A", lang: "en"})
registry.Register(&mockSource{id: 9999, name: "B", lang: "en"})
}
+59
View File
@@ -0,0 +1,59 @@
package source
import (
"crypto/md5"
"strings"
)
// Source is the base interface for all sources.
type Source interface {
// ID returns the numeric source identifier (see GenerateSourceID for the
// Tachiyomi-compatible derivation).
ID() int64
// Name returns the source name.
Name() string
// Lang returns the source language code.
Lang() string
}
// CatalogueSource is the full interface every source must implement.
type CatalogueSource interface {
Source
// SupportsLatest reports whether the source offers a latest-updates listing
// — presumably gating calls to GetLatestUpdates; confirm with callers.
SupportsLatest() bool
// GetPopularManga returns one page of the popular listing.
GetPopularManga(page int) (MangasPage, error)
// GetLatestUpdates returns one page of the latest-updates listing.
GetLatestUpdates(page int) (MangasPage, error)
// GetSearchManga returns one page of search results for query and filters.
GetSearchManga(page int, query string, filters []Filter) (MangasPage, error)
// GetMangaDetails returns the detailed form of manga.
GetMangaDetails(manga SManga) (SManga, error)
// GetChapterList returns the chapters of manga.
GetChapterList(manga SManga) ([]SChapter, error)
// GetPageList returns the pages of chapter.
GetPageList(chapter SChapter) ([]Page, error)
// GetImageURL resolves the final image URL for a page.
// Sources that embed image URLs directly in pages return page.ImageURL unchanged.
GetImageURL(page Page) (string, error)
// GetFilterList returns the filters the source supports for searching.
GetFilterList() []Filter
}
// GenerateSourceID replicates Tachiyomi/Suwayomi HttpSource.generateId with
// the version ID fixed at 1:
//
//	key = "${name.lowercase()}/$lang/$versionId"
//	MD5(key) → first 8 bytes as big-endian int64, sign bit cleared (& Long.MAX_VALUE)
func GenerateSourceID(name, lang string) int64 {
	const defaultVersionID = 1
	return GenerateSourceIDv(name, lang, defaultVersionID)
}
// GenerateSourceIDv derives a source ID from the lower-cased name, the
// language and an explicit version ID: it hashes "name/lang/versionID" with
// MD5, reads the first 8 digest bytes as a big-endian integer and clears the
// sign bit so the result is never negative.
func GenerateSourceIDv(name, lang string, versionID int) int64 {
	parts := []string{strings.ToLower(name), lang, itoa(versionID)}
	digest := md5.Sum([]byte(strings.Join(parts, "/")))

	var bits uint64
	for _, octet := range digest[:8] {
		bits = bits<<8 | uint64(octet)
	}

	const maxInt64 = 1<<63 - 1 // Long.MAX_VALUE
	return int64(bits & maxInt64)
}
// itoa formats n in base 10, producing the same text as strconv.Itoa.
//
// Negative values get a leading '-' (the previous implementation returned an
// empty string for them). Digits are written from the end of a fixed buffer,
// so no intermediate slices are allocated.
func itoa(n int) string {
	if n == 0 {
		return "0"
	}

	// Work on the magnitude as an unsigned value so the most negative int,
	// whose absolute value does not fit in an int, is handled correctly.
	negative := n < 0
	mag := uint(n)
	if negative {
		mag = -mag
	}

	var buf [20]byte // sign plus the 19 digits of the largest 64-bit magnitude
	pos := len(buf)
	for mag > 0 {
		pos--
		buf[pos] = byte('0' + mag%10)
		mag /= 10
	}
	if negative {
		pos--
		buf[pos] = '-'
	}
	return string(buf[pos:])
}
+28
View File
@@ -0,0 +1,28 @@
package source_test
import (
"testing"
"goyomi/internal/source"
)
// TestGenerateSourceID pins GenerateSourceID against known IDs.
func TestGenerateSourceID(t *testing.T) {
	// IDs computed from the same MD5 formula as Tachiyomi/Suwayomi HttpSource.generateId:
	// key = "${name.lowercase()}/$lang/1", MD5 → first 8 bytes big-endian, sign bit cleared.
	type fixture struct {
		name string
		lang string
		want int64
	}
	fixtures := []fixture{
		{name: "MangaDex", lang: "en", want: 2499283573021220255},
		{name: "MangaDex", lang: "all", want: 6404943692147160087},
		{name: "HeanCms", lang: "en", want: 6473152836656709188},
	}
	for i := range fixtures {
		fx := fixtures[i]
		if got := source.GenerateSourceID(fx.name, fx.lang); got != fx.want {
			t.Errorf("GenerateSourceID(%q, %q) = %d, want %d", fx.name, fx.lang, got, fx.want)
		}
	}
}
+115
View File
@@ -0,0 +1,115 @@
package source
// Publication status values for SManga.Status.
// The value 4 is not assigned in this block.
const (
StatusUnknown = 0
StatusOngoing = 1
StatusCompleted = 2
StatusLicensed = 3
StatusHiatus = 5
StatusCancelled = 6
)
// SManga describes one manga as reported by a source.
type SManga struct {
URL string
Title string
Artist string
Author string
Description string
Genre string // comma-separated
// Status is presumably one of the Status* constants — confirm with the sources that fill it.
Status int
ThumbnailURL string
// Initialized — presumably marks that full details have been fetched; confirm with callers.
Initialized bool
}
// SChapter describes one chapter as reported by a source.
type SChapter struct {
URL string
Name string
DateUpload int64 // unix milliseconds
ChapterNumber float32
Scanlator string
}
// Page describes one page of a chapter.
type Page struct {
// Index — presumably the page's position within its chapter; confirm with sources.
Index int
URL string
// ImageURL is the image address; CatalogueSource.GetImageURL may return it unchanged.
ImageURL string
}
// MangasPage is one page of a manga listing returned by a catalogue source.
type MangasPage struct {
// Mangas holds the entries on this page.
Mangas []SManga
// HasNextPage — presumably true when a further page can be requested; confirm with sources.
HasNextPage bool
}
// Filter is implemented by every search filter variant in this file.
type Filter interface {
// Name returns the filter's name.
Name() string
// Value returns the filter's current state; the dynamic type depends on the variant.
Value() any
}
// TextFilter is a free-text input filter.
type TextFilter struct {
	FilterName string
	Text       string
}

// Name returns FilterName.
func (tf *TextFilter) Name() string {
	return tf.FilterName
}

// Value returns the current text (a string).
func (tf *TextFilter) Value() any {
	return tf.Text
}
// CheckboxFilter is a boolean (checked / unchecked) filter.
type CheckboxFilter struct {
	FilterName string
	State      bool
}

// Name returns FilterName.
func (cf *CheckboxFilter) Name() string {
	return cf.FilterName
}

// Value returns the checkbox state (a bool).
func (cf *CheckboxFilter) Value() any {
	return cf.State
}
// TriStateFilter is a three-way filter: 0=ignore, 1=include, 2=exclude.
type TriStateFilter struct {
	FilterName string
	State      int
}

// Name returns FilterName.
func (ts *TriStateFilter) Name() string {
	return ts.FilterName
}

// Value returns the current state (an int).
func (ts *TriStateFilter) Value() any {
	return ts.State
}
// SelectFilter is a dropdown filter: Values lists the options and Selected
// holds the chosen one as an int.
type SelectFilter struct {
	FilterName string
	Values     []string
	Selected   int
}

// Name returns FilterName.
func (sf *SelectFilter) Name() string {
	return sf.FilterName
}

// Value returns Selected (an int), not the option text.
func (sf *SelectFilter) Value() any {
	return sf.Selected
}
// SortSelection is the state of a SortFilter: an Index and a direction flag.
type SortSelection struct {
	Index     int
	Ascending bool
}

// SortFilter is a sort-order filter; Values lists the sortable options and
// Selection holds the current choice.
type SortFilter struct {
	FilterName string
	Values     []string
	Selection  SortSelection
}

// Name returns FilterName.
func (so *SortFilter) Name() string {
	return so.FilterName
}

// Value returns the current Selection (a SortSelection value).
func (so *SortFilter) Value() any {
	return so.Selection
}
// GroupFilter is a container of sub-filters.
type GroupFilter struct {
	FilterName string
	Filters    []Filter
}

// Name returns FilterName.
func (gf *GroupFilter) Name() string {
	return gf.FilterName
}

// Value returns the sub-filter slice itself (a []Filter), not a copy.
func (gf *GroupFilter) Value() any {
	return gf.Filters
}