523 lines
16 KiB
Go
Executable File
523 lines
16 KiB
Go
Executable File
// Package initmanga implements the base source shared by InitManga-based manga sites.
//
// Listings and details are scraped from UIkit-based HTML; chapter pages are
// encrypted with AES-256-CBC using a key derived via PBKDF2-SHA512.
|
||
package initmanga
|
||
|
||
import (
	"context"
	"crypto/aes"
	"crypto/cipher"
	"crypto/sha512"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"strings"

	"github.com/PuerkitoBio/goquery"
	"golang.org/x/crypto/pbkdf2"

	"goyomi/internal/httpclient"
	"goyomi/internal/source"
	"goyomi/sources/base/util"
)
|
||
|
||
// Config describes a single InitManga-based site.
type Config struct {
	// Name is the display name of the source; New feeds it, together with
	// Lang, into source.GenerateSourceID.
	Name string

	// BaseURL is the root URL of the site.
	BaseURL string

	// Lang is the language code reported by Source.Lang.
	Lang string

	// PopularUrlSlug is the path segment of the popular listing,
	// e.g. "manga" or "seri". New falls back to "seri" when it is empty.
	PopularUrlSlug string

	// LatestUrlSlug is the path segment of the latest-updates listing,
	// e.g. "manga". New falls back to PopularUrlSlug when it is empty.
	LatestUrlSlug string
}
|
||
|
||
type Source struct {
|
||
cfg Config
|
||
client *httpclient.Client
|
||
id int64
|
||
}
|
||
|
||
func New(cfg Config) *Source {
|
||
if cfg.PopularUrlSlug == "" {
|
||
cfg.PopularUrlSlug = "seri"
|
||
}
|
||
if cfg.LatestUrlSlug == "" {
|
||
cfg.LatestUrlSlug = cfg.PopularUrlSlug
|
||
}
|
||
c := httpclient.NewClient(httpclient.WithRateLimit(1, 2))
|
||
return &Source{cfg: cfg, client: c, id: source.GenerateSourceID(cfg.Name, cfg.Lang)}
|
||
}
|
||
|
||
// ID returns the source identifier generated in New.
func (s *Source) ID() int64 {
	return s.id
}
|
||
// Name returns the configured display name of the source.
func (s *Source) Name() string {
	return s.cfg.Name
}
|
||
// Lang returns the configured language code of the source.
func (s *Source) Lang() string {
	return s.cfg.Lang
}
|
||
// SupportsLatest reports that this source implements GetLatestUpdates.
func (s *Source) SupportsLatest() bool {
	return true
}
|
||
|
||
// base returns the configured base URL with all trailing slashes removed, so
// callers can append "/path" without producing a double slash.
func (s *Source) base() string {
	trimmed := strings.TrimRight(s.cfg.BaseURL, "/")
	return trimmed
}
|
||
|
||
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
||
resp, err := s.client.Do(req)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
defer resp.Body.Close()
|
||
if resp.StatusCode != http.StatusOK {
|
||
return nil, fmt.Errorf("initmanga: HTTP %d", resp.StatusCode)
|
||
}
|
||
return goquery.NewDocumentFromReader(resp.Body)
|
||
}
|
||
|
||
// popularMangaSelector matches the manga cards on a listing page. It is a
// union of four alternatives, from the most specific (cards directly inside
// div.manga-item-grid) to the most generic (any div.uk-panel that is neither
// a ranking item nor a user info item).
const popularMangaSelector = `div.manga-item-grid > div.uk-panel.uk-position-relative, ` +
	`div.manga-item-grid > div.uk-panel:not(.manga-item-ranking):not(.user-item-info), ` +
	`div.uk-panel.uk-position-relative, ` +
	`div.uk-panel:not(.manga-item-ranking):not(.user-item-info)`
|
||
|
||
// nextPageSelector matches any marker of a following listing page: a
// rel=next link, an enabled UIkit pagination entry labelled "Sonraki sayfa"
// (Turkish for "Next page"), an anchor containing either label, or a.next.
const nextPageSelector = `head link[rel=next], link[rel=next], ` +
	`ul.uk-pagination li:not(.uk-disabled) a[aria-label="Sonraki sayfa"], ` +
	`a:contains(Sonraki sayfa), a:contains(Next page), a.next`
|
||
|
||
func mangaFromElement(el *goquery.Selection, baseURL string) source.SManga {
|
||
m := source.SManga{}
|
||
link := el.Find("h3 a, div.uk-overflow-hidden a").First()
|
||
if link.Length() == 0 {
|
||
link = el.Find("a").First()
|
||
}
|
||
m.URL, _ = link.Attr("href")
|
||
m.Title = strings.TrimSpace(el.Find("h3").Text())
|
||
if m.Title == "" {
|
||
clone := el.Find("a").Clone()
|
||
clone.Find("span, small").Remove()
|
||
m.Title = strings.TrimSpace(clone.Text())
|
||
}
|
||
imgEl := el.Find("img")
|
||
if src := imgEl.AttrOr("data-src", ""); src != "" {
|
||
m.ThumbnailURL = util.AbsURL(baseURL, src)
|
||
} else if src := imgEl.AttrOr("src", ""); src != "" {
|
||
m.ThumbnailURL = util.AbsURL(baseURL, src)
|
||
}
|
||
return m
|
||
}
|
||
|
||
func (s *Source) parsePage(doc *goquery.Document) source.MangasPage {
|
||
var mangas []source.SManga
|
||
doc.Find(popularMangaSelector).Each(func(_ int, el *goquery.Selection) {
|
||
m := mangaFromElement(el, s.cfg.BaseURL)
|
||
if m.URL != "" && m.Title != "" {
|
||
mangas = append(mangas, m)
|
||
}
|
||
})
|
||
hasNext := doc.Find(nextPageSelector).Length() > 0
|
||
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
||
}
|
||
|
||
func (s *Source) fetchListing(slug string, page int) (source.MangasPage, error) {
|
||
var u string
|
||
if page == 1 {
|
||
u = fmt.Sprintf("%s/%s/", s.base(), slug)
|
||
} else {
|
||
u = fmt.Sprintf("%s/%s/page/%d/", s.base(), slug, page)
|
||
}
|
||
doc, err := s.get(context.Background(), u)
|
||
if err != nil {
|
||
return source.MangasPage{}, err
|
||
}
|
||
return s.parsePage(doc), nil
|
||
}
|
||
|
||
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
||
return s.fetchListing(s.cfg.PopularUrlSlug, page)
|
||
}
|
||
|
||
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
||
return s.fetchListing(s.cfg.LatestUrlSlug, page)
|
||
}
|
||
|
||
// searchDTO is one element of the JSON array returned by the search endpoint.
// Every field is a pointer so that a missing or null value can be told apart
// from an empty string.
type searchDTO struct {
	// Title is the manga title.
	Title *string `json:"title"`
	// URL is the link to the manga page.
	URL *string `json:"url"`
	// Thumb is the cover image URL.
	Thumb *string `json:"thumb"`
}
|
||
|
||
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
||
u := fmt.Sprintf("%s/wp-json/initlise/v1/search?term=%s&page=%d", s.base(), query, page)
|
||
req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, u, nil)
|
||
if err != nil {
|
||
return source.MangasPage{}, err
|
||
}
|
||
req.Header.Set("Referer", s.cfg.BaseURL+"/")
|
||
resp, err := s.client.Do(req)
|
||
if err != nil {
|
||
return source.MangasPage{}, err
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
body := make([]byte, 0, 4096)
|
||
buf := make([]byte, 4096)
|
||
for {
|
||
n, readErr := resp.Body.Read(buf)
|
||
body = append(body, buf[:n]...)
|
||
if readErr != nil {
|
||
break
|
||
}
|
||
}
|
||
|
||
bodyStr := strings.TrimSpace(string(body))
|
||
if strings.HasPrefix(bodyStr, "<") {
|
||
// HTML response — parse as list page
|
||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(bodyStr))
|
||
if err != nil {
|
||
return source.MangasPage{}, err
|
||
}
|
||
return s.parsePage(doc), nil
|
||
}
|
||
|
||
var dtos []searchDTO
|
||
if err := json.Unmarshal(body, &dtos); err != nil {
|
||
return source.MangasPage{}, err
|
||
}
|
||
var mangas []source.SManga
|
||
for _, dto := range dtos {
|
||
m := source.SManga{}
|
||
if dto.Title != nil {
|
||
m.Title = strings.TrimSpace(*dto.Title)
|
||
}
|
||
if dto.URL != nil {
|
||
m.URL = *dto.URL
|
||
}
|
||
if dto.Thumb != nil {
|
||
m.ThumbnailURL = *dto.Thumb
|
||
}
|
||
if m.URL != "" && m.Title != "" {
|
||
mangas = append(mangas, m)
|
||
}
|
||
}
|
||
return source.MangasPage{Mangas: mangas, HasNextPage: false}, nil
|
||
}
|
||
|
||
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
||
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, manga.URL))
|
||
if err != nil {
|
||
return manga, err
|
||
}
|
||
result := source.SManga{URL: manga.URL}
|
||
|
||
descClone := doc.Find("div#manga-description").Clone()
|
||
descClone.Find("a, span").Remove()
|
||
result.Description = strings.TrimSpace(descClone.Text())
|
||
if altName := strings.TrimSpace(doc.Find("span#comic-othername").Text()); altName != "" {
|
||
result.Description += "\n\nAlternatif Başlık: " + altName
|
||
}
|
||
|
||
var genres []string
|
||
doc.Find("span.uk-label-contest").Each(func(_ int, el *goquery.Selection) {
|
||
t := strings.TrimPrefix(strings.TrimSpace(el.Text()), "#")
|
||
if t != "" {
|
||
genres = append(genres, t)
|
||
}
|
||
})
|
||
if len(genres) == 0 {
|
||
doc.Find("div#genre-tags a").Each(func(_ int, el *goquery.Selection) {
|
||
if t := strings.TrimSpace(el.Text()); t != "" {
|
||
genres = append(genres, t)
|
||
}
|
||
})
|
||
}
|
||
result.Genre = strings.Join(genres, ", ")
|
||
|
||
result.Author = strings.TrimSpace(doc.Find("div.manga-info-details a").FilterFunction(func(_ int, s *goquery.Selection) bool {
|
||
parent, _ := s.Parent().Html()
|
||
return strings.Contains(parent, "Yazar")
|
||
}).Text())
|
||
if result.Author == "" {
|
||
t := doc.Find("div.manga-info-details").FilterFunction(func(_ int, el *goquery.Selection) bool {
|
||
return strings.Contains(el.Text(), "Yazar")
|
||
}).Text()
|
||
result.Author = strings.TrimSpace(strings.SplitN(strings.SplitN(t, "Yazar:", 2)[len(strings.SplitN(t, "Yazar:", 2))-1], "Çizer:", 2)[0])
|
||
}
|
||
|
||
statusText := strings.ToLower(strings.TrimSpace(
|
||
doc.Find("span#manga-status, div.manga-status-ribbons span.manga-status-ribbon__text").First().Text()))
|
||
if statusText == "" {
|
||
t := doc.Find("div.manga-info-details").FilterFunction(func(_ int, el *goquery.Selection) bool {
|
||
return strings.Contains(el.Text(), "Durum")
|
||
}).Text()
|
||
statusText = strings.ToLower(strings.TrimSpace(strings.SplitN(t, "Durum:", 2)[len(strings.SplitN(t, "Durum:", 2))-1]))
|
||
}
|
||
switch {
|
||
case strings.Contains(statusText, "güncel") || strings.Contains(statusText, "devam") || strings.Contains(statusText, "ongoing"):
|
||
result.Status = source.StatusOngoing
|
||
case strings.Contains(statusText, "tamamland") || strings.Contains(statusText, "bitti") || strings.Contains(statusText, "completed"):
|
||
result.Status = source.StatusCompleted
|
||
case strings.Contains(statusText, "ara ver") || strings.Contains(statusText, "sezon") || strings.Contains(statusText, "hiatus"):
|
||
result.Status = source.StatusHiatus
|
||
case strings.Contains(statusText, "bırakıldı") || strings.Contains(statusText, "iptal") || strings.Contains(statusText, "dropped"):
|
||
result.Status = source.StatusCancelled
|
||
default:
|
||
result.Status = source.StatusUnknown
|
||
}
|
||
|
||
for _, sel := range []string{"div.story-cover-wrap img", "div.single-thumb img", "a.story-cover img"} {
|
||
if img := doc.Find(sel).First(); img.Length() > 0 {
|
||
if src := img.AttrOr("abs:src", img.AttrOr("src", "")); src != "" {
|
||
result.ThumbnailURL = util.AbsURL(s.cfg.BaseURL, src)
|
||
break
|
||
}
|
||
}
|
||
}
|
||
|
||
title := strings.TrimSpace(doc.Find("h1").First().Text())
|
||
if title == "" {
|
||
title = strings.TrimSpace(doc.Find("h2.uk-h3").First().Text())
|
||
}
|
||
result.Title = title
|
||
if result.Title == "" {
|
||
result.Title = manga.Title
|
||
}
|
||
return result, nil
|
||
}
|
||
|
||
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
||
mangaURL := util.AbsURL(s.cfg.BaseURL, manga.URL)
|
||
doc, err := s.get(context.Background(), mangaURL)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
var chapters []source.SChapter
|
||
chapterFromEl := func(el *goquery.Selection) source.SChapter {
|
||
ch := source.SChapter{}
|
||
el.Find("a").First().Each(func(_ int, a *goquery.Selection) {
|
||
ch.URL, _ = a.Attr("href")
|
||
})
|
||
rawName := strings.TrimSpace(el.Find("h3").Text())
|
||
ch.Name = strings.TrimSpace(lastPart(rawName, "–", "-"))
|
||
if ch.Name == "" {
|
||
ch.Name = rawName
|
||
}
|
||
dateStr := el.Find("time").AttrOr("datetime", "")
|
||
ch.DateUpload = util.ParseAbsoluteDate(dateStr, "2006-01-02T15:04:05")
|
||
return ch
|
||
}
|
||
|
||
doc.Find("div.chapter-item").Each(func(_ int, el *goquery.Selection) {
|
||
if ch := chapterFromEl(el); ch.URL != "" {
|
||
chapters = append(chapters, ch)
|
||
}
|
||
})
|
||
|
||
// paginate
|
||
page := 2
|
||
for {
|
||
paginURL := strings.TrimRight(mangaURL, "/") + "/bolum/page/" + fmt.Sprintf("%d", page) + "/"
|
||
nextDoc, err := s.get(context.Background(), paginURL)
|
||
if err != nil {
|
||
break
|
||
}
|
||
items := nextDoc.Find("div.chapter-item")
|
||
if items.Length() == 0 {
|
||
break
|
||
}
|
||
items.Each(func(_ int, el *goquery.Selection) {
|
||
if ch := chapterFromEl(el); ch.URL != "" {
|
||
chapters = append(chapters, ch)
|
||
}
|
||
})
|
||
if nextDoc.Find("ul.uk-pagination a[href^=http]").Length() == 0 {
|
||
break
|
||
}
|
||
page++
|
||
}
|
||
return chapters, nil
|
||
}
|
||
|
||
// lastPart returns the portion of s that follows the last occurrence of a
// separator. Separators are tried in the order given and the first one found
// in s wins, even if a later separator occurs further right. When no
// separator occurs in s, s is returned unchanged.
func lastPart(s string, seps ...string) string {
	for i := 0; i < len(seps); i++ {
		pos := strings.LastIndex(s, seps[i])
		if pos < 0 {
			continue
		}
		return s[pos+len(seps[i]):]
	}
	return s
}
|
||
|
||
// Regular expressions used to locate the encrypted chapter payload and its
// decryption key inside a chapter page.
var (
	// reDecryptKey captures the value assigned to decryption_key, written
	// either as a JSON property or as a JavaScript assignment.
	reDecryptKey = regexp.MustCompile(`["']?decryption_key["']?\s*[:=]\s*["']([^"']+)["']`)

	// reSmartKey captures the first decryption_key value that follows an
	// occurrence of InitMangaData.
	reSmartKey = regexp.MustCompile(`(?s)InitMangaData[\s\S]*?decryption_key["']?\s*[:=]\s*["']([^"']+)["']`)

	// reEncryptedData captures the object literal of an inline
	// "var InitMangaEncryptedChapter = {...};" statement.
	reEncryptedData = regexp.MustCompile(`(?s)var\s+InitMangaEncryptedChapter\s*=\s*(\{.*?\});`)

	// reInitMangaChunk captures the object literal assigned to
	// InitMangaEncryptedChapter, with no "var" or ";" required.
	reInitMangaChunk = regexp.MustCompile(`(?s)InitMangaEncryptedChapter\s*=\s*(\{.*?\})`)
)
|
||
|
||
// encryptedChapter mirrors the InitMangaEncryptedChapter object embedded in a
// chapter page. decryptWithPassphrase consumes the fields as follows.
type encryptedChapter struct {
	// Ciphertext is the base64-encoded AES-CBC ciphertext.
	Ciphertext string `json:"ciphertext"`
	// IV is the hex-encoded initialization vector.
	IV string `json:"iv"`
	// Salt is the hex-encoded PBKDF2 salt.
	Salt string `json:"salt"`
}
|
||
|
||
func decryptWithPassphrase(ciphertextB64, passphrase, saltHex, ivHex string) (string, error) {
|
||
salt, err := hex.DecodeString(saltHex)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
iv, err := hex.DecodeString(ivHex)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
ct, err := base64.StdEncoding.DecodeString(ciphertextB64)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
key := pbkdf2.Key([]byte(passphrase), salt, 999, 32, sha512.New)
|
||
block, err := aes.NewCipher(key)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
if len(ct)%aes.BlockSize != 0 {
|
||
return "", fmt.Errorf("initmanga: ciphertext not block-aligned")
|
||
}
|
||
mode := cipher.NewCBCDecrypter(block, iv)
|
||
mode.CryptBlocks(ct, ct)
|
||
// PKCS5 unpad
|
||
if len(ct) == 0 {
|
||
return "", fmt.Errorf("initmanga: empty decrypted block")
|
||
}
|
||
padLen := int(ct[len(ct)-1])
|
||
if padLen == 0 || padLen > aes.BlockSize || padLen > len(ct) {
|
||
return "", fmt.Errorf("initmanga: invalid padding %d", padLen)
|
||
}
|
||
return string(ct[:len(ct)-padLen]), nil
|
||
}
|
||
|
||
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
||
doc, err := s.get(context.Background(), util.AbsURL(s.cfg.BaseURL, chapter.URL))
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
html, _ := doc.Html()
|
||
|
||
// --- Path 1: script[src*=dmFyIElua] base64-encoded data ---
|
||
var encObj encryptedChapter
|
||
doc.Find("script[src]").Each(func(_ int, el *goquery.Selection) {
|
||
if encObj.Ciphertext != "" {
|
||
return
|
||
}
|
||
src := el.AttrOr("src", "")
|
||
if !strings.Contains(src, "dmFyIElua") {
|
||
return
|
||
}
|
||
b64 := strings.TrimSuffix(
|
||
strings.SplitN(src, "base64,", 2)[len(strings.SplitN(src, "base64,", 2))-1], "\"")
|
||
raw, err := base64.StdEncoding.DecodeString(b64)
|
||
if err != nil {
|
||
return
|
||
}
|
||
decoded := string(raw)
|
||
// extract InitMangaEncryptedChapter JSON
|
||
jsonStr := ""
|
||
if m := reInitMangaChunk.FindStringSubmatch(decoded); len(m) > 1 {
|
||
jsonStr = m[1]
|
||
} else if idx := strings.Index(decoded, "InitMangaEncryptedChapter="); idx >= 0 {
|
||
jsonStr = strings.SplitN(decoded[idx+len("InitMangaEncryptedChapter="):], ";", 2)[0]
|
||
}
|
||
if jsonStr == "" {
|
||
return
|
||
}
|
||
json.Unmarshal([]byte(jsonStr), &encObj) //nolint:errcheck
|
||
})
|
||
|
||
// --- Path 2: inline JS var InitMangaEncryptedChapter ---
|
||
if encObj.Ciphertext == "" {
|
||
if m := reEncryptedData.FindStringSubmatch(html); len(m) > 1 {
|
||
json.Unmarshal([]byte(m[1]), &encObj) //nolint:errcheck
|
||
}
|
||
}
|
||
|
||
if encObj.Ciphertext != "" {
|
||
// Find decryption key
|
||
rawKey := ""
|
||
// Try script#init-main-js-extra src (base64-encoded)
|
||
doc.Find("script#init-main-js-extra[src]").First().Each(func(_ int, el *goquery.Selection) {
|
||
src := el.AttrOr("src", "")
|
||
if !strings.Contains(src, "base64,") {
|
||
return
|
||
}
|
||
b64 := strings.SplitN(src, "base64,", 2)[1]
|
||
b64 = strings.TrimSuffix(b64, "\"")
|
||
if raw, err := base64.StdEncoding.DecodeString(b64); err == nil {
|
||
if m := reDecryptKey.FindStringSubmatch(string(raw)); len(m) > 1 {
|
||
rawKey = m[1]
|
||
}
|
||
}
|
||
})
|
||
if rawKey == "" {
|
||
if m := reSmartKey.FindStringSubmatch(html); len(m) > 1 {
|
||
rawKey = m[1]
|
||
}
|
||
}
|
||
if rawKey != "" {
|
||
passBytes, err := base64.StdEncoding.DecodeString(rawKey)
|
||
if err == nil {
|
||
passphrase := string(passBytes)
|
||
if content, err := decryptWithPassphrase(encObj.Ciphertext, passphrase, encObj.Salt, encObj.IV); err == nil {
|
||
return parseDecryptedPages(content, s.cfg.BaseURL), nil
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Fallback: plain img tags
|
||
return fallbackPages(doc, s.cfg.BaseURL), nil
|
||
}
|
||
|
||
func parseDecryptedPages(content, baseURL string) []source.Page {
|
||
trimmed := strings.TrimSpace(content)
|
||
if strings.HasPrefix(trimmed, "<") {
|
||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(trimmed))
|
||
if err != nil {
|
||
return nil
|
||
}
|
||
var pages []source.Page
|
||
doc.Find("img").Each(func(i int, img *goquery.Selection) {
|
||
src := img.AttrOr("data-src", img.AttrOr("src", ""))
|
||
if src == "" || strings.HasPrefix(src, "data:") {
|
||
return
|
||
}
|
||
pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(baseURL, src)})
|
||
})
|
||
return pages
|
||
}
|
||
// Try JSON array of URLs
|
||
var srcs []string
|
||
if err := json.Unmarshal([]byte(trimmed), &srcs); err == nil {
|
||
pages := make([]source.Page, 0, len(srcs))
|
||
for i, src := range srcs {
|
||
switch {
|
||
case strings.HasPrefix(src, "//"):
|
||
src = "https:" + src
|
||
case strings.HasPrefix(src, "/"):
|
||
src = strings.TrimRight(baseURL, "/") + src
|
||
}
|
||
pages = append(pages, source.Page{Index: i, ImageURL: src})
|
||
}
|
||
return pages
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func fallbackPages(doc *goquery.Document, baseURL string) []source.Page {
|
||
var pages []source.Page
|
||
doc.Find("div#chapter-content img[src]").Each(func(i int, img *goquery.Selection) {
|
||
src := img.AttrOr("src", img.AttrOr("data-src", ""))
|
||
if src != "" {
|
||
pages = append(pages, source.Page{Index: i, ImageURL: util.AbsURL(baseURL, src)})
|
||
}
|
||
})
|
||
return pages
|
||
}
|
||
|
||
// GetImageURL returns the image URL already stored on page; no request is
// made.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	imageURL := page.ImageURL
	return imageURL, nil
}
|
||
// GetFilterList returns nil: this source defines no search filters.
func (s *Source) GetFilterList() []source.Filter {
	return nil
}
|