316ae2f9db
Add 8 all/ sources (7 Masonry, 1 Madara) and 38 en/ sources spanning Madara, MangaThemesia, MadTheme, Keyoapp, and Guya bases, plus 8 earlier all/ standalone sources from the previous session (ahottie, akuma, allporncomicsco, asmhentai, baobua, beauty3600000, buondua, comicfury, comicgrowl, comicklive, comicsvalley, comikey, commitstrip, coomer). Also annotates phase4-standalone.md with base-class tags for 43 additional unimplemented en/ sources identified in a full scan.
251 lines
7.0 KiB
Go
251 lines
7.0 KiB
Go
// Package baobua implements the BaoBua adult photo gallery source.
|
|
// It requires FlareSolverr, has no full-text search (only a category filter), and collects multi-page galleries by following their "Next" links.
|
|
package baobua
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"goyomi/internal/httpclient/flare"
|
|
"goyomi/internal/registry"
|
|
"goyomi/internal/source"
|
|
)
|
|
|
|
const (
	// siteURL is the scheme and host that every site-relative path is
	// appended to; it is also used to build the Referer header.
	siteURL = "https://baobua.net"
)

var (
	// wpComRe matches the "https://i<digits>.wp.com/" prefix at the very
	// start of an image URL.
	wpComRe = regexp.MustCompile(`^https://i\d+\.wp\.com/`)

	// categoryNames lists the labels offered by the "Category" select filter.
	categoryNames = []string{
		"All", "Ao-yem", "Asia", "Beauty", "Bikini", "China", "Cosplay",
		"Japan", "Nude", "Sexy", "Top", "Tattoo", "Vietnam",
	}

	// categorySlugs lists the URL path segment for the entry at the same
	// index in categoryNames. Index 0 ("All") has no slug.
	categorySlugs = []string{
		"", "Ao-yem", "Asia", "beauty", "Bikini", "China", "Cosplay",
		"Japan", "Nude", "Sexy", "Top", "tattoo", "Vietnam",
	}
)
|
|
|
|
type Source struct {
|
|
client *flare.Client
|
|
id int64
|
|
}
|
|
|
|
func New() *Source {
|
|
return &Source{
|
|
client: flare.NewClient(flare.WithRateLimit(3, 1)),
|
|
id: source.GenerateSourceID("BaoBua", "all"),
|
|
}
|
|
}
|
|
|
|
// ID returns the numeric identifier stored on the source.
func (s *Source) ID() int64 {
	return s.id
}
|
|
// Name returns the display name of the source, "BaoBua".
func (s *Source) Name() string {
	return "BaoBua"
}
|
|
// Lang returns the language code of the source, "all".
func (s *Source) Lang() string {
	return "all"
}
|
|
// SupportsLatest reports false: GetLatestUpdates always returns an error for
// this source.
func (s *Source) SupportsLatest() bool {
	return false
}
|
|
|
|
func normalizeImageURL(u string) string {
|
|
if wpComRe.MatchString(u) {
|
|
u = wpComRe.ReplaceAllString(u, "https://")
|
|
u = strings.Replace(u, "?w=640", "", 1)
|
|
}
|
|
return u
|
|
}
|
|
|
|
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("Referer", siteURL+"/")
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("baobua: HTTP %d", resp.StatusCode)
|
|
}
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func (s *Source) parseMangas(doc *goquery.Document) source.MangasPage {
|
|
var mangas []source.SManga
|
|
doc.Find(".product-item").Each(func(_ int, el *goquery.Selection) {
|
|
href := el.Find("a").First().AttrOr("href", "")
|
|
if href == "" {
|
|
return
|
|
}
|
|
parsed, err := url.Parse(href)
|
|
if err != nil {
|
|
return
|
|
}
|
|
title := strings.TrimSpace(el.Find(".product-title").Text())
|
|
if title == "" {
|
|
return
|
|
}
|
|
m := source.SManga{URL: parsed.Path, Title: title}
|
|
if src := el.Find("img.product-imgreal").First().AttrOr("src", ""); src != "" {
|
|
m.ThumbnailURL = normalizeImageURL(src)
|
|
}
|
|
mangas = append(mangas, m)
|
|
})
|
|
hasNext := doc.Find(".pagination-custom .nextPage").Length() > 0
|
|
return source.MangasPage{Mangas: mangas, HasNextPage: hasNext}
|
|
}
|
|
|
|
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
|
|
doc, err := s.get(context.Background(), fmt.Sprintf("%s/?page=%d", siteURL, page))
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangas(doc), nil
|
|
}
|
|
|
|
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
|
|
return source.MangasPage{}, fmt.Errorf("baobua: latest not supported")
|
|
}
|
|
|
|
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
|
|
// Direct URL lookup
|
|
if strings.Contains(query, "baobua.net") {
|
|
parsed, err := url.Parse(query)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
doc, err := s.get(context.Background(), query)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
if doc.Find(".product-item").Length() == 0 && doc.Find(".article-body").Length() > 0 {
|
|
m := s.parseMangaDetailsDoc(doc)
|
|
m.URL = parsed.Path
|
|
return source.MangasPage{Mangas: []source.SManga{m}, HasNextPage: false}, nil
|
|
}
|
|
return s.parseMangas(doc), nil
|
|
}
|
|
|
|
// Category filter
|
|
for _, f := range filters {
|
|
if sf, ok := f.(*source.SelectFilter); ok && sf.FilterName == "Category" {
|
|
idx := sf.Selected
|
|
if idx > 0 && idx < len(categorySlugs) {
|
|
u := fmt.Sprintf("%s/category/%s/?page=%d", siteURL, categorySlugs[idx], page)
|
|
doc, err := s.get(context.Background(), u)
|
|
if err != nil {
|
|
return source.MangasPage{}, err
|
|
}
|
|
return s.parseMangas(doc), nil
|
|
}
|
|
}
|
|
}
|
|
|
|
if strings.TrimSpace(query) != "" {
|
|
return source.MangasPage{}, fmt.Errorf("baobua: full-text search is not supported")
|
|
}
|
|
return s.GetPopularManga(page)
|
|
}
|
|
|
|
func (s *Source) parseMangaDetailsDoc(doc *goquery.Document) source.SManga {
|
|
m := source.SManga{Status: source.StatusCompleted}
|
|
m.Title = strings.TrimSpace(
|
|
doc.Find(".product-title, h1, .article-title, .post-title").First().Text(),
|
|
)
|
|
if src := doc.Find("img.product-imgreal, .article-body img").First().AttrOr("src", ""); src != "" {
|
|
m.ThumbnailURL = normalizeImageURL(src)
|
|
}
|
|
var genres []string
|
|
doc.Find(".article-tags a").Each(func(_ int, a *goquery.Selection) {
|
|
if t := strings.TrimSpace(a.Text()); t != "" {
|
|
genres = append(genres, t)
|
|
}
|
|
})
|
|
m.Genre = strings.Join(genres, ", ")
|
|
return m
|
|
}
|
|
|
|
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
|
|
doc, err := s.get(context.Background(), siteURL+manga.URL)
|
|
if err != nil {
|
|
return manga, err
|
|
}
|
|
result := s.parseMangaDetailsDoc(doc)
|
|
result.URL = manga.URL
|
|
if result.Title == "" {
|
|
result.Title = manga.Title
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
|
|
doc, err := s.get(context.Background(), siteURL+manga.URL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
chapterURL := manga.URL
|
|
if canonical := doc.Find("link[rel=canonical]").First(); canonical.Length() > 0 {
|
|
if href := canonical.AttrOr("href", ""); href != "" {
|
|
if parsed, err := url.Parse(href); err == nil {
|
|
chapterURL = parsed.Path
|
|
}
|
|
}
|
|
}
|
|
var date int64
|
|
if dateStr := strings.TrimSpace(doc.Find(".article-date-comment .date").Text()); dateStr != "" {
|
|
// "Mon Jan 02 2006" or "Mon Jan 2 2006"
|
|
for _, layout := range []string{"Mon Jan 02 2006", "Mon Jan 2 2006"} {
|
|
if t, err := time.Parse(layout, dateStr); err == nil {
|
|
date = t.UnixMilli()
|
|
break
|
|
}
|
|
}
|
|
}
|
|
return []source.SChapter{{URL: chapterURL, Name: "Gallery", DateUpload: date}}, nil
|
|
}
|
|
|
|
func (s *Source) recursivePages(rawURL string, offset int) ([]source.Page, error) {
|
|
doc, err := s.get(context.Background(), rawURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var pages []source.Page
|
|
doc.Find(".article-body img").Each(func(_ int, img *goquery.Selection) {
|
|
src := img.AttrOr("src", "")
|
|
if src == "" {
|
|
src = img.AttrOr("data-src", "")
|
|
}
|
|
if src != "" {
|
|
pages = append(pages, source.Page{Index: offset + len(pages), ImageURL: normalizeImageURL(src)})
|
|
}
|
|
})
|
|
nextURL := doc.Find("a.page-numbers:contains(Next)").First().AttrOr("href", "")
|
|
if nextURL != "" {
|
|
extra, err := s.recursivePages(nextURL, offset+len(pages))
|
|
if err == nil {
|
|
pages = append(pages, extra...)
|
|
}
|
|
}
|
|
return pages, nil
|
|
}
|
|
|
|
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
|
|
return s.recursivePages(siteURL+chapter.URL, 0)
|
|
}
|
|
|
|
// GetImageURL returns the image URL already stored on the page; no request is
// made.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	return page.ImageURL, nil
}
|
|
|
|
func (s *Source) GetFilterList() []source.Filter {
|
|
return []source.Filter{
|
|
&source.SelectFilter{FilterName: "Category", Values: categoryNames},
|
|
}
|
|
}
|
|
|
|
func init() {
|
|
registry.Register(New())
|
|
}
|