Files
goyomi/sources/all/baobua/baobua.go
T
achmad 316ae2f9db feat: implement phase 4 batch — 54 base-class wrapper sources
Add 8 all/ sources (7 Masonry, 1 Madara) and 38 en/ sources spanning
Madara, MangaThemesia, MadTheme, Keyoapp, and Guya bases, plus 8 earlier
all/ standalone sources from the previous session (ahottie, akuma,
allporncomicsco, asmhentai, baobua, beauty3600000, buondua, comicfury,
comicgrowl, comicklive, comicsvalley, comikey, commitstrip, coomer).

Also annotates phase4-standalone.md with base-class tags for 43 additional
unimplemented en/ sources identified in a full scan.
2026-05-13 23:11:26 +07:00

251 lines
7.0 KiB
Go

// Package baobua implements the BaoBua adult photo gallery source.
// FlareSolverr required; no full-text search; category filter; recursive page pagination.
package baobua
import (
"context"
"fmt"
"net/http"
"net/url"
"regexp"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"goyomi/internal/httpclient/flare"
"goyomi/internal/registry"
"goyomi/internal/source"
)
// siteURL is the scheme and host that every listing and gallery path is
// appended to.
const siteURL = "https://baobua.net"

var (
	// wpComRe matches the leading "https://i<digits>.wp.com/" prefix of an
	// image URL.
	wpComRe = regexp.MustCompile(`^https://i\d+\.wp\.com/`)

	// categoryNames are the labels offered by the "Category" filter.
	// Entry i is the display name for categorySlugs[i].
	categoryNames = []string{
		"All",
		"Ao-yem",
		"Asia",
		"Beauty",
		"Bikini",
		"China",
		"Cosplay",
		"Japan",
		"Nude",
		"Sexy",
		"Top",
		"Tattoo",
		"Vietnam",
	}

	// categorySlugs are the path segments placed after "/category/" when a
	// category is selected. Index 0 ("All") is empty and is never used to
	// build a category URL.
	categorySlugs = []string{
		"",
		"Ao-yem",
		"Asia",
		"beauty",
		"Bikini",
		"China",
		"Cosplay",
		"Japan",
		"Nude",
		"Sexy",
		"Top",
		"tattoo",
		"Vietnam",
	}
)
// Source is the BaoBua source type that this package registers.
type Source struct {
	// id is the value reported by ID.
	id int64
	// client executes the HTTP requests issued by this source.
	client *flare.Client
}
// New builds a Source with a flare client created via
// flare.WithRateLimit(3, 1) and an ID generated from the name "BaoBua" and
// the language "all".
func New() *Source {
	httpClient := flare.NewClient(flare.WithRateLimit(3, 1))
	sourceID := source.GenerateSourceID("BaoBua", "all")
	return &Source{
		client: httpClient,
		id:     sourceID,
	}
}
// ID returns the identifier assigned to the source at construction.
func (s *Source) ID() int64 {
	return s.id
}
// Name returns the display name of the source, "BaoBua".
func (s *Source) Name() string {
	return "BaoBua"
}
// Lang returns the language tag of the source, "all".
func (s *Source) Lang() string {
	return "all"
}
// SupportsLatest reports false: GetLatestUpdates always returns an error for
// this source.
func (s *Source) SupportsLatest() bool {
	return false
}
// normalizeImageURL rewrites an image URL that starts with the
// "https://i<digits>.wp.com/" prefix: the prefix is replaced by "https://"
// and the first "?w=640" occurrence is removed. URLs without that prefix are
// returned unchanged.
func normalizeImageURL(u string) string {
	if !wpComRe.MatchString(u) {
		return u
	}
	direct := wpComRe.ReplaceAllString(u, "https://")
	return strings.Replace(direct, "?w=640", "", 1)
}
// get issues a GET request for rawURL through the source's client, sending
// the site root as the Referer header, and parses the response body into a
// goquery document. Any status other than 200 is returned as an error.
func (s *Source) get(ctx context.Context, rawURL string) (*goquery.Document, error) {
	request, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
	if reqErr != nil {
		return nil, reqErr
	}
	request.Header.Set("Referer", siteURL+"/")

	response, doErr := s.client.Do(request)
	if doErr != nil {
		return nil, doErr
	}
	// The body is fully consumed by the parser below before this runs.
	defer response.Body.Close()

	switch response.StatusCode {
	case http.StatusOK:
		return goquery.NewDocumentFromReader(response.Body)
	default:
		return nil, fmt.Errorf("baobua: HTTP %d", response.StatusCode)
	}
}
// parseMangas converts every ".product-item" element of a listing document
// into a manga entry. Items without a link, with an unparsable link, or
// without a title are skipped. HasNextPage is set when the pagination block
// contains a ".nextPage" element.
func (s *Source) parseMangas(doc *goquery.Document) source.MangasPage {
	var result source.MangasPage

	doc.Find(".product-item").Each(func(_ int, item *goquery.Selection) {
		link := item.Find("a").First().AttrOr("href", "")
		if link == "" {
			return
		}
		linkURL, parseErr := url.Parse(link)
		if parseErr != nil {
			return
		}
		name := strings.TrimSpace(item.Find(".product-title").Text())
		if name == "" {
			return
		}

		// Only the path is stored; callers prepend siteURL when fetching.
		entry := source.SManga{URL: linkURL.Path, Title: name}
		thumb := item.Find("img.product-imgreal").First().AttrOr("src", "")
		if thumb != "" {
			entry.ThumbnailURL = normalizeImageURL(thumb)
		}
		result.Mangas = append(result.Mangas, entry)
	})

	result.HasNextPage = doc.Find(".pagination-custom .nextPage").Length() > 0
	return result
}
// GetPopularManga loads the site's front-page listing for the given page
// number and returns the entries parsed from it.
func (s *Source) GetPopularManga(page int) (source.MangasPage, error) {
	listingURL := fmt.Sprintf("%s/?page=%d", siteURL, page)

	doc, err := s.get(context.Background(), listingURL)
	if err != nil {
		return source.MangasPage{}, err
	}

	listing := s.parseMangas(doc)
	return listing, nil
}
// GetLatestUpdates always fails: this source has no "latest" listing
// (SupportsLatest reports false). The page argument is ignored.
func (s *Source) GetLatestUpdates(page int) (source.MangasPage, error) {
	var empty source.MangasPage
	unsupported := fmt.Errorf("baobua: latest not supported")
	return empty, unsupported
}
// GetSearchManga resolves a search request. The first matching rule wins:
//
//  1. A query containing "baobua.net" (case-insensitive) is treated as a
//     direct link. It must be an http(s) URL whose host is baobua.net or a
//     subdomain of it; a missing scheme defaults to https. If the fetched
//     page is a gallery (no ".product-item", but an ".article-body"), a
//     single entry is returned; otherwise it is parsed as a listing.
//  2. A "Category" select filter with a non-zero, in-range selection loads
//     that category's listing for the requested page.
//  3. Any other non-blank query is rejected, because the site offers no
//     full-text search.
//  4. Otherwise the popular listing for the requested page is returned.
//
// An error is returned when the link is malformed or points at another host,
// when a request fails, or when rule 3 applies.
func (s *Source) GetSearchManga(page int, query string, filters []source.Filter) (source.MangasPage, error) {
	// Direct URL lookup.
	if strings.Contains(strings.ToLower(query), "baobua.net") {
		target := strings.TrimSpace(query)
		// A link pasted without a scheme would otherwise parse as a bare
		// path and fail when the request is built.
		if !strings.Contains(target, "://") {
			target = "https://" + target
		}
		parsed, err := url.Parse(target)
		if err != nil {
			return source.MangasPage{}, err
		}

		// The query is user input: a substring match alone would let
		// "http://other.example/?baobua.net" be fetched, so the host is
		// verified before any request is made.
		host := strings.ToLower(parsed.Hostname())
		schemeOK := parsed.Scheme == "http" || parsed.Scheme == "https"
		hostOK := host == "baobua.net" || strings.HasSuffix(host, ".baobua.net")
		if !schemeOK || !hostOK {
			return source.MangasPage{}, fmt.Errorf("baobua: %q is not a baobua.net URL", query)
		}

		doc, err := s.get(context.Background(), parsed.String())
		if err != nil {
			return source.MangasPage{}, err
		}
		if doc.Find(".product-item").Length() == 0 && doc.Find(".article-body").Length() > 0 {
			m := s.parseMangaDetailsDoc(doc)
			m.URL = parsed.Path
			return source.MangasPage{Mangas: []source.SManga{m}, HasNextPage: false}, nil
		}
		return s.parseMangas(doc), nil
	}

	// Category filter. Index 0 is the "All" entry and has no slug, so it
	// falls through to the rules below.
	for _, f := range filters {
		sf, ok := f.(*source.SelectFilter)
		if !ok || sf.FilterName != "Category" {
			continue
		}
		idx := sf.Selected
		if idx <= 0 || idx >= len(categorySlugs) {
			continue
		}
		u := fmt.Sprintf("%s/category/%s/?page=%d", siteURL, categorySlugs[idx], page)
		doc, err := s.get(context.Background(), u)
		if err != nil {
			return source.MangasPage{}, err
		}
		return s.parseMangas(doc), nil
	}

	if strings.TrimSpace(query) != "" {
		return source.MangasPage{}, fmt.Errorf("baobua: full-text search is not supported")
	}
	return s.GetPopularManga(page)
}
// parseMangaDetailsDoc extracts gallery metadata from a gallery document:
// the title from the first matching heading element, the thumbnail from the
// first matching image, and the genre list from the tag links joined with
// ", ". Status is always source.StatusCompleted. The URL field is left for
// the caller to fill in.
func (s *Source) parseMangaDetailsDoc(doc *goquery.Document) source.SManga {
	heading := doc.Find(".product-title, h1, .article-title, .post-title").First().Text()
	details := source.SManga{
		Status: source.StatusCompleted,
		Title:  strings.TrimSpace(heading),
	}

	cover := doc.Find("img.product-imgreal, .article-body img").First().AttrOr("src", "")
	if cover != "" {
		details.ThumbnailURL = normalizeImageURL(cover)
	}

	var tags []string
	doc.Find(".article-tags a").Each(func(_ int, link *goquery.Selection) {
		label := strings.TrimSpace(link.Text())
		if label == "" {
			return
		}
		tags = append(tags, label)
	})
	details.Genre = strings.Join(tags, ", ")

	return details
}
// GetMangaDetails fetches the gallery page at manga.URL and returns the
// metadata parsed from it. The returned entry keeps manga.URL, and falls
// back to manga.Title when the page yields no title. On a fetch error the
// input manga is returned unchanged together with the error.
func (s *Source) GetMangaDetails(manga source.SManga) (source.SManga, error) {
	pageURL := siteURL + manga.URL
	doc, err := s.get(context.Background(), pageURL)
	if err != nil {
		return manga, err
	}

	details := s.parseMangaDetailsDoc(doc)
	details.URL = manga.URL
	if details.Title != "" {
		return details, nil
	}
	details.Title = manga.Title
	return details, nil
}
// GetChapterList returns the single "Gallery" chapter of a manga entry.
//
// The chapter URL is the path of the page's canonical link when that link
// parses and carries a non-root path; otherwise manga.URL is kept.
// DateUpload is the page's date in Unix milliseconds, or 0 when the date is
// missing or matches none of the known layouts.
//
// An error is returned only when the gallery page cannot be fetched.
func (s *Source) GetChapterList(manga source.SManga) ([]source.SChapter, error) {
	doc, err := s.get(context.Background(), siteURL+manga.URL)
	if err != nil {
		return nil, err
	}

	chapterURL := manga.URL
	if href := doc.Find("link[rel=canonical]").First().AttrOr("href", ""); href != "" {
		// A canonical link with no path, or just "/", would make the page
		// list load the site root instead of the gallery, so it must not
		// replace a URL that has just been fetched successfully.
		if parsed, perr := url.Parse(href); perr == nil && strings.Trim(parsed.Path, "/") != "" {
			chapterURL = parsed.Path
		}
	}

	var uploaded int64
	// Only the first date element is read, and internal whitespace is
	// collapsed, so repeated elements or markup indentation cannot defeat
	// the fixed layouts below.
	rawDate := doc.Find(".article-date-comment .date").First().Text()
	if dateStr := strings.Join(strings.Fields(rawDate), " "); dateStr != "" {
		// "Mon Jan 02 2006" or "Mon Jan 2 2006"
		for _, layout := range []string{"Mon Jan 02 2006", "Mon Jan 2 2006"} {
			if t, perr := time.Parse(layout, dateStr); perr == nil {
				uploaded = t.UnixMilli()
				break
			}
		}
	}

	return []source.SChapter{{URL: chapterURL, Name: "Gallery", DateUpload: uploaded}}, nil
}
// recursivePages collects the images of a gallery that may span several
// paginated documents, starting at rawURL and following each document's
// "Next" link.
//
// Every ".article-body img" with a non-empty "src" (or, failing that,
// "data-src") becomes one page. Indexes are assigned consecutively starting
// at offset, and image URLs go through normalizeImageURL.
//
// "Next" links are resolved against the URL of the document they appear in,
// so relative hrefs work. A URL is never fetched twice, which ends the walk
// if the pagination links form a cycle. A failure on any page is returned as
// an error rather than yielding a silently truncated gallery.
func (s *Source) recursivePages(rawURL string, offset int) ([]source.Page, error) {
	var pages []source.Page
	visited := make(map[string]struct{})

	current := rawURL
	for {
		if _, seen := visited[current]; seen {
			break
		}
		visited[current] = struct{}{}

		doc, err := s.get(context.Background(), current)
		if err != nil {
			return nil, fmt.Errorf("baobua: loading %s: %w", current, err)
		}

		doc.Find(".article-body img").Each(func(_ int, img *goquery.Selection) {
			src := img.AttrOr("src", "")
			if src == "" {
				src = img.AttrOr("data-src", "")
			}
			if src != "" {
				// len(pages) counts images from all documents so far,
				// which keeps indexes contiguous across pagination.
				pages = append(pages, source.Page{Index: offset + len(pages), ImageURL: normalizeImageURL(src)})
			}
		})

		next := doc.Find("a.page-numbers:contains(Next)").First().AttrOr("href", "")
		if next == "" {
			break
		}

		base, err := url.Parse(current)
		if err != nil {
			return nil, fmt.Errorf("baobua: parsing page URL %q: %w", current, err)
		}
		ref, err := url.Parse(next)
		if err != nil {
			return nil, fmt.Errorf("baobua: parsing next-page link %q: %w", next, err)
		}
		current = base.ResolveReference(ref).String()
	}

	return pages, nil
}
// GetPageList returns every image page of the chapter, starting at the
// chapter's URL on the site and numbering pages from 0.
func (s *Source) GetPageList(chapter source.SChapter) ([]source.Page, error) {
	firstPageURL := siteURL + chapter.URL
	return s.recursivePages(firstPageURL, 0)
}
// GetImageURL returns the image URL already stored on the page; no request
// is made.
func (s *Source) GetImageURL(page source.Page) (string, error) {
	imageURL := page.ImageURL
	return imageURL, nil
}
// GetFilterList returns the filters this source offers: a single "Category"
// select filter whose options are categoryNames.
func (s *Source) GetFilterList() []source.Filter {
	category := &source.SelectFilter{
		FilterName: "Category",
		Values:     categoryNames,
	}
	return []source.Filter{category}
}
func init() {
registry.Register(New())
}