Files
goyomi/sources/base/util/util.go
T
achmad ca609ccae7 phase3: implement first 20 base sources + shared util
Ports bases from previous session:
util (shared helpers), bakkin, fmreader, foolslide, gigaviewer,
gmanga, grouple, guya, heancms, hentaihand, kemono, madara,
madtheme, mangadventure, mangahub, mangathemesia, mangaworld,
mmrcms, senkuro, wpcomics.
2026-05-10 22:15:11 +07:00

182 lines
5.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package util
import (
"encoding/json"
"regexp"
"strconv"
"strings"
"time"
"unicode"
)
// ParseRelativeDate converts a relative timestamp such as "2 days ago",
// "3 jam lalu" or "yesterday" into Unix milliseconds, counted back from
// time.Now().
//
// The input is lowercased and matched by substring, so filler words around the
// amount and unit ("ago", "lalu", ...) are ignored. "just now"/"sekarang"
// resolve to the current instant; "today" and "yesterday" resolve to midnight
// of that day in the local time zone. A leading article ("a day ago",
// "an hour ago") counts as an amount of 1.
//
// It returns 0 when no amount or no known unit can be found.
func ParseRelativeDate(s string) int64 {
	s = strings.TrimSpace(strings.ToLower(s))
	now := time.Now()
	if strings.Contains(s, "just now") || strings.Contains(s, "sekarang") {
		return now.UnixMilli()
	}
	if strings.Contains(s, "today") {
		y, m, d := now.Date()
		return time.Date(y, m, d, 0, 0, 0, 0, now.Location()).UnixMilli()
	}
	if strings.Contains(s, "yesterday") {
		y, m, d := now.AddDate(0, 0, -1).Date()
		return time.Date(y, m, d, 0, 0, 0, 0, now.Location()).UnixMilli()
	}

	num := extractLeadingNumber(s)
	if num == 0 {
		// Humanised strings spell a single unit with an article
		// ("a day ago", "an hour ago") instead of the digit 1.
		if !strings.HasPrefix(s, "a ") && !strings.HasPrefix(s, "an ") {
			return 0
		}
		num = 1
	}

	switch {
	case anyWord(s, "second", "segundo", "giây", "detik"):
		return now.Add(-time.Duration(num) * time.Second).UnixMilli()
	// Weeks are tested before minutes because the Indonesian "minggu" (week)
	// contains "min" and would otherwise be read as minutes.
	case anyWord(s, "week", "semana", "tuần", "minggu"):
		return now.AddDate(0, 0, -num*7).UnixMilli()
	case anyWord(s, "minute", "minuto", "min", "dakika", "phút", "menit"):
		return now.Add(-time.Duration(num) * time.Minute).UnixMilli()
	case anyWord(s, "hour", "hora", "heure", "saat", "jam", "giờ", "ore"):
		return now.Add(-time.Duration(num) * time.Hour).UnixMilli()
	case anyWord(s, "day", "día", "dia", "jour", "gün", "hari", "ngày", "วัน", "giorni"):
		return now.AddDate(0, 0, -num).UnixMilli()
	case anyWord(s, "month", "mes", "tháng", "bulan"):
		return now.AddDate(0, -num, 0).UnixMilli()
	case anyWord(s, "year", "año", "năm", "tahun"):
		return now.AddDate(-num, 0, 0).UnixMilli()
	}
	return 0
}
// extractLeadingNumber returns the first run of ASCII decimal digits found in
// s as an int, e.g. 15 for "about 15 minutes ago".
//
// It returns 0 when s contains no ASCII digit or when the digit run does not
// fit in an int. Non-ASCII digits (full-width, Arabic-Indic, ...) are skipped
// because strconv.Atoi cannot parse them and slicing one byte past their start
// would cut the rune in half.
func extractLeadingNumber(s string) int {
	for i, c := range s {
		if c > unicode.MaxASCII || !unicode.IsDigit(c) {
			continue
		}
		end := i + 1
		for end < len(s) && s[end] >= '0' && s[end] <= '9' {
			end++
		}
		n, err := strconv.Atoi(s[i:end])
		if err != nil {
			// On overflow Atoi returns the clamped maximum alongside the
			// error; passing that on would overflow the callers' date
			// arithmetic, so report "no number" instead.
			return 0
		}
		return n
	}
	return 0
}
// anyWord reports whether s contains at least one of words as a substring.
// Matching is purely substring-based; word boundaries are not considered.
func anyWord(s string, words ...string) bool {
	found := false
	for _, candidate := range words {
		if found = strings.Contains(s, candidate); found {
			break
		}
	}
	return found
}
// ParseAbsoluteDate parses s with the given Go reference-time layout
// (e.g. "January 02, 2006", "2006-01-02") and returns Unix milliseconds.
//
// Surrounding whitespace in s is ignored and the value is interpreted as UTC
// unless the layout and input carry zone information. It returns 0 when s is
// blank or does not match layout.
func ParseAbsoluteDate(s, layout string) int64 {
	trimmed := strings.TrimSpace(s)
	if len(trimmed) == 0 {
		return 0
	}
	if parsed, err := time.ParseInLocation(layout, trimmed, time.UTC); err == nil {
		return parsed.UnixMilli()
	}
	return 0
}
// SlugFromURL returns the last non-empty path segment of a URL string,
// e.g. "one-piece" for "https://example.com/manga/one-piece/?page=2".
//
// The query string and fragment are discarded before the path is inspected, so
// slashes inside them and a trailing slash in front of them do not affect the
// result. A string without any "/" is returned as is (minus query/fragment).
func SlugFromURL(rawURL string) string {
	// Cut at the first '?' or '#': everything after it is query/fragment and
	// must not take part in the search for the last "/".
	if cut := strings.IndexAny(rawURL, "?#"); cut >= 0 {
		rawURL = rawURL[:cut]
	}
	rawURL = strings.TrimRight(rawURL, "/")
	if idx := strings.LastIndex(rawURL, "/"); idx >= 0 {
		return rawURL[idx+1:]
	}
	return rawURL
}
// htmlEntityRe matches named entities (&name;) and decimal numeric entities
// (&#123;). Hexadecimal entities (&#x1F;) are not matched.
var htmlEntityRe = regexp.MustCompile(`&[a-zA-Z]+;|&#\d+;`)

// multiSpaceRe matches a run of one or more whitespace characters.
var multiSpaceRe = regexp.MustCompile(`\s+`)

// CleanText decodes common HTML entities and normalises whitespace.
//
// The entities &amp; &lt; &gt; &quot; &#39; &apos; &nbsp; and &#160; are
// decoded; every other named or decimal entity is removed. Whitespace runs are
// then collapsed to a single space and the result is trimmed.
//
// Entities are handled in a single pass so that decoded text is never
// re-interpreted: "AT&amp;T; x" yields "AT&T; x" and "&amp;lt;" yields "&lt;".
func CleanText(s string) string {
	s = htmlEntityRe.ReplaceAllStringFunc(s, func(entity string) string {
		switch entity {
		case "&amp;":
			return "&"
		case "&lt;":
			return "<"
		case "&gt;":
			return ">"
		case "&quot;":
			return `"`
		case "&#39;", "&apos;":
			return "'"
		case "&nbsp;", "&#160;":
			return " "
		}
		// Unknown entity: drop it.
		return ""
	})
	return strings.TrimSpace(multiSpaceRe.ReplaceAllString(s, " "))
}
// StatusFromString maps a free-form publication status label to a numeric
// status code. The label is trimmed and lowercased, then checked for known
// keywords by substring; the first matching group, in the order listed below,
// wins.
//
// Returned codes (intended to mirror the source.Status* constants, which are
// declared outside this file): 0 unknown, 1 ongoing, 2 completed, 3 licensed,
// 5 hiatus, 6 cancelled.
func StatusFromString(s string) int {
	s = strings.ToLower(strings.TrimSpace(s))

	// Order matters: groups are evaluated top to bottom.
	rules := []struct {
		code     int
		keywords []string
	}{
		{1, []string{"ongoing", "en cours", "releasing", "publishing", "airing", "devam", "laufend", "em lançamento", "актуален"}}, // StatusOngoing
		{2, []string{"completed", "complete", "terminé", "finalizado", "abgeschlossen", "завершён", "tamamlandı"}},                // StatusCompleted
		{3, []string{"licensed"}}, // StatusLicensed
		{5, []string{"hiatus", "on hiatus", "en pause"}},                           // StatusHiatus
		{6, []string{"cancelled", "canceled", "dropped", "abandonné", "заброшено"}}, // StatusCancelled
	}
	for _, rule := range rules {
		if anyWord(s, rule.keywords...) {
			return rule.code
		}
	}
	return 0 // StatusUnknown
}
// nextDataRe captures the contents of a <script> element whose id attribute is
// exactly "__NEXT_DATA__" (double-quoted), as embedded by Next.js pages.
var nextDataRe = regexp.MustCompile(`<script[^>]+id="__NEXT_DATA__"[^>]*>([\s\S]*?)</script>`)

// ExtractNextDataJSON returns the whitespace-trimmed body of the first
// __NEXT_DATA__ script tag found in html.
//
// When no such tag exists it returns (nil, nil). The payload is not validated
// as JSON, and the returned error is currently always nil.
func ExtractNextDataJSON(html string) (json.RawMessage, error) {
	groups := nextDataRe.FindStringSubmatch(html)
	if len(groups) >= 2 {
		payload := strings.TrimSpace(groups[1])
		return json.RawMessage(payload), nil
	}
	return nil, nil
}
// AbsURL resolves a potentially relative URL against a base URL string.
//
//   - An empty ref yields "".
//   - A ref starting with "http://" or "https://", or a "data:" URI, is
//     returned unchanged.
//   - A protocol-relative ref ("//host/path") takes the scheme of base
//     ("https" when base has none).
//   - A ref starting with "/" is appended to the origin (scheme and host) of
//     base.
//   - Any other ref is appended to base after a "/" separator; "." and ".."
//     segments are not resolved.
//
// base may be empty or lack a scheme; the function never panics on short
// input.
func AbsURL(base, ref string) string {
	if ref == "" {
		return ""
	}
	if strings.HasPrefix(ref, "http://") || strings.HasPrefix(ref, "https://") || strings.HasPrefix(ref, "data:") {
		return ref
	}
	base = strings.TrimRight(base, "/")

	// Length of the leading "scheme://" in base, or 0 when base has no scheme.
	// A "://" that only appears after the first "/" belongs to the path.
	prefix := 0
	if i := strings.Index(base, "://"); i >= 0 && !strings.Contains(base[:i], "/") {
		prefix = i + len("://")
	}

	if strings.HasPrefix(ref, "//") {
		if prefix == 0 {
			return "https:" + ref
		}
		// Keep "scheme:" and let ref supply the "//host/path" part.
		return base[:prefix-len("//")] + ref
	}
	if strings.HasPrefix(ref, "/") {
		// Absolute path: strip base down to its origin.
		if i := strings.Index(base[prefix:], "/"); i >= 0 {
			base = base[:prefix+i]
		}
		return base + ref
	}
	return base + "/" + ref
}
// ImgAttr picks an image URL out of an element's attribute map, preferring
// lazy-loading attributes over the plain src.
//
// Attributes are tried in this order: data-lazy-src, data-src, data-cfsrc,
// data-setbg, data-manga-src, src. The first non-empty value is resolved
// against baseURL with AbsURL. It returns "" when none of them is set.
func ImgAttr(attrs map[string]string, baseURL string) string {
	candidates := [...]string{
		"data-lazy-src",
		"data-src",
		"data-cfsrc",
		"data-setbg",
		"data-manga-src",
		"src",
	}
	for i := range candidates {
		value, ok := attrs[candidates[i]]
		if !ok || value == "" {
			continue
		}
		return AbsURL(baseURL, value)
	}
	return ""
}