ca609ccae7
Ports bases from previous session: util (shared helpers), bakkin, fmreader, foolslide, gigaviewer, gmanga, grouple, guya, heancms, hentaihand, kemono, madara, madtheme, mangadventure, mangahub, mangathemesia, mangaworld, mmrcms, senkuro, wpcomics.
182 lines
5.2 KiB
Go
182 lines
5.2 KiB
Go
package util
|
||
|
||
import (
|
||
"encoding/json"
|
||
"regexp"
|
||
"strconv"
|
||
"strings"
|
||
"time"
|
||
"unicode"
|
||
)
|
||
|
||
// ParseRelativeDate converts strings like "2 days ago", "3 hours ago" to unix milliseconds.
|
||
func ParseRelativeDate(s string) int64 {
|
||
s = strings.TrimSpace(strings.ToLower(s))
|
||
now := time.Now()
|
||
|
||
if strings.Contains(s, "just now") || strings.Contains(s, "sekarang") {
|
||
return now.UnixMilli()
|
||
}
|
||
if strings.Contains(s, "today") {
|
||
y, m, d := now.Date()
|
||
return time.Date(y, m, d, 0, 0, 0, 0, now.Location()).UnixMilli()
|
||
}
|
||
if strings.Contains(s, "yesterday") {
|
||
y, m, d := now.AddDate(0, 0, -1).Date()
|
||
return time.Date(y, m, d, 0, 0, 0, 0, now.Location()).UnixMilli()
|
||
}
|
||
|
||
num := extractLeadingNumber(s)
|
||
if num == 0 {
|
||
return 0
|
||
}
|
||
|
||
switch {
|
||
case anyWord(s, "second", "segundo", "giây", "detik"):
|
||
return now.Add(-time.Duration(num) * time.Second).UnixMilli()
|
||
case anyWord(s, "minute", "minuto", "min", "dakika", "phút", "menit"):
|
||
return now.Add(-time.Duration(num) * time.Minute).UnixMilli()
|
||
case anyWord(s, "hour", "hora", "heure", "saat", "jam", "giờ", "ore"):
|
||
return now.Add(-time.Duration(num) * time.Hour).UnixMilli()
|
||
case anyWord(s, "day", "día", "dia", "jour", "gün", "hari", "ngày", "วัน", "giorni"):
|
||
return now.AddDate(0, 0, -num).UnixMilli()
|
||
case anyWord(s, "week", "semana", "tuần"):
|
||
return now.AddDate(0, 0, -num*7).UnixMilli()
|
||
case anyWord(s, "month", "mes", "tháng"):
|
||
return now.AddDate(0, -num, 0).UnixMilli()
|
||
case anyWord(s, "year", "año", "năm"):
|
||
return now.AddDate(-num, 0, 0).UnixMilli()
|
||
}
|
||
return 0
|
||
}
|
||
|
||
// extractLeadingNumber returns the first run of ASCII decimal digits in s as
// an int. The run does not have to sit at the start of the string.
//
// It returns 0 when s contains no ASCII digits or when the digit run does not
// fit in an int.
func extractLeadingNumber(s string) int {
	for i, c := range s {
		// Non-ASCII digits are multi-byte and cannot be parsed by
		// strconv.Atoi, so they are skipped instead of being sliced
		// in the middle of a rune.
		if c > unicode.MaxASCII || !unicode.IsDigit(c) {
			continue
		}
		end := i + 1
		for end < len(s) && s[end] >= '0' && s[end] <= '9' {
			end++
		}
		n, err := strconv.Atoi(s[i:end])
		if err != nil {
			// Only reachable on overflow; a saturated value would be
			// turned into a nonsensical date by callers.
			return 0
		}
		return n
	}
	return 0
}
|
||
|
||
// anyWord reports whether s contains at least one of words as a substring.
// No word-boundary check is made, and an empty words list yields false.
func anyWord(s string, words ...string) bool {
	found := false
	for i := 0; i < len(words) && !found; i++ {
		found = strings.Contains(s, words[i])
	}
	return found
}
|
||
|
||
// ParseAbsoluteDate parses s with the given Go reference-time layout
// (e.g. "January 02, 2006" or "2006-01-02") and returns the result as a Unix
// timestamp in milliseconds.
//
// Surrounding whitespace is ignored, and values without zone information are
// interpreted as UTC. It returns 0 for an empty string or when s does not
// match layout.
func ParseAbsoluteDate(s, layout string) int64 {
	trimmed := strings.TrimSpace(s)
	if len(trimmed) == 0 {
		return 0
	}
	if parsed, err := time.ParseInLocation(layout, trimmed, time.UTC); err == nil {
		return parsed.UnixMilli()
	}
	return 0
}
|
||
|
||
// SlugFromURL returns the last non-empty path segment of a URL string.
//
// The query string and fragment are removed before the segment is located, so
// a trailing slash in front of "?" or "#" and slashes inside a query value do
// not affect the result. A string without any "/" is returned as is, minus
// its query and fragment.
func SlugFromURL(rawURL string) string {
	// Everything from the first '?' or '#' onwards is not part of the path.
	if i := strings.IndexAny(rawURL, "?#"); i >= 0 {
		rawURL = rawURL[:i]
	}
	rawURL = strings.TrimRight(rawURL, "/")
	if i := strings.LastIndexByte(rawURL, '/'); i >= 0 {
		return rawURL[i+1:]
	}
	return rawURL
}
|
||
|
||
// htmlEntityRe matches named (&name;) and decimal numeric (&#NN;) HTML entities.
var htmlEntityRe = regexp.MustCompile(`&[a-zA-Z]+;|&#\d+;`)

// multiSpaceRe matches one or more consecutive whitespace characters.
var multiSpaceRe = regexp.MustCompile(`\s+`)

// knownHTMLEntities lists the entities CleanText decodes. Any other entity
// matched by htmlEntityRe is removed from the text.
var knownHTMLEntities = map[string]string{
	"&amp;":  "&",
	"&lt;":   "<",
	"&gt;":   ">",
	"&quot;": `"`,
	"&#39;":  "'",
	"&#039;": "'",
	"&apos;": "'",
	"&nbsp;": " ",
	"&#160;": " ",
}

// CleanText decodes common HTML entities, drops the ones it does not know,
// collapses every whitespace run into a single space, and trims the result.
//
// Entities are handled in one pass, so text produced by decoding is never
// treated as an entity again: "&amp;lt;" becomes the literal "&lt;".
func CleanText(s string) string {
	s = htmlEntityRe.ReplaceAllStringFunc(s, func(entity string) string {
		// A missing key yields "", which removes unknown entities.
		return knownHTMLEntities[entity]
	})
	return strings.TrimSpace(multiSpaceRe.ReplaceAllString(s, " "))
}
|
||
|
||
// StatusFromString maps common status strings to source.Status* constants.
|
||
func StatusFromString(s string) int {
|
||
s = strings.ToLower(strings.TrimSpace(s))
|
||
switch {
|
||
case anyWord(s, "ongoing", "en cours", "releasing", "publishing", "airing", "devam", "laufend", "em lançamento", "актуален"):
|
||
return 1 // StatusOngoing
|
||
case anyWord(s, "completed", "complete", "terminé", "finalizado", "abgeschlossen", "завершён", "tamamlandı"):
|
||
return 2 // StatusCompleted
|
||
case anyWord(s, "licensed"):
|
||
return 3 // StatusLicensed
|
||
case anyWord(s, "hiatus", "on hiatus", "en pause"):
|
||
return 5 // StatusHiatus
|
||
case anyWord(s, "cancelled", "canceled", "dropped", "abandonné", "заброшено"):
|
||
return 6 // StatusCancelled
|
||
}
|
||
return 0 // StatusUnknown
|
||
}
|
||
|
||
// nextDataRe captures the contents of the first <script> element whose id
// attribute is "__NEXT_DATA__" (the Next.js page-data blob).
var nextDataRe = regexp.MustCompile(`<script[^>]+id="__NEXT_DATA__"[^>]*>([\s\S]*?)</script>`)

// ExtractNextDataJSON returns the whitespace-trimmed body of the Next.js
// __NEXT_DATA__ script tag found in html.
//
// The payload is returned verbatim and is not validated as JSON. When the tag
// is absent both return values are nil, so callers must check the message
// itself; the error result is currently always nil.
func ExtractNextDataJSON(html string) (json.RawMessage, error) {
	loc := nextDataRe.FindStringSubmatchIndex(html)
	if loc == nil {
		return nil, nil
	}
	// loc[2]:loc[3] delimits the first (and only) capture group.
	payload := strings.TrimSpace(html[loc[2]:loc[3]])
	return json.RawMessage(payload), nil
}
|
||
|
||
// AbsURL resolves a potentially relative URL against a base URL string.
//
//   - An empty ref yields "".
//   - A ref starting with "http://" or "https://" is returned unchanged.
//   - A protocol-relative ref ("//host/path") takes the scheme of base, or
//     "https" when base has no scheme.
//   - A ref starting with "/" is appended to the origin (scheme and host) of base.
//   - Any other ref is appended to base after a single "/".
//
// base is treated as a plain string and is never validated. Trailing slashes
// on base are ignored.
func AbsURL(base, ref string) string {
	if ref == "" {
		return ""
	}
	if strings.HasPrefix(ref, "http://") || strings.HasPrefix(ref, "https://") {
		return ref
	}
	base = strings.TrimRight(base, "/")

	if strings.HasPrefix(ref, "//") {
		if i := strings.Index(base, "://"); i >= 0 {
			return base[:i+1] + ref // keep "scheme:" from base
		}
		return "https:" + ref
	}

	if strings.HasPrefix(ref, "/") {
		// Absolute path: cut base down to its origin. The host begins right
		// after "://" when a scheme is present, otherwise at offset 0.
		hostStart := 0
		if i := strings.Index(base, "://"); i >= 0 {
			hostStart = i + len("://")
		}
		if i := strings.IndexByte(base[hostStart:], '/'); i >= 0 {
			base = base[:hostStart+i]
		}
		return base + ref
	}
	return base + "/" + ref
}
|
||
|
||
// ImgAttr returns the best image src from common lazy-loading data attributes.
|
||
// Checks data-lazy-src, data-src, data-cfsrc, data-setbg, then falls back to src.
|
||
func ImgAttr(attrs map[string]string, baseURL string) string {
|
||
for _, key := range []string{"data-lazy-src", "data-src", "data-cfsrc", "data-setbg", "data-manga-src", "src"} {
|
||
if v := attrs[key]; v != "" {
|
||
return AbsURL(baseURL, v)
|
||
}
|
||
}
|
||
return ""
|
||
}
|