182 lines
5.2 KiB
Go
Executable File
182 lines
5.2 KiB
Go
Executable File
package util
|
||
|
||
import (
|
||
"encoding/json"
|
||
"regexp"
|
||
"strconv"
|
||
"strings"
|
||
"time"
|
||
"unicode"
|
||
)
|
||
|
||
// ParseRelativeDate converts strings like "2 days ago", "3 hours ago" to unix milliseconds.
|
||
func ParseRelativeDate(s string) int64 {
|
||
s = strings.TrimSpace(strings.ToLower(s))
|
||
now := time.Now()
|
||
|
||
if strings.Contains(s, "just now") || strings.Contains(s, "sekarang") {
|
||
return now.UnixMilli()
|
||
}
|
||
if strings.Contains(s, "today") {
|
||
y, m, d := now.Date()
|
||
return time.Date(y, m, d, 0, 0, 0, 0, now.Location()).UnixMilli()
|
||
}
|
||
if strings.Contains(s, "yesterday") {
|
||
y, m, d := now.AddDate(0, 0, -1).Date()
|
||
return time.Date(y, m, d, 0, 0, 0, 0, now.Location()).UnixMilli()
|
||
}
|
||
|
||
num := extractLeadingNumber(s)
|
||
if num == 0 {
|
||
return 0
|
||
}
|
||
|
||
switch {
|
||
case anyWord(s, "second", "segundo", "giây", "detik"):
|
||
return now.Add(-time.Duration(num) * time.Second).UnixMilli()
|
||
case anyWord(s, "minute", "minuto", "min", "dakika", "phút", "menit"):
|
||
return now.Add(-time.Duration(num) * time.Minute).UnixMilli()
|
||
case anyWord(s, "hour", "hora", "heure", "saat", "jam", "giờ", "ore"):
|
||
return now.Add(-time.Duration(num) * time.Hour).UnixMilli()
|
||
case anyWord(s, "day", "día", "dia", "jour", "gün", "hari", "ngày", "วัน", "giorni"):
|
||
return now.AddDate(0, 0, -num).UnixMilli()
|
||
case anyWord(s, "week", "semana", "tuần"):
|
||
return now.AddDate(0, 0, -num*7).UnixMilli()
|
||
case anyWord(s, "month", "mes", "tháng"):
|
||
return now.AddDate(0, -num, 0).UnixMilli()
|
||
case anyWord(s, "year", "año", "năm"):
|
||
return now.AddDate(-num, 0, 0).UnixMilli()
|
||
}
|
||
return 0
|
||
}
|
||
|
||
// extractLeadingNumber returns the value of the first run of ASCII decimal
// digits found anywhere in s, or 0 when there is none.
//
// Non-ASCII digits (which unicode.IsDigit also reports as digits) are skipped
// rather than parsed: they are multi-byte, strconv.Atoi does not accept them,
// and stopping on them would hide an ASCII number later in the string.
// A digit run too large for an int also yields 0 instead of a clamped value.
func extractLeadingNumber(s string) int {
	for i, r := range s {
		if r > unicode.MaxASCII || !unicode.IsDigit(r) {
			continue
		}
		// r is a single-byte ASCII digit, so byte indexing is safe from here.
		end := i + 1
		for end < len(s) && s[end] >= '0' && s[end] <= '9' {
			end++
		}
		n, err := strconv.Atoi(s[i:end])
		if err != nil {
			// Only reachable on overflow; treat it as "no usable number".
			return 0
		}
		return n
	}
	return 0
}
|
||
|
||
// anyWord reports whether s contains at least one of words.
// Matching is plain substring containment, not whole-word matching.
func anyWord(s string, words ...string) bool {
	found := false
	for i := 0; i < len(words) && !found; i++ {
		found = strings.Contains(s, words[i])
	}
	return found
}
|
||
|
||
// ParseAbsoluteDate parses s according to layout and returns the instant as
// Unix milliseconds.
//
// layout is a Go reference-time layout (e.g. "January 02, 2006",
// "2006-01-02"). Surrounding whitespace in s is ignored, and UTC is passed to
// time.ParseInLocation as the location. An empty or unparseable
// input yields 0.
func ParseAbsoluteDate(s, layout string) int64 {
	trimmed := strings.TrimSpace(s)
	if trimmed == "" {
		return 0
	}
	if parsed, err := time.ParseInLocation(layout, trimmed, time.UTC); err == nil {
		return parsed.UnixMilli()
	}
	return 0
}
|
||
|
||
// SlugFromURL returns the last non-empty path segment of a URL string.
//
// The query string and fragment are removed before the path is examined, so
// a trailing slash in front of them (".../foo/?page=2") or a slash inside
// them (".../foo?next=/home") does not affect the result. Trailing slashes
// are then ignored. If no slash remains, the whole remaining string is
// returned.
func SlugFromURL(rawURL string) string {
	// Whichever of '?' or '#' comes first ends the path component.
	if cut := strings.IndexAny(rawURL, "?#"); cut >= 0 {
		rawURL = rawURL[:cut]
	}
	rawURL = strings.TrimRight(rawURL, "/")
	if idx := strings.LastIndexByte(rawURL, '/'); idx >= 0 {
		return rawURL[idx+1:]
	}
	return rawURL
}
|
||
|
||
var (
	// htmlEntityRe matches named (&name;) and decimal numeric (&#123;) HTML
	// entities.
	htmlEntityRe = regexp.MustCompile(`&[a-zA-Z]+;|&#\d+;`)

	// multiSpaceRe matches one or more consecutive whitespace characters.
	multiSpaceRe = regexp.MustCompile(`\s+`)

	// commonEntityReplacer decodes the handful of entities that are worth
	// keeping as text. It is built once at package scope instead of on every
	// CleanText call.
	commonEntityReplacer = strings.NewReplacer(
		"&amp;", "&", "&lt;", "<", "&gt;", ">",
		"&quot;", `"`, "&#39;", "'", "&apos;", "'",
		"&nbsp;", " ", "&#160;", " ",
	)
)

// CleanText decodes common HTML entities and normalises whitespace.
//
// Processing order:
//  1. the common entities (&amp; &lt; &gt; &quot; &#39; &apos; &nbsp; &#160;)
//     are decoded in a single pass;
//  2. any remaining named or decimal numeric entity is removed;
//  3. every run of whitespace is collapsed to a single space and the result
//     is trimmed.
func CleanText(s string) string {
	s = commonEntityReplacer.Replace(s)
	s = htmlEntityRe.ReplaceAllString(s, "")
	return strings.TrimSpace(multiSpaceRe.ReplaceAllString(s, " "))
}
|
||
|
||
// StatusFromString maps common status strings to source.Status* constants.
|
||
func StatusFromString(s string) int {
|
||
s = strings.ToLower(strings.TrimSpace(s))
|
||
switch {
|
||
case anyWord(s, "ongoing", "en cours", "releasing", "publishing", "airing", "devam", "laufend", "em lançamento", "актуален"):
|
||
return 1 // StatusOngoing
|
||
case anyWord(s, "completed", "complete", "terminé", "finalizado", "abgeschlossen", "завершён", "tamamlandı"):
|
||
return 2 // StatusCompleted
|
||
case anyWord(s, "licensed"):
|
||
return 3 // StatusLicensed
|
||
case anyWord(s, "hiatus", "on hiatus", "en pause"):
|
||
return 5 // StatusHiatus
|
||
case anyWord(s, "cancelled", "canceled", "dropped", "abandonné", "заброшено"):
|
||
return 6 // StatusCancelled
|
||
}
|
||
return 0 // StatusUnknown
|
||
}
|
||
|
||
// nextDataRe captures the contents of a <script> element whose id attribute
// is the double-quoted value __NEXT_DATA__ (the tag Next.js uses to embed its
// page data). The capture is non-greedy, so it stops at the first </script>.
var nextDataRe = regexp.MustCompile(`<script[^>]+id="__NEXT_DATA__"[^>]*>([\s\S]*?)</script>`)

// ExtractNextDataJSON returns the whitespace-trimmed body of the first
// __NEXT_DATA__ script tag found in html.
//
// The payload is returned as-is; it is not validated as JSON. When the page
// has no such tag both return values are nil. The error result is currently
// always nil.
func ExtractNextDataJSON(html string) (json.RawMessage, error) {
	groups := nextDataRe.FindStringSubmatch(html)
	if len(groups) <= 1 {
		// No match (FindStringSubmatch returned nil).
		return nil, nil
	}
	payload := strings.TrimSpace(groups[1])
	return json.RawMessage(payload), nil
}
|
||
|
||
// AbsURL resolves a potentially relative URL against a base URL string.
//
// Rules, applied in order:
//   - an empty ref yields "";
//   - a ref starting with "http://" or "https://" is returned unchanged;
//   - a protocol-relative ref ("//host/path") takes the scheme of base, or
//     "https" when base has none;
//   - a ref starting with "/" is appended to the origin of base (scheme and
//     host, i.e. everything before the first "/" after "://");
//   - any other ref is appended to base with a single "/" in between.
//
// Trailing slashes on base are ignored. base may be empty or shorter than a
// full URL; the function never panics on short input.
func AbsURL(base, ref string) string {
	if ref == "" {
		return ""
	}
	if strings.HasPrefix(ref, "http://") || strings.HasPrefix(ref, "https://") {
		return ref
	}

	base = strings.TrimRight(base, "/")
	schemeEnd := strings.Index(base, "://") // -1 when base carries no scheme

	if strings.HasPrefix(ref, "//") {
		// Protocol-relative: only the scheme is inherited from base.
		if schemeEnd > 0 {
			return base[:schemeEnd+1] + ref
		}
		return "https:" + ref
	}

	if strings.HasPrefix(ref, "/") {
		// Absolute path: strip base down to its origin. The host starts
		// right after "://" when present, otherwise at the beginning.
		hostStart := 0
		if schemeEnd >= 0 {
			hostStart = schemeEnd + len("://")
		}
		if i := strings.IndexByte(base[hostStart:], '/'); i >= 0 {
			base = base[:hostStart+i]
		}
		return base + ref
	}

	return base + "/" + ref
}
|
||
|
||
// ImgAttr returns the best image src from common lazy-loading data attributes.
|
||
// Checks data-lazy-src, data-src, data-cfsrc, data-setbg, then falls back to src.
|
||
func ImgAttr(attrs map[string]string, baseURL string) string {
|
||
for _, key := range []string{"data-lazy-src", "data-src", "data-cfsrc", "data-setbg", "data-manga-src", "src"} {
|
||
if v := attrs[key]; v != "" {
|
||
return AbsURL(baseURL, v)
|
||
}
|
||
}
|
||
return ""
|
||
}
|