package util import ( "encoding/json" "regexp" "strconv" "strings" "time" "unicode" ) // ParseRelativeDate converts strings like "2 days ago", "3 hours ago" to unix milliseconds. func ParseRelativeDate(s string) int64 { s = strings.TrimSpace(strings.ToLower(s)) now := time.Now() if strings.Contains(s, "just now") || strings.Contains(s, "sekarang") { return now.UnixMilli() } if strings.Contains(s, "today") { y, m, d := now.Date() return time.Date(y, m, d, 0, 0, 0, 0, now.Location()).UnixMilli() } if strings.Contains(s, "yesterday") { y, m, d := now.AddDate(0, 0, -1).Date() return time.Date(y, m, d, 0, 0, 0, 0, now.Location()).UnixMilli() } num := extractLeadingNumber(s) if num == 0 { return 0 } switch { case anyWord(s, "second", "segundo", "giây", "detik"): return now.Add(-time.Duration(num) * time.Second).UnixMilli() case anyWord(s, "minute", "minuto", "min", "dakika", "phút", "menit"): return now.Add(-time.Duration(num) * time.Minute).UnixMilli() case anyWord(s, "hour", "hora", "heure", "saat", "jam", "giờ", "ore"): return now.Add(-time.Duration(num) * time.Hour).UnixMilli() case anyWord(s, "day", "día", "dia", "jour", "gün", "hari", "ngày", "วัน", "giorni"): return now.AddDate(0, 0, -num).UnixMilli() case anyWord(s, "week", "semana", "tuần"): return now.AddDate(0, 0, -num*7).UnixMilli() case anyWord(s, "month", "mes", "tháng"): return now.AddDate(0, -num, 0).UnixMilli() case anyWord(s, "year", "año", "năm"): return now.AddDate(-num, 0, 0).UnixMilli() } return 0 } func extractLeadingNumber(s string) int { for i, c := range s { if unicode.IsDigit(c) { end := i + 1 for end < len(s) && s[end] >= '0' && s[end] <= '9' { end++ } n, _ := strconv.Atoi(s[i:end]) return n } } return 0 } func anyWord(s string, words ...string) bool { for _, w := range words { if strings.Contains(s, w) { return true } } return false } // ParseAbsoluteDate parses a date string using common Go reference time layouts. // layout uses Go time format (e.g. "January 02, 2006", "2006-01-02"). 
//
// Surrounding whitespace is trimmed before parsing, and the value is read in
// UTC (time.ParseInLocation with time.UTC). The result is Unix milliseconds;
// 0 is returned when s is empty or does not match layout.
func ParseAbsoluteDate(s, layout string) int64 {
	s = strings.TrimSpace(s)
	if s == "" {
		return 0
	}
	t, err := time.ParseInLocation(layout, s, time.UTC)
	if err != nil {
		return 0
	}
	return t.UnixMilli()
}

// SlugFromURL returns the last non-empty path segment of a URL string.
//
// The fragment and query are removed before the path is inspected, so a "/"
// inside them is never mistaken for a path separator. When rawURL contains no
// "/" the remaining text is returned as is.
func SlugFromURL(rawURL string) string {
	if i := strings.IndexByte(rawURL, '#'); i >= 0 {
		rawURL = rawURL[:i]
	}
	if i := strings.IndexByte(rawURL, '?'); i >= 0 {
		rawURL = rawURL[:i]
	}
	rawURL = strings.TrimRight(rawURL, "/")
	if i := strings.LastIndex(rawURL, "/"); i >= 0 {
		return rawURL[i+1:]
	}
	return rawURL
}

var (
	// htmlEntityRe matches named (&name;) and decimal (&#123;) HTML entities.
	htmlEntityRe = regexp.MustCompile(`&[a-zA-Z]+;|&#\d+;`)
	// multiSpaceRe matches any run of whitespace.
	multiSpaceRe = regexp.MustCompile(`\s+`)
)

// htmlEntityText maps the entities CleanText decodes to their replacement
// text. Non-breaking spaces become ordinary spaces so that they take part in
// whitespace normalisation.
var htmlEntityText = map[string]string{
	"&amp;":  "&",
	"&lt;":   "<",
	"&gt;":   ">",
	"&quot;": `"`,
	"&#39;":  "'",
	"&#039;": "'",
	"&apos;": "'",
	"&nbsp;": " ",
	"&#160;": " ",
}

// CleanText decodes common HTML entities and normalises whitespace.
//
// Entities listed in htmlEntityText are decoded and every other named or
// decimal entity is removed. Both happen in a single pass, so text produced
// by decoding is never re-scanned ("&amp;lt;" becomes the literal "&lt;").
// Runs of whitespace are then collapsed to one space and the result trimmed.
func CleanText(s string) string {
	s = htmlEntityRe.ReplaceAllStringFunc(s, func(entity string) string {
		// A missing key yields "", which drops unknown entities.
		return htmlEntityText[entity]
	})
	return strings.TrimSpace(multiSpaceRe.ReplaceAllString(s, " "))
}

// statusKeywords lists, in matching order, the status code returned when the
// input contains any of the given lower-case keywords.
var statusKeywords = []struct {
	status int
	words  []string
}{
	{1, []string{"ongoing", "en cours", "releasing", "publishing", "airing", "devam", "laufend", "em lançamento", "актуален"}}, // StatusOngoing
	{2, []string{"completed", "complete", "terminé", "finalizado", "abgeschlossen", "завершён", "tamamlandı"}},                // StatusCompleted
	{3, []string{"licensed"}},                                        // StatusLicensed
	{5, []string{"hiatus", "on hiatus", "en pause"}},                 // StatusHiatus
	{6, []string{"cancelled", "canceled", "dropped", "abandonné", "заброшено"}}, // StatusCancelled
}

// StatusFromString maps common status strings to source.Status* constants.
//
// s is trimmed and lower-cased, then checked against each keyword group in
// order using substring matching; the first group with a hit wins. It returns
// 0 (StatusUnknown) when nothing matches.
func StatusFromString(s string) int {
	s = strings.ToLower(strings.TrimSpace(s))
	for _, group := range statusKeywords {
		for _, w := range group.words {
			if strings.Contains(s, w) {
				return group.status
			}
		}
	}
	return 0 // StatusUnknown
}

// nextDataRe matches the JSON blob inside a NextJS __NEXT_DATA__ script tag.
// The first capture group holds the script body; the match is non-greedy, so
// it ends at the first closing </script> tag after the opening tag.
var nextDataRe = regexp.MustCompile(`<script[^>]+id="__NEXT_DATA__"[^>]*>([\s\S]*?)</script>`)

// ExtractNextDataJSON extracts the JSON object from a NextJS __NEXT_DATA__ script tag.
//
// It returns the whitespace-trimmed script body without validating it as
// JSON. When html contains no such tag both results are nil, so callers must
// check the message as well as the error. The error is currently always nil.
func ExtractNextDataJSON(html string) (json.RawMessage, error) {
	m := nextDataRe.FindStringSubmatch(html)
	if len(m) < 2 {
		return nil, nil
	}
	raw := strings.TrimSpace(m[1])
	return json.RawMessage(raw), nil
}

// AbsURL resolves a potentially relative URL against a base URL string.
//
// Resolution rules, in order:
//   - an empty ref yields "";
//   - a ref starting with "http://" or "https://" is returned unchanged;
//   - a protocol-relative ref ("//host/path") takes the scheme of base, or
//     "https" when base has none;
//   - a ref starting with "/" is appended to the origin (scheme and host) of
//     base;
//   - anything else is appended to base after a single "/".
//
// This is plain string joining, not a full RFC 3986 resolver: "." and ".."
// segments are not collapsed and the last path segment of base is kept.
func AbsURL(base, ref string) string {
	if ref == "" {
		return ""
	}
	if strings.HasPrefix(ref, "http://") || strings.HasPrefix(ref, "https://") {
		return ref
	}
	if strings.HasPrefix(ref, "//") {
		if i := strings.Index(base, "://"); i > 0 {
			return base[:i+1] + ref // base[:i+1] is "scheme:"
		}
		return "https:" + ref
	}

	base = strings.TrimRight(base, "/")
	if strings.HasPrefix(ref, "/") {
		// Absolute path: cut base back to its origin. The host starts right
		// after "://" when a scheme is present, otherwise at offset 0.
		hostStart := 0
		if i := strings.Index(base, "://"); i >= 0 {
			hostStart = i + len("://")
		}
		if j := strings.IndexByte(base[hostStart:], '/'); j >= 0 {
			base = base[:hostStart+j]
		}
		return base + ref
	}
	return base + "/" + ref
}

// imgAttrKeys lists the attributes ImgAttr consults, most preferred first.
var imgAttrKeys = []string{"data-lazy-src", "data-src", "data-cfsrc", "data-setbg", "data-manga-src", "src"}

// ImgAttr returns the best image src from common lazy-loading data attributes.
// Checks data-lazy-src, data-src, data-cfsrc, data-setbg, data-manga-src, then
// falls back to src. The first non-empty value is resolved against baseURL
// with AbsURL; "" is returned when none of the attributes is set.
func ImgAttr(attrs map[string]string, baseURL string) string {
	for _, key := range imgAttrKeys {
		if v := attrs[key]; v != "" {
			return AbsURL(baseURL, v)
		}
	}
	return ""
}