validation for the ENV epg

This commit is contained in:
2025-12-03 17:52:21 +01:00
parent 26dd1e911e
commit 77406ea301

View File

@@ -6,6 +6,7 @@ import (
"compress/gzip"
"context"
"encoding/json"
"encoding/xml"
"fmt"
"io"
"log"
@@ -78,7 +79,7 @@ func (c *Cache) Status() map[string]interface{} {
}
}
// fetchOnce downloads and writes it to destPath as-is.
// fetchOnce downloads a file and writes it to destPath as-is.
func fetchOnce(url, destPath string) (string, error) {
client := &http.Client{
Timeout: 30 * time.Second,
@@ -129,7 +130,86 @@ func fetchOnce(url, destPath string) (string, error) {
return contentType, nil
}
// fetchGunzipOnce downloads a gzipped file
// fetchAndValidateXML downloads CACHE_URL, validates XML structure and basic content,
// and only writes a file if it passes:
// / - well-formed XML
// / - at least one <channel> element
// / - at least one <programme> element
func fetchAndValidateXML(url, destPath string) (string, error) {
client := &http.Client{
Timeout: 60 * time.Second,
}
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
return "", fmt.Errorf("creating request: %w", err)
}
resp, err := client.Do(req)
if err != nil {
return "", fmt.Errorf("performing request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return "", fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
tmpPath := destPath + ".tmp"
f, err := os.Create(tmpPath)
if err != nil {
return "", fmt.Errorf("creating temp file: %w", err)
}
// Tee the response body into the file while parsing XML.
tee := io.TeeReader(resp.Body, f)
dec := xml.NewDecoder(tee)
var hasChannel, hasProgramme bool
for {
tok, errTok := dec.Token()
if errTok == io.EOF {
break
}
if errTok != nil {
_ = f.Close()
_ = os.Remove(tmpPath)
return "", fmt.Errorf("xml parse error: %w", errTok)
}
switch se := tok.(type) {
case xml.StartElement:
if se.Name.Local == "channel" {
hasChannel = true
}
if se.Name.Local == "programme" {
hasProgramme = true
}
}
}
if err := f.Close(); err != nil {
_ = os.Remove(tmpPath)
return "", fmt.Errorf("closing temp file: %w", err)
}
if !hasChannel || !hasProgramme {
_ = os.Remove(tmpPath)
return "", fmt.Errorf("invalid XML: missing channel or programme elements")
}
if err := os.Rename(tmpPath, destPath); err != nil {
_ = os.Remove(tmpPath)
return "", fmt.Errorf("renaming temp file: %w", err)
}
// This is validated XML.
return "text/xml; charset=utf-8", nil
}
// fetchGunzipOnce downloads a gzipped file, gunzips it, optionally transforms
// the XML, then gzips it again with an internal filename and writes it to destPath.
func fetchGunzipOnce(url, destPath, innerName string, transformer func(io.Reader, io.Writer) error) (string, error) {
client := &http.Client{
Timeout: 320 * time.Second,
@@ -200,7 +280,7 @@ func fetchGunzipOnce(url, destPath, innerName string, transformer func(io.Reader
return contentType, nil
}
// transformChannelRefs rewrites channel identifiers for "*1" endpoints.
// transformChannelRefs rewrites channel identifiers for "x1" endpoints.
// - <channel id="SBS.6.nl"> -> id="SBS6.nl"
// - <programme channel="SBS.6.nl"> -> channel="SBS6.nl"
func transformChannelRefs(r io.Reader, w io.Writer) error {
@@ -304,6 +384,32 @@ func startFetcher(ctx context.Context, cache *Cache, url string, okInterval, fai
}()
}
// startFetcherXML is a copy of startFetcher but uses fetchAndValidateXML for CACHE_URL.
func startFetcherXML(ctx context.Context, cache *Cache, url string, okInterval, failInterval time.Duration) {
go func() {
for {
contentType, err := fetchAndValidateXML(url, cache.filePath)
if err != nil {
log.Printf("error fetching/validating XML %s: %v", url, err)
}
cache.Update(contentType, err)
var nextInterval time.Duration
if err != nil || !cache.hasData {
nextInterval = failInterval
} else {
nextInterval = okInterval
}
select {
case <-ctx.Done():
return
case <-time.After(nextInterval):
}
}
}()
}
func startFetcherGunzip(ctx context.Context, cache *Cache, url string, okInterval, failInterval time.Duration, innerName string, transformer func(io.Reader, io.Writer) error) {
go func() {
for {
@@ -362,7 +468,7 @@ func makeGzipHandler(cache *Cache, downloadName string) http.HandlerFunc {
}
func main() {
// /chc still uses the original CACHE_URL behavior, for the set provider.
// /chc endp. and root primary use CACHE_URL.
url := os.Getenv("CACHE_URL")
if url == "" {
log.Fatal("CACHE_URL env var is required")
@@ -379,9 +485,9 @@ func main() {
tmpDir := os.TempDir()
chcCacheFilePath := filepath.Join(tmpDir, "cache_chc.xml")
// ES
es0CacheFilePath := filepath.Join(tmpDir, "cache_es0.xml.gz")
es1CacheFilePath := filepath.Join(tmpDir, "cache_es1.xml.gz")
// ALL
all0CacheFilePath := filepath.Join(tmpDir, "cache_all0.xml.gz")
all1CacheFilePath := filepath.Join(tmpDir, "cache_all1.xml.gz")
// BG
bg0CacheFilePath := filepath.Join(tmpDir, "cache_bg0.xml.gz")
@@ -409,8 +515,8 @@ func main() {
chcCache := &Cache{filePath: chcCacheFilePath}
es0Cache := &Cache{filePath: es0CacheFilePath}
es1Cache := &Cache{filePath: es1CacheFilePath}
all0Cache := &Cache{filePath: all0CacheFilePath}
all1Cache := &Cache{filePath: all1CacheFilePath}
bg0Cache := &Cache{filePath: bg0CacheFilePath}
bg1Cache := &Cache{filePath: bg1CacheFilePath}
@@ -433,23 +539,23 @@ func main() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// /chc: same behavior as the old root.
startFetcher(ctx, chcCache, url, okInterval, failInterval)
// /chc: primary provider, with XML validation.
startFetcherXML(ctx, chcCache, url, okInterval, failInterval)
// Base URLs for each region
const (
esURL = "https://epgshare01.online/epgshare01/epg_ripper_ALL_SOURCES1.xml.gz"
bgURL = "https://epgshare01.online/epgshare01/epg_ripper_BG1.xml.gz"
deURL = "https://epgshare01.online/epgshare01/epg_ripper_DE1.xml.gz"
frURL = "https://epgshare01.online/epgshare01/epg_ripper_FR1.xml.gz"
itURL = "https://epgshare01.online/epgshare01/epg_ripper_IT1.xml.gz"
nlURL = "https://epgshare01.online/epgshare01/epg_ripper_NL1.xml.gz"
ukURL = "https://epgshare01.online/epgshare01/epg_ripper_UK1.xml.gz"
allURL = "https://epgshare01.online/epgshare01/epg_ripper_ALL_SOURCES1.xml.gz"
bgURL = "https://epgshare01.online/epgshare01/epg_ripper_BG1.xml.gz"
deURL = "https://epgshare01.online/epgshare01/epg_ripper_DE1.xml.gz"
frURL = "https://epgshare01.online/epgshare01/epg_ripper_FR1.xml.gz"
itURL = "https://epgshare01.online/epgshare01/epg_ripper_IT1.xml.gz"
nlURL = "https://epgshare01.online/epgshare01/epg_ripper_NL1.xml.gz"
ukURL = "https://epgshare01.online/epgshare01/epg_ripper_UK1.xml.gz"
)
// ES: es0 (raw), es1 (undotted)
startFetcherGunzip(ctx, es0Cache, esURL, okInterval, failInterval, "es0.xml", nil)
startFetcherGunzip(ctx, es1Cache, esURL, okInterval, failInterval, "es1.xml", transformChannelRefs)
// ALL (formerly ES): all0 (raw), all1 (undotted)
startFetcherGunzip(ctx, all0Cache, allURL, okInterval, failInterval, "all0.xml", nil)
startFetcherGunzip(ctx, all1Cache, allURL, okInterval, failInterval, "all1.xml", transformChannelRefs)
// BG: bg0 (raw), bg1 (undotted)
startFetcherGunzip(ctx, bg0Cache, bgURL, okInterval, failInterval, "bg0.xml", nil)
@@ -477,13 +583,36 @@ func main() {
mux := http.NewServeMux()
// Root: always 401. Against the sniffers grabbing bandwidth
// Root: serve primary (chc) if valid; otherwise fall back to NL1 (undotted gz).
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("WWW-Authenticate", `Basic realm="restricted"`)
http.Error(w, "unauthorized", http.StatusUnauthorized)
// Try primary (chc).
if path, contentType, ok := chcCache.GetContent(); ok {
if contentType != "" {
w.Header().Set("Content-Type", contentType)
}
w.Header().Set("X-Root-Source", "primary_chc")
http.ServeFile(w, r, path)
return
}
// Fallback: NL1 (undotted).
if path, contentType, ok := nl1Cache.GetContent(); ok {
if contentType == "" {
contentType = "application/gzip"
}
w.Header().Set("Content-Type", contentType)
w.Header().Set("Content-Disposition", `attachment; filename="nl1.xml.gz"`)
w.Header().Set("X-Root-Source", "backup_nl1")
http.ServeFile(w, r, path)
return
}
// Nothing available.
w.Header().Set("X-Root-Source", "unavailable")
http.Error(w, "no valid primary or backup EPG available", http.StatusServiceUnavailable)
})
// /chc: original from CACHE_URL.
// /chc: original from CACHE_URL (validated XML).
mux.HandleFunc("/chc", func(w http.ResponseWriter, r *http.Request) {
path, contentType, ok := chcCache.GetContent()
if !ok {
@@ -497,9 +626,9 @@ func main() {
http.ServeFile(w, r, path)
})
// ES endpoints
mux.HandleFunc("/es0", makeGzipHandler(es0Cache, "es0.xml.gz"))
mux.HandleFunc("/es1", makeGzipHandler(es1Cache, "es1.xml.gz"))
// ALL endpoints (formerly ES)
mux.HandleFunc("/all0", makeGzipHandler(all0Cache, "all0.xml.gz"))
mux.HandleFunc("/all1", makeGzipHandler(all1Cache, "all1.xml.gz"))
// BG endpoints
mux.HandleFunc("/bg0", makeGzipHandler(bg0Cache, "bg0.xml.gz"))
@@ -527,11 +656,20 @@ func main() {
// Combined status endpoint.
mux.HandleFunc("/status", func(w http.ResponseWriter, r *http.Request) {
status := map[string]interface{}{
"chc": chcCache.Status(),
// Compute current root source for visibility.
rootSource := "unavailable"
if _, _, ok := chcCache.GetContent(); ok {
rootSource = "primary_chc"
} else if _, _, ok := nl1Cache.GetContent(); ok {
rootSource = "backup_nl1"
}
"es0": es0Cache.Status(),
"es1": es1Cache.Status(),
status := map[string]interface{}{
"root_source": rootSource,
"chc": chcCache.Status(),
"all0": all0Cache.Status(),
"all1": all1Cache.Status(),
"bg0": bg0Cache.Status(),
"bg1": bg1Cache.Status(),
@@ -562,8 +700,8 @@ func main() {
})
addr := ":" + port
log.Printf("Starting server on %s\n /chc -> %s\n EPG base URLs: ES:%s BG:%s DE:%s FR:%s IT:%s NL:%s UK:%s\n (ok interval: %s, fail interval: %s)",
addr, url, esURL, bgURL, deURL, frURL, itURL, nlURL, ukURL, okInterval, failInterval)
log.Printf("Starting server on %s\n /chc -> %s\n EPG base URLs: ALL:%s BG:%s DE:%s FR:%s IT:%s NL:%s UK:%s\n (ok interval: %s, fail interval: %s)",
addr, url, allURL, bgURL, deURL, frURL, itURL, nlURL, ukURL, okInterval, failInterval)
if err := http.ListenAndServe(addr, mux); err != nil {
log.Fatalf("server error: %v", err)