validation for the ENV epg

2025-12-03 17:52:21 +01:00
parent 26dd1e911e
commit 77406ea301


@@ -6,6 +6,7 @@ import (
"compress/gzip" "compress/gzip"
"context" "context"
"encoding/json" "encoding/json"
"encoding/xml"
"fmt" "fmt"
"io" "io"
"log" "log"
@@ -78,7 +79,7 @@ func (c *Cache) Status() map[string]interface{} {
 	}
 }
 
-// fetchOnce downloads and writes it to destPath as-is.
+// fetchOnce downloads a file and writes it to destPath as-is.
 func fetchOnce(url, destPath string) (string, error) {
 	client := &http.Client{
 		Timeout: 30 * time.Second,
@@ -129,7 +130,86 @@ func fetchOnce(url, destPath string) (string, error) {
 	return contentType, nil
 }
 
-// fetchGunzipOnce downloads a gzipped file
+// fetchAndValidateXML downloads the given URL (CACHE_URL in practice) and
+// writes the result to destPath only if the payload passes all checks:
+//   - well-formed XML
+//   - at least one <channel> element
+//   - at least one <programme> element
+func fetchAndValidateXML(url, destPath string) (string, error) {
+	client := &http.Client{
+		Timeout: 60 * time.Second,
+	}
+	req, err := http.NewRequest(http.MethodGet, url, nil)
+	if err != nil {
+		return "", fmt.Errorf("creating request: %w", err)
+	}
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", fmt.Errorf("performing request: %w", err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return "", fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+	}
+	tmpPath := destPath + ".tmp"
+	f, err := os.Create(tmpPath)
+	if err != nil {
+		return "", fmt.Errorf("creating temp file: %w", err)
+	}
+	// Tee the response body into the file while parsing XML.
+	tee := io.TeeReader(resp.Body, f)
+	dec := xml.NewDecoder(tee)
+	var hasChannel, hasProgramme bool
+	for {
+		tok, errTok := dec.Token()
+		if errTok == io.EOF {
+			break
+		}
+		if errTok != nil {
+			_ = f.Close()
+			_ = os.Remove(tmpPath)
+			return "", fmt.Errorf("xml parse error: %w", errTok)
+		}
+		switch se := tok.(type) {
+		case xml.StartElement:
+			if se.Name.Local == "channel" {
+				hasChannel = true
+			}
+			if se.Name.Local == "programme" {
+				hasProgramme = true
+			}
+		}
+	}
+	if err := f.Close(); err != nil {
+		_ = os.Remove(tmpPath)
+		return "", fmt.Errorf("closing temp file: %w", err)
+	}
+	if !hasChannel || !hasProgramme {
+		_ = os.Remove(tmpPath)
+		return "", fmt.Errorf("invalid XML: missing channel or programme elements")
+	}
+	if err := os.Rename(tmpPath, destPath); err != nil {
+		_ = os.Remove(tmpPath)
+		return "", fmt.Errorf("renaming temp file: %w", err)
+	}
+	// The content is validated XML; report a stable content type.
+	return "text/xml; charset=utf-8", nil
+}
+
+// fetchGunzipOnce downloads a gzipped file, gunzips it, optionally transforms
+// the XML, then gzips it again with an internal filename and writes it to destPath.
 func fetchGunzipOnce(url, destPath, innerName string, transformer func(io.Reader, io.Writer) error) (string, error) {
 	client := &http.Client{
 		Timeout: 320 * time.Second,
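
Note: the validator tees the body into a temp file while the XML decoder consumes it, so nothing replaces destPath until the whole document has parsed and both element checks have passed. A quick way to exercise it is an httptest round-trip; this sketch assumes it lives as a sibling file in the same package, and the checkValidator name and epg_check.xml path are made up for illustration:

package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
)

// checkValidator is an illustrative smoke test, not part of this commit.
func checkValidator() {
	// Serve a tiny but structurally valid XMLTV document.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprint(w, `<tv><channel id="c1"/><programme channel="c1"/></tv>`)
	}))
	defer srv.Close()

	dest := filepath.Join(os.TempDir(), "epg_check.xml") // hypothetical scratch path
	ct, err := fetchAndValidateXML(srv.URL, dest)
	fmt.Println(ct, err) // expect "text/xml; charset=utf-8" and a nil error
}

Dropping the <programme> element from the served document should make the same call fail with the "missing channel or programme elements" error and leave destPath untouched.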
@@ -200,7 +280,7 @@ func fetchGunzipOnce(url, destPath, innerName string, transformer func(io.Reader
 	return contentType, nil
 }
 
-// transformChannelRefs rewrites channel identifiers for "*1" endpoints.
+// transformChannelRefs rewrites channel identifiers for "x1" endpoints.
 // - <channel id="SBS.6.nl"> -> id="SBS6.nl"
 // - <programme channel="SBS.6.nl"> -> channel="SBS6.nl"
 func transformChannelRefs(r io.Reader, w io.Writer) error {
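
This hunk only touches the comment, so the body of transformChannelRefs is not shown. For orientation, here is a minimal sketch of one way such an undotting pass can be written with encoding/xml; undotSketch, its keep-the-last-dot rule, and the token-copy loop are illustrative assumptions, not the repository's actual code:

package main // sketch: a sibling file in the same package

import (
	"encoding/xml"
	"io"
	"strings"
)

// undotSketch drops the dots before the final suffix, e.g. "SBS.6.nl" -> "SBS6.nl".
// The exact rule is inferred from the examples in the comment above.
func undotSketch(id string) string {
	i := strings.LastIndexByte(id, '.')
	if i < 0 {
		return id
	}
	return strings.ReplaceAll(id[:i], ".", "") + id[i:]
}

// transformSketch streams tokens from r to w, rewriting channel ids and
// programme channel refs as it goes. Illustrative only.
func transformSketch(r io.Reader, w io.Writer) error {
	dec := xml.NewDecoder(r)
	enc := xml.NewEncoder(w)
	for {
		tok, err := dec.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		if se, ok := tok.(xml.StartElement); ok {
			for i, a := range se.Attr {
				if (se.Name.Local == "channel" && a.Name.Local == "id") ||
					(se.Name.Local == "programme" && a.Name.Local == "channel") {
					se.Attr[i].Value = undotSketch(a.Value)
				}
			}
			tok = se
		}
		if err := enc.EncodeToken(tok); err != nil {
			return err
		}
	}
	return enc.Flush()
}

Streaming token-by-token keeps memory flat even on the large ALL_SOURCES feed, which is presumably why the real transformer takes an io.Reader/io.Writer pair.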
@@ -304,6 +384,32 @@ func startFetcher(ctx context.Context, cache *Cache, url string, okInterval, fai
 	}()
 }
 
+// startFetcherXML is a copy of startFetcher but uses fetchAndValidateXML for CACHE_URL.
+func startFetcherXML(ctx context.Context, cache *Cache, url string, okInterval, failInterval time.Duration) {
+	go func() {
+		for {
+			contentType, err := fetchAndValidateXML(url, cache.filePath)
+			if err != nil {
+				log.Printf("error fetching/validating XML %s: %v", url, err)
+			}
+			cache.Update(contentType, err)
+			var nextInterval time.Duration
+			if err != nil || !cache.hasData {
+				nextInterval = failInterval
+			} else {
+				nextInterval = okInterval
+			}
+			select {
+			case <-ctx.Done():
+				return
+			case <-time.After(nextInterval):
+			}
+		}
+	}()
+}
+
 func startFetcherGunzip(ctx context.Context, cache *Cache, url string, okInterval, failInterval time.Duration, innerName string, transformer func(io.Reader, io.Writer) error) {
 	go func() {
 		for {
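
As the comment admits, startFetcherXML duplicates startFetcher's poll loop with only the fetch call swapped. If a third variant ever appears, passing the fetch function in would keep a single loop. A possible refactor, sketched as a sibling file in the same package (startFetcherWith is a made-up name, not in this commit; Cache, Update, filePath, and hasData are as shown in this diff):

package main

import (
	"context"
	"log"
	"time"
)

// startFetcherWith is a hypothetical generalization of startFetcher and
// startFetcherXML: the shared poll loop with the fetch function injected.
func startFetcherWith(ctx context.Context, cache *Cache, url string,
	okInterval, failInterval time.Duration,
	fetch func(url, destPath string) (string, error)) {
	go func() {
		for {
			contentType, err := fetch(url, cache.filePath)
			if err != nil {
				log.Printf("error fetching %s: %v", url, err)
			}
			cache.Update(contentType, err)
			next := okInterval
			if err != nil || !cache.hasData {
				next = failInterval
			}
			select {
			case <-ctx.Done():
				return
			case <-time.After(next):
			}
		}
	}()
}

Call sites would then read startFetcherWith(ctx, chcCache, url, okInterval, failInterval, fetchAndValidateXML), and the gunzip variant could be adapted the same way with a closure.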
@@ -362,7 +468,7 @@ func makeGzipHandler(cache *Cache, downloadName string) http.HandlerFunc {
 }
 
 func main() {
-	// /chc still uses the original CACHE_URL behavior, for the set provider.
+	// Both /chc and the root primary use CACHE_URL.
 	url := os.Getenv("CACHE_URL")
 	if url == "" {
 		log.Fatal("CACHE_URL env var is required")
@@ -379,9 +485,9 @@ func main() {
 	tmpDir := os.TempDir()
 	chcCacheFilePath := filepath.Join(tmpDir, "cache_chc.xml")
-	// ES
-	es0CacheFilePath := filepath.Join(tmpDir, "cache_es0.xml.gz")
-	es1CacheFilePath := filepath.Join(tmpDir, "cache_es1.xml.gz")
+	// ALL
+	all0CacheFilePath := filepath.Join(tmpDir, "cache_all0.xml.gz")
+	all1CacheFilePath := filepath.Join(tmpDir, "cache_all1.xml.gz")
 	// BG
 	bg0CacheFilePath := filepath.Join(tmpDir, "cache_bg0.xml.gz")
@@ -409,8 +515,8 @@ func main() {
 	chcCache := &Cache{filePath: chcCacheFilePath}
-	es0Cache := &Cache{filePath: es0CacheFilePath}
-	es1Cache := &Cache{filePath: es1CacheFilePath}
+	all0Cache := &Cache{filePath: all0CacheFilePath}
+	all1Cache := &Cache{filePath: all1CacheFilePath}
 	bg0Cache := &Cache{filePath: bg0CacheFilePath}
 	bg1Cache := &Cache{filePath: bg1CacheFilePath}
@@ -433,23 +539,23 @@ func main() {
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
-	// /chc: same behavior as the old root.
-	startFetcher(ctx, chcCache, url, okInterval, failInterval)
+	// /chc: primary provider, with XML validation.
+	startFetcherXML(ctx, chcCache, url, okInterval, failInterval)
 
 	// Base URLs for each region
 	const (
-		esURL = "https://epgshare01.online/epgshare01/epg_ripper_ALL_SOURCES1.xml.gz"
+		allURL = "https://epgshare01.online/epgshare01/epg_ripper_ALL_SOURCES1.xml.gz"
 		bgURL = "https://epgshare01.online/epgshare01/epg_ripper_BG1.xml.gz"
 		deURL = "https://epgshare01.online/epgshare01/epg_ripper_DE1.xml.gz"
 		frURL = "https://epgshare01.online/epgshare01/epg_ripper_FR1.xml.gz"
 		itURL = "https://epgshare01.online/epgshare01/epg_ripper_IT1.xml.gz"
 		nlURL = "https://epgshare01.online/epgshare01/epg_ripper_NL1.xml.gz"
 		ukURL = "https://epgshare01.online/epgshare01/epg_ripper_UK1.xml.gz"
 	)
 
-	// ES: es0 (raw), es1 (undotted)
-	startFetcherGunzip(ctx, es0Cache, esURL, okInterval, failInterval, "es0.xml", nil)
-	startFetcherGunzip(ctx, es1Cache, esURL, okInterval, failInterval, "es1.xml", transformChannelRefs)
+	// ALL (formerly ES): all0 (raw), all1 (undotted)
+	startFetcherGunzip(ctx, all0Cache, allURL, okInterval, failInterval, "all0.xml", nil)
+	startFetcherGunzip(ctx, all1Cache, allURL, okInterval, failInterval, "all1.xml", transformChannelRefs)
 
 	// BG: bg0 (raw), bg1 (undotted)
 	startFetcherGunzip(ctx, bg0Cache, bgURL, okInterval, failInterval, "bg0.xml", nil)
@@ -477,13 +583,36 @@ func main() {
 	mux := http.NewServeMux()
-	// Root: always 401. Against the sniffers grabbing bandwidth
+	// Root: serve primary (chc) if valid; otherwise fall back to NL1 (undotted gz).
 	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
-		w.Header().Set("WWW-Authenticate", `Basic realm="restricted"`)
-		http.Error(w, "unauthorized", http.StatusUnauthorized)
+		// Try primary (chc).
+		if path, contentType, ok := chcCache.GetContent(); ok {
+			if contentType != "" {
+				w.Header().Set("Content-Type", contentType)
+			}
+			w.Header().Set("X-Root-Source", "primary_chc")
+			http.ServeFile(w, r, path)
+			return
+		}
+		// Fallback: NL1 (undotted).
+		if path, contentType, ok := nl1Cache.GetContent(); ok {
+			if contentType == "" {
+				contentType = "application/gzip"
+			}
+			w.Header().Set("Content-Type", contentType)
+			w.Header().Set("Content-Disposition", `attachment; filename="nl1.xml.gz"`)
+			w.Header().Set("X-Root-Source", "backup_nl1")
+			http.ServeFile(w, r, path)
+			return
+		}
+		// Nothing available.
+		w.Header().Set("X-Root-Source", "unavailable")
+		http.Error(w, "no valid primary or backup EPG available", http.StatusServiceUnavailable)
 	})
-	// /chc: original from CACHE_URL.
+	// /chc: original from CACHE_URL (validated XML).
 	mux.HandleFunc("/chc", func(w http.ResponseWriter, r *http.Request) {
 		path, contentType, ok := chcCache.GetContent()
 		if !ok {
@@ -497,9 +626,9 @@ func main() {
 		http.ServeFile(w, r, path)
 	})
-	// ES endpoints
-	mux.HandleFunc("/es0", makeGzipHandler(es0Cache, "es0.xml.gz"))
-	mux.HandleFunc("/es1", makeGzipHandler(es1Cache, "es1.xml.gz"))
+	// ALL endpoints (formerly ES)
+	mux.HandleFunc("/all0", makeGzipHandler(all0Cache, "all0.xml.gz"))
+	mux.HandleFunc("/all1", makeGzipHandler(all1Cache, "all1.xml.gz"))
 	// BG endpoints
 	mux.HandleFunc("/bg0", makeGzipHandler(bg0Cache, "bg0.xml.gz"))
@@ -527,11 +656,20 @@ func main() {
 	// Combined status endpoint.
 	mux.HandleFunc("/status", func(w http.ResponseWriter, r *http.Request) {
-		status := map[string]interface{}{
-			"chc": chcCache.Status(),
-			"es0": es0Cache.Status(),
-			"es1": es1Cache.Status(),
+		// Compute current root source for visibility.
+		rootSource := "unavailable"
+		if _, _, ok := chcCache.GetContent(); ok {
+			rootSource = "primary_chc"
+		} else if _, _, ok := nl1Cache.GetContent(); ok {
+			rootSource = "backup_nl1"
+		}
+		status := map[string]interface{}{
+			"root_source": rootSource,
+			"chc": chcCache.Status(),
+			"all0": all0Cache.Status(),
+			"all1": all1Cache.Status(),
 			"bg0": bg0Cache.Status(),
 			"bg1": bg1Cache.Status(),
@@ -562,8 +700,8 @@ func main() {
 	})
 
 	addr := ":" + port
-	log.Printf("Starting server on %s\n /chc -> %s\n EPG base URLs: ES:%s BG:%s DE:%s FR:%s IT:%s NL:%s UK:%s\n (ok interval: %s, fail interval: %s)",
-		addr, url, esURL, bgURL, deURL, frURL, itURL, nlURL, ukURL, okInterval, failInterval)
+	log.Printf("Starting server on %s\n /chc -> %s\n EPG base URLs: ALL:%s BG:%s DE:%s FR:%s IT:%s NL:%s UK:%s\n (ok interval: %s, fail interval: %s)",
+		addr, url, allURL, bgURL, deURL, frURL, itURL, nlURL, ukURL, okInterval, failInterval)
 	if err := http.ListenAndServe(addr, mux); err != nil {
 		log.Fatalf("server error: %v", err)