#!/usr/bin/env python3
"""Build genre cache from beets database + Essentia audio features.

Run daily via cron.

Genre-level energy scores are computed from the mean Essentia energy of all
analysed tracks in that genre. Unanalysed tracks fall back to their genre
average, or 5 (neutral) if the genre has no analysed tracks yet.
"""

import json
import math
import os
import sqlite3
import sys
import time
from contextlib import closing
from contextlib import suppress

BEETS_DB = "/home/susan/.config/beets/library.db"
FEATURES_DB = "/var/lib/radio/audio_features.db"
CACHE_PATH = "/var/lib/radio/genre_cache.json"

NEUTRAL_ENERGY = 5

# Genre → broad group (for DJ filtering / display).
# Groups are tried in insertion order and the first match wins, so a keyword
# listed under two groups (e.g. "metal") resolves to the earlier group.
GROUP_KEYWORDS = {
    "chill": ["ambient", "downtempo", "chill", "drone", "new age", "lounge",
              "easy listening", "sleep", "meditation", "minimal"],
    "jazz": ["jazz", "bossa nova", "bebop", "swing", "fusion"],
    "classical": ["classical", "chamber", "orchestral", "piano", "baroque",
                  "romantic"],
    "electronic": ["techno", "house", "trance", "electro", "electronic",
                   "idm", "ebm", "synth", "synthwave"],
    "bass": ["dubstep", "drum and bass", "dnb", "jungle", "breakbeat",
             "garage", "uk garage", "dub", "bass"],
    "rock": ["rock", "punk", "metal", "shoegaze", "post rock", "post-rock",
             "grunge", "alternative", "indie rock", "noise rock"],
    "hiphop": ["hip hop", "hip-hop", "rap", "trap", "boom bap", "grime"],
    "soul": ["soul", "r&b", "rnb", "funk", "disco", "motown", "gospel",
             "neo soul"],
    "folk": ["folk", "acoustic", "singer-songwriter", "country", "bluegrass",
             "americana"],
    "pop": ["pop", "synth pop", "synthpop", "dream pop", "new wave",
            "art pop", "indie pop"],
    "world": ["afrobeat", "reggae", "latin", "world", "flamenco", "samba",
              "cumbia"],
    "heavy": ["industrial", "noise", "hardcore", "metal", "thrash",
              "grindcore", "power electronics"],
    "experimental": ["experimental", "avant-garde", "musique concrete",
                     "free improvisation"],
}


def normalize_genre(g: str) -> str:
    """Return *g* lower-cased, with "-" and "_" turned into spaces, stripped."""
    return g.lower().replace("-", " ").replace("_", " ").strip()


def essentia_to_scale(e: float) -> int:
    """Convert Essentia normalized energy (0.0–1.0) to 1–10 integer scale.

    Values outside 0.0–1.0 are clamped to the ends of the scale.
    """
    return max(1, min(10, round(e * 9 + 1)))


def find_group(genre: str) -> str:
    """Return the broad group for *genre*, or "other" when nothing matches.

    Matching is by substring in either direction between the normalized genre
    and each keyword of GROUP_KEYWORDS; the first matching group wins.
    """
    g = normalize_genre(genre)
    if not g:
        # An empty string is a substring of every keyword, which would
        # otherwise classify blank genres as the first group.
        return "other"
    for group, keywords in GROUP_KEYWORDS.items():
        for raw_kw in keywords:
            # Normalize the keyword exactly like the genre; otherwise
            # hyphenated keywords ("singer-songwriter", "avant-garde") can
            # never be found inside a genre whose hyphens became spaces.
            kw = normalize_genre(raw_kw)
            if kw and (kw in g or g in kw):
                return group
    return "other"


def _decode_path(raw):
    """Return *raw* as a str path, or None if it cannot be used as one.

    bytes are decoded as UTF-8; undecodable bytes and non-string values
    (e.g. NULL) yield None.
    """
    if isinstance(raw, str):
        return raw
    if isinstance(raw, bytes):
        try:
            return raw.decode("utf-8")
        except UnicodeDecodeError:
            return None
    return None


def load_essentia_features() -> dict:
    """Load path → normalized energy from audio_features.db.

    Best-effort: a missing or unreadable database produces a warning on
    stderr and an empty mapping. Rows whose path cannot be decoded or whose
    energy is not a finite number are skipped.
    """
    if not os.path.exists(FEATURES_DB):
        print(f"Warning: features DB not found at {FEATURES_DB}, all tracks will get energy {NEUTRAL_ENERGY}",
              file=sys.stderr)
        return {}

    try:
        # sqlite3's own context manager only commits/rolls back; closing()
        # guarantees the handle is released even when the query fails.
        with closing(sqlite3.connect(f"file:{FEATURES_DB}?mode=ro", uri=True)) as conn:
            rows = conn.execute("SELECT path, energy FROM features").fetchall()
    except sqlite3.Error as e:
        print(f"Warning: failed to read features DB: {e}", file=sys.stderr)
        return {}

    features = {}
    for raw_path, energy in rows:
        # Decode bytes paths the same way main() decodes beets paths so the
        # keys compare equal.
        path = _decode_path(raw_path)
        if path is None:
            continue
        # NULL / NaN / inf energies cannot be mapped onto the 1–10 scale.
        if isinstance(energy, bool) or not isinstance(energy, (int, float)):
            continue
        if not math.isfinite(energy):
            continue
        features[path] = energy
    return features


def _collect_tracks(rows, essentia_energies):
    """First pass: gather per-track data and per-genre Essentia scores.

    Returns (track_data, genre_energies, analysed_count, skipped) where
    track_data maps path → info dict (energy is None when unanalysed),
    genre_energies maps genre → list of 1–10 scores from analysed tracks,
    and skipped counts rows dropped because their path was unusable.
    """
    track_data = {}
    genre_energies = {}
    analysed_count = 0
    skipped = 0

    for row in rows:
        path = _decode_path(row["path"])
        if path is None:
            skipped += 1
            continue

        # Blank or whitespace-only genres collapse to "misc".
        genre = normalize_genre(row["genre"] or "") or "misc"

        raw_energy = essentia_energies.get(path)
        if raw_energy is None:
            energy = None  # resolved in the second pass
        else:
            energy = essentia_to_scale(raw_energy)
            analysed_count += 1
            genre_energies.setdefault(genre, []).append(energy)

        track_data[path] = {
            "genre": genre,
            "energy": energy,
            "artist": row["artist"] or "",
            "album": row["album"] or "",
            "title": row["title"] or "",
        }

    return track_data, genre_energies, analysed_count, skipped


def _write_json_atomically(payload, dest):
    """Write *payload* as JSON to *dest* via a temp file and os.replace."""
    tmp = dest + ".tmp"
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(payload, f)
        os.replace(tmp, dest)
    except Exception:
        # Do not leave a half-written temp file behind on failure.
        with suppress(OSError):
            os.remove(tmp)
        raise


def main():
    """Build the genre cache from the beets library and write it to CACHE_PATH.

    Exits with status 1 if the beets database does not exist.
    """
    if not os.path.exists(BEETS_DB):
        print(f"Error: beets DB not found at {BEETS_DB}", file=sys.stderr)
        sys.exit(1)

    essentia_energies = load_essentia_features()
    print(f"Loaded Essentia features for {len(essentia_energies)} tracks")

    with closing(sqlite3.connect(f"file:{BEETS_DB}?mode=ro", uri=True)) as conn:
        conn.row_factory = sqlite3.Row
        rows = conn.execute("SELECT path, title, artist, album, genre FROM items")
        track_data, genre_energies, analysed_count, skipped = _collect_tracks(
            rows, essentia_energies
        )

    if skipped:
        print(f"Warning: skipped {skipped} tracks with undecodable paths", file=sys.stderr)

    # Compute genre averages from Essentia data
    genre_avg_energy = {
        genre: round(sum(scores) / len(scores))
        for genre, scores in genre_energies.items()
    }
    print(f"Genre energies computed from Essentia for {len(genre_avg_energy)} genres")

    # Second pass: fill unanalysed tracks with genre average or neutral
    fallback_genre_avg = 0
    fallback_neutral = 0
    for info in track_data.values():
        if info["energy"] is not None:
            continue
        if info["genre"] in genre_avg_energy:
            info["energy"] = genre_avg_energy[info["genre"]]
            fallback_genre_avg += 1
        else:
            info["energy"] = NEUTRAL_ENERGY
            fallback_neutral += 1

    # Build genre summary
    all_genres = {info["genre"] for info in track_data.values()}
    genres_out = {
        genre: {
            "energy": genre_avg_energy.get(genre, NEUTRAL_ENERGY),
            "group": find_group(genre),
            "analysed_tracks": len(genre_energies.get(genre, [])),
        }
        for genre in all_genres
    }

    # Build output tracks
    tracks_out = {
        path: {
            "genre": info["genre"],
            "energy": info["energy"],
            "artist": info["artist"],
            "album": info["album"],
            "title": info["title"],
        }
        for path, info in track_data.items()
    }

    cache = {
        "genres": genres_out,
        "tracks": tracks_out,
        "built_at": time.time(),
        "stats": {
            "total_tracks": len(tracks_out),
            "essentia_analysed": analysed_count,
            "fallback_genre_avg": fallback_genre_avg,
            "fallback_neutral": fallback_neutral,
            "genres_with_essentia": len(genre_energies),
        },
    }

    _write_json_atomically(cache, CACHE_PATH)

    print(f"Built genre cache: {len(tracks_out)} tracks, {len(genres_out)} genres")
    print(f"  Essentia energy: {analysed_count} tracks")
    print(f"  Genre avg fallback: {fallback_genre_avg} tracks")
    print(f"  Neutral ({NEUTRAL_ENERGY}) fallback: {fallback_neutral} tracks")


if __name__ == "__main__":
    main()