summary | refs | log | tree | commit | diff
path: root/build_genre_cache.py
diff options
context:
space:
mode:
author: Caine <caine@jihakuz.xyz> 2026-03-07 12:52:55 +0000
committer: Caine <caine@jihakuz.xyz> 2026-03-07 12:52:55 +0000
commit: 01682c62c2c9ea2f7f498544ee3aaa299c0c2423 (patch)
tree: 82a86298eeed90bfba253bf03a4b3393b182b491 /build_genre_cache.py
Initial commit: Radio Susan scripts, configs, and SFX
Diffstat (limited to 'build_genre_cache.py')
-rwxr-xr-x build_genre_cache.py 185
1 files changed, 185 insertions, 0 deletions
diff --git a/build_genre_cache.py b/build_genre_cache.py
new file mode 100755
index 0000000..fb9ace3
--- /dev/null
+++ b/build_genre_cache.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python3
+"""Build genre cache from beets database + Essentia audio features. Run daily via cron.
+
+Genre-level energy scores are computed from the mean Essentia energy of all
+analysed tracks in that genre. Unanalysed tracks fall back to their genre
+average, or 5 (neutral) if the genre has no analysed tracks yet.
+"""
+
+import json
+import os
+import sqlite3
+import sys
+import time
+
# Input databases (opened read-only by this script) and the JSON file it writes.
BEETS_DB = "/home/susan/.config/beets/library.db"
FEATURES_DB = "/var/lib/radio/audio_features.db"
CACHE_PATH = "/var/lib/radio/genre_cache.json"

# Energy (on the 1-10 scale) given to a track when neither the track nor its
# genre has any analysed data.
NEUTRAL_ENERGY = 5

# Broad group -> substring keywords that place a genre in that group
# (for DJ filtering / display).
# Order is significant: find_group() returns the FIRST group with a matching
# keyword, so earlier groups win when several could match (e.g. "metal" is
# listed under both "rock" and "heavy").
GROUP_KEYWORDS = {
    "chill": [
        "ambient", "downtempo", "chill", "drone", "new age", "lounge",
        "easy listening", "sleep", "meditation", "minimal",
    ],
    "jazz": [
        "jazz", "bossa nova", "bebop", "swing", "fusion",
    ],
    "classical": [
        "classical", "chamber", "orchestral", "piano", "baroque", "romantic",
    ],
    "electronic": [
        "techno", "house", "trance", "electro", "electronic", "idm", "ebm",
        "synth", "synthwave",
    ],
    "bass": [
        "dubstep", "drum and bass", "dnb", "jungle", "breakbeat", "garage",
        "uk garage", "dub", "bass",
    ],
    "rock": [
        "rock", "punk", "metal", "shoegaze", "post rock", "post-rock",
        "grunge", "alternative", "indie rock", "noise rock",
    ],
    "hiphop": [
        "hip hop", "hip-hop", "rap", "trap", "boom bap", "grime",
    ],
    "soul": [
        "soul", "r&b", "rnb", "funk", "disco", "motown", "gospel", "neo soul",
    ],
    "folk": [
        "folk", "acoustic", "singer-songwriter", "country", "bluegrass",
        "americana",
    ],
    "pop": [
        "pop", "synth pop", "synthpop", "dream pop", "new wave", "art pop",
        "indie pop",
    ],
    "world": [
        "afrobeat", "reggae", "latin", "world", "flamenco", "samba", "cumbia",
    ],
    "heavy": [
        "industrial", "noise", "hardcore", "metal", "thrash", "grindcore",
        "power electronics",
    ],
    "experimental": [
        "experimental", "avant-garde", "musique concrete",
        "free improvisation",
    ],
}
+
+
def normalize_genre(g):
    """Return a canonical form of the genre string *g*.

    The result is lower-cased, has every hyphen and underscore replaced by a
    space, and has leading/trailing whitespace removed.
    """
    separators_to_space = str.maketrans("-_", "  ")
    return g.lower().translate(separators_to_space).strip()
+
+
def essentia_to_scale(e):
    """Convert Essentia normalized energy (0.0–1.0) to 1–10 integer scale.

    The value is mapped linearly (0.0 -> 1, 1.0 -> 10), rounded with the
    built-in round(), and clamped so inputs outside 0.0–1.0 still yield a
    result between 1 and 10.
    """
    scaled = round(e * 9 + 1)
    if scaled < 1:
        return 1
    if scaled > 10:
        return 10
    return scaled
+
+
def find_group(genre):
    """Return the broad group name for *genre*, or "other" if none matches.

    The genre is normalised with normalize_genre() and compared against each
    keyword in GROUP_KEYWORDS, in dict order; the first group with a keyword
    that contains, or is contained in, the genre wins.

    Keywords are normalised the same way as the genre before comparing.
    Without this, hyphenated keywords such as "singer-songwriter" or
    "avant-garde" can never match, because the genre side has already had its
    hyphens turned into spaces.
    """
    g = normalize_genre(genre)
    if not g:
        # An empty string is a substring of every keyword, so it would be
        # filed under the first group instead of "other".
        return "other"
    for group, keywords in GROUP_KEYWORDS.items():
        for kw in keywords:
            kw = normalize_genre(kw)
            if kw in g or g in kw:
                return group
    return "other"
+
+
def load_essentia_features():
    """Load path → normalized energy from audio_features.db.

    Returns:
        A dict mapping each ``path`` in the ``features`` table to its
        ``energy`` value. Rows whose energy is NULL are left out, so callers
        treat those tracks as not analysed instead of receiving ``None``.
        An empty dict is returned (with a warning on stderr) when the
        database file is missing or cannot be read.
    """
    if not os.path.exists(FEATURES_DB):
        print(f"Warning: features DB not found at {FEATURES_DB}, all tracks will get energy {NEUTRAL_ENERGY}", file=sys.stderr)
        return {}

    try:
        # mode=ro: open read-only through SQLite's URI syntax.
        conn = sqlite3.connect(f"file:{FEATURES_DB}?mode=ro", uri=True)
        try:
            rows = conn.execute("SELECT path, energy FROM features").fetchall()
        finally:
            # Close even when the query fails so the handle is not leaked.
            conn.close()
    except sqlite3.Error as e:
        print(f"Warning: failed to read features DB: {e}", file=sys.stderr)
        return {}

    return {path: energy for path, energy in rows if energy is not None}
+
+
def _collect_tracks(essentia_energies):
    """First pass: read every beets item and attach Essentia energy where known.

    Args:
        essentia_energies: dict mapping track path to normalized energy.

    Returns:
        A ``(track_data, genre_energies, analysed_count)`` tuple where
        ``track_data`` maps path to a dict with ``genre``, ``energy`` (``None``
        when the track has no usable Essentia value), ``artist``, ``album`` and
        ``title``; ``genre_energies`` maps genre to the list of 1–10 scores of
        its analysed tracks; ``analysed_count`` is the number of such tracks.
    """
    track_data = {}
    genre_energies = {}  # genre → [list of 1–10 scores from analysed tracks]
    analysed_count = 0

    conn = sqlite3.connect(f"file:{BEETS_DB}?mode=ro", uri=True)
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute("SELECT path, title, artist, album, albumartist, genre FROM items")
        for row in rows:
            path = row["path"]
            if isinstance(path, bytes):
                try:
                    path = path.decode("utf-8")
                except UnicodeDecodeError:
                    # A path that is not valid UTF-8 cannot become a JSON key
                    # here; the track is left out of the cache.
                    continue

            # A missing, empty or whitespace-only genre all collapse to "misc"
            # so that no track ends up under an empty-string genre.
            genre = normalize_genre(row["genre"] or "") or "misc"

            raw_energy = essentia_energies.get(path)
            if raw_energy is not None:
                energy = essentia_to_scale(raw_energy)
                analysed_count += 1
                genre_energies.setdefault(genre, []).append(energy)
            else:
                energy = None  # resolved in the second pass

            track_data[path] = {
                "genre": genre,
                "energy": energy,
                "artist": row["artist"] or "",
                "album": row["album"] or "",
                "title": row["title"] or "",
            }
    finally:
        # Close the connection even if reading a row raises.
        conn.close()

    return track_data, genre_energies, analysed_count


def _write_cache(cache):
    """Serialise *cache* as JSON to CACHE_PATH via a temp file and os.replace()."""
    tmp = CACHE_PATH + ".tmp"
    with open(tmp, "w") as f:
        json.dump(cache, f)
    # Swap the finished file into place so CACHE_PATH is only ever replaced by
    # a completely written file.
    os.replace(tmp, CACHE_PATH)


def main():
    """Build the genre cache from the beets library and Essentia features.

    Steps:
      1. Load Essentia energies and read all items from the beets database.
      2. Compute a rounded mean 1–10 energy for every genre that has at least
         one analysed track.
      3. Give each unanalysed track its genre's mean, or NEUTRAL_ENERGY when
         the genre has no analysed tracks.
      4. Write the ``genres`` / ``tracks`` / ``built_at`` / ``stats`` structure
         to CACHE_PATH and print a summary.

    Exits with status 1 if the beets database file does not exist.
    """
    if not os.path.exists(BEETS_DB):
        print(f"Error: beets DB not found at {BEETS_DB}", file=sys.stderr)
        sys.exit(1)

    essentia_energies = load_essentia_features()
    print(f"Loaded Essentia features for {len(essentia_energies)} tracks")

    track_data, genre_energies, analysed_count = _collect_tracks(essentia_energies)

    # Compute genre averages from Essentia data
    genre_avg_energy = {
        genre: round(sum(scores) / len(scores))
        for genre, scores in genre_energies.items()
    }
    print(f"Genre energies computed from Essentia for {len(genre_avg_energy)} genres")

    # Second pass: fill unanalysed tracks with genre average or neutral
    fallback_genre_avg = 0
    fallback_neutral = 0
    for info in track_data.values():
        if info["energy"] is not None:
            continue
        if info["genre"] in genre_avg_energy:
            info["energy"] = genre_avg_energy[info["genre"]]
            fallback_genre_avg += 1
        else:
            info["energy"] = NEUTRAL_ENERGY
            fallback_neutral += 1

    # Build genre summary (sorted so the written file is stable between runs
    # over the same data)
    genres_out = {
        genre: {
            "energy": genre_avg_energy.get(genre, NEUTRAL_ENERGY),
            "group": find_group(genre),
            "analysed_tracks": len(genre_energies.get(genre, [])),
        }
        for genre in sorted({info["genre"] for info in track_data.values()})
    }

    # Build output tracks
    tracks_out = {
        path: {
            "genre": info["genre"],
            "energy": info["energy"],
            "artist": info["artist"],
            "album": info["album"],
            "title": info["title"],
        }
        for path, info in track_data.items()
    }

    cache = {
        "genres": genres_out,
        "tracks": tracks_out,
        "built_at": time.time(),
        "stats": {
            "total_tracks": len(tracks_out),
            "essentia_analysed": analysed_count,
            "fallback_genre_avg": fallback_genre_avg,
            "fallback_neutral": fallback_neutral,
            "genres_with_essentia": len(genre_energies),
        },
    }
    _write_cache(cache)

    print(f"Built genre cache: {len(tracks_out)} tracks, {len(genres_out)} genres")
    print(f" Essentia energy: {analysed_count} tracks")
    print(f" Genre avg fallback: {fallback_genre_avg} tracks")
    print(f" Neutral ({NEUTRAL_ENERGY}) fallback: {fallback_neutral} tracks")
+
+
# Run the cache build only when executed as a script, not when imported.
if __name__ == "__main__":
    main()