author     Caine <susan@tomflux.xyz>  2026-02-15 09:41:49 +0000
committer  Caine <susan@tomflux.xyz>  2026-02-15 09:41:49 +0000
commit     c7956ae9b228054d57897ea338ad4154cc0b7221 (patch)
tree       a1f517ea452dddf34468c01c85871358e8b7295e
Initial commit: susan automation scripts
Overnight transcoding, music discovery/import, system health reports, stats page generator, and bookmark management. Secrets stored in /etc/automation/ — not in repo.
-rw-r--r--  .gitignore                      14
-rw-r--r--  README.md                       23
-rw-r--r--  add_bookmark.js                 76
-rwxr-xr-x  add_bookmark_to_wishlist.js     97
-rw-r--r--  decrypt_bookmarks.js            34
-rw-r--r--  generate_stats_page.py         285
-rwxr-xr-x  import_music.sh                131
-rw-r--r--  morning_report.py              438
-rw-r--r--  music_config.example.json       15
-rwxr-xr-x  music_recommender.py          1038
-rwxr-xr-x  overnight_transcoder.py       1245
-rw-r--r--  scrape_discogs_labels.py       183
-rwxr-xr-x  transcode_album.sh              81
13 files changed, 3660 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..00150fb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+# Secrets & config with credentials
+music_config.json
+music_state.json
+bookmarks.json
+
+# Generated data
+discogs_labels.json
+__pycache__/
+*.pyc
+
+# Editor
+*.swp
+*.swo
+*~
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b1a2f9b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,23 @@
+# susan-scripts
+
+Automation scripts for Susan (home server). Managed by Caine.
+
+## Scripts
+
+| Script | Purpose |
+|--------|---------|
+| `overnight_transcoder.py` | Nightly HEVC transcoding of video library |
+| `morning_report.py` | Daily system health report (email) |
+| `music_recommender.py` | Last.fm-based music discovery + Soulseek download |
+| `import_music.sh` | FLAC→Opus transcoding + beets tagging for new albums |
+| `transcode_album.sh` | Manual album transcode helper |
+| `scrape_discogs_labels.py` | Scrape Discogs labels for music pipeline |
+| `generate_stats_page.py` | Retro stats page generator |
+| `decrypt_bookmarks.js` | Floccus bookmark decryption |
+| `add_bookmark.js` | Add bookmark to Floccus XBEL |
+| `add_bookmark_to_wishlist.js` | Add bookmark to wishlist folder |
+
+## Setup
+
+- Copy `music_config.example.json` to `music_config.json` and fill in credentials.
+- Bookmark scripts read the password from `/etc/automation/bookmarks.json` (expected keys sketched below).
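
For reference, the secret files these scripts expect live outside the repo. Below is a hypothetical pre-flight check; the paths and top-level key names come from the scripts in this commit, everything else is illustrative and not part of the commit:

```python
#!/usr/bin/env python3
# Hypothetical pre-flight check for the secrets the scripts expect; not part of this commit.
import json

# path -> top-level keys the scripts actually read
expected = {
    "music_config.json": {"lastfm", "navidrome", "slskd"},    # music_recommender.py
    "/etc/automation/bookmarks.json": {"password"},           # add_bookmark*.js, decrypt_bookmarks.js
    "/etc/susan/email.json": {"smtp"},                        # morning_report.py
}

for path, keys in expected.items():
    try:
        missing = keys - set(json.load(open(path)))
    except (OSError, json.JSONDecodeError) as exc:
        print(f"✗ {path}: {exc}")
        continue
    print(f"✓ {path}" if not missing else f"✗ {path}: missing {sorted(missing)}")
```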
diff --git a/add_bookmark.js b/add_bookmark.js
new file mode 100644
index 0000000..9a68789
--- /dev/null
+++ b/add_bookmark.js
@@ -0,0 +1,76 @@
+#!/usr/bin/env node
+// Add a bookmark to Floccus encrypted bookmarks
+// Usage: node add_bookmark.js <url> <title>
+
+const crypto = require('crypto');
+const fs = require('fs');
+
+const BOOKMARKS_PATH = '/var/www/webdav/bookmarks.xbel';
+const PASSWORD = process.env.BOOKMARKS_PASSWORD || JSON.parse(fs.readFileSync('/etc/automation/bookmarks.json', 'utf8')).password;
+
+const url = process.argv[2];
+const title = process.argv[3];
+
+if (!url || !title) {
+ console.log('Usage: node add_bookmark.js <url> <title>');
+ process.exit(1);
+}
+
+function decrypt(data, password) {
+ const ciphertext = Buffer.from(data.ciphertext, 'base64');
+ const salt = data.salt;
+ const key = crypto.pbkdf2Sync(password, salt, 250000, 32, 'sha256');
+ const iv = ciphertext.slice(0, 16);
+ const encrypted = ciphertext.slice(16, -16);
+ const tag = ciphertext.slice(-16);
+
+ const decipher = crypto.createDecipheriv('aes-256-gcm', key, iv);
+ decipher.setAuthTag(tag);
+ return Buffer.concat([decipher.update(encrypted), decipher.final()]).toString('utf8');
+}
+
+function encrypt(plaintext, password, salt) {
+ const key = crypto.pbkdf2Sync(password, salt, 250000, 32, 'sha256');
+ const iv = crypto.randomBytes(16);
+
+ const cipher = crypto.createCipheriv('aes-256-gcm', key, iv);
+ const encrypted = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()]);
+ const tag = cipher.getAuthTag();
+
+ const combined = Buffer.concat([iv, encrypted, tag]);
+ return combined.toString('base64');
+}
+
+// Read and decrypt
+const data = JSON.parse(fs.readFileSync(BOOKMARKS_PATH, 'utf8'));
+let xml = decrypt(data, PASSWORD);
+
+// Find highest ID
+const idMatch = xml.match(/highestId :(\d+):/);
+let highestId = idMatch ? parseInt(idMatch[1]) : 100;
+const newId = highestId + 1;
+
+// Escape XML entities
+const escapeXml = (str) => str
+ .replace(/&/g, '&amp;')
+ .replace(/</g, '&lt;')
+ .replace(/>/g, '&gt;')
+ .replace(/"/g, '&quot;')
+ .replace(/'/g, '&apos;');
+
+// Add bookmark
+const newBookmark = `<bookmark href="${escapeXml(url)}" id="${newId}">
+ <title>${escapeXml(title)}</title>
+ </bookmark>
+</xbel>`;
+
+xml = xml.replace(/highestId :(\d+):/, `highestId :${newId}:`);
+xml = xml.replace('</xbel>', newBookmark);
+
+// Encrypt and save
+const newCiphertext = encrypt(xml, PASSWORD, data.salt);
+fs.writeFileSync(BOOKMARKS_PATH, JSON.stringify({ ciphertext: newCiphertext, salt: data.salt }));
+
+console.log(`Added bookmark: ${title}`);
+console.log(`URL: ${url}`);
+console.log(`ID: ${newId}`);
diff --git a/add_bookmark_to_wishlist.js b/add_bookmark_to_wishlist.js
new file mode 100755
index 0000000..a488174
--- /dev/null
+++ b/add_bookmark_to_wishlist.js
@@ -0,0 +1,97 @@
+#!/usr/bin/env node
+// Add a bookmark to Music/Wishlist folder in Floccus bookmarks
+// Usage: node add_bookmark_to_wishlist.js <url> <title>
+
+const crypto = require('crypto');
+const fs = require('fs');
+
+const BOOKMARKS_PATH = '/var/www/webdav/bookmarks.xbel';
+const PASSWORD = process.env.BOOKMARKS_PASSWORD || JSON.parse(fs.readFileSync('/etc/automation/bookmarks.json', 'utf8')).password;
+const WISHLIST_FOLDER_TITLE = 'Wishlist';
+
+const url = process.argv[2];
+const title = process.argv[3];
+
+if (!url || !title) {
+ console.error('Usage: node add_bookmark_to_wishlist.js <url> <title>');
+ process.exit(1);
+}
+
+function decrypt(data, password) {
+ const ciphertext = Buffer.from(data.ciphertext, 'base64');
+ const salt = data.salt;
+ const key = crypto.pbkdf2Sync(password, salt, 250000, 32, 'sha256');
+ const iv = ciphertext.slice(0, 16);
+ const encrypted = ciphertext.slice(16, -16);
+ const tag = ciphertext.slice(-16);
+
+ const decipher = crypto.createDecipheriv('aes-256-gcm', key, iv);
+ decipher.setAuthTag(tag);
+ return Buffer.concat([decipher.update(encrypted), decipher.final()]).toString('utf8');
+}
+
+function encrypt(plaintext, password, salt) {
+ const key = crypto.pbkdf2Sync(password, salt, 250000, 32, 'sha256');
+ const iv = crypto.randomBytes(16);
+
+ const cipher = crypto.createCipheriv('aes-256-gcm', key, iv);
+ const encrypted = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()]);
+ const tag = cipher.getAuthTag();
+
+ return Buffer.concat([iv, encrypted, tag]).toString('base64');
+}
+
+const escapeXml = (str) => str
+ .replace(/&/g, '&amp;')
+ .replace(/</g, '&lt;')
+ .replace(/>/g, '&gt;')
+ .replace(/"/g, '&quot;')
+ .replace(/'/g, '&apos;');
+
+try {
+ const data = JSON.parse(fs.readFileSync(BOOKMARKS_PATH, 'utf8'));
+ let xml = decrypt(data, PASSWORD);
+
+ // Check if bookmark already exists
+ if (xml.includes(escapeXml(url)) || xml.includes(url)) {
+ console.log('Bookmark already exists');
+ process.exit(0);
+ }
+
+ // Find highest ID
+ const idMatch = xml.match(/highestId :(\d+):/);
+ let highestId = idMatch ? parseInt(idMatch[1]) : 100;
+ const newId = highestId + 1;
+
+ // Find the Wishlist folder and add bookmark inside it
+ const wishlistPattern = /<folder id="\d+">\s*<title>Wishlist<\/title>/;
+
+ if (wishlistPattern.test(xml)) {
+ // Add bookmark inside existing Wishlist folder
+ const newBookmark = `<bookmark href="${escapeXml(url)}" id="${newId}">
+ <title>${escapeXml(title)}</title>
+ </bookmark>
+ </folder>`;
+
+ // Find Wishlist folder's closing tag and insert before it
+ xml = xml.replace(
+ /(<folder id="\d+">\s*<title>Wishlist<\/title>[\s\S]*?)(<\/folder>)/,
+ (match, folderContent, closingTag) => folderContent + newBookmark
+ );
+ } else {
+ console.error('Wishlist folder not found');
+ process.exit(1);
+ }
+
+ // Update highest ID
+ xml = xml.replace(/highestId :(\d+):/, `highestId :${newId}:`);
+
+ // Save
+ const newCiphertext = encrypt(xml, PASSWORD, data.salt);
+ fs.writeFileSync(BOOKMARKS_PATH, JSON.stringify({ ciphertext: newCiphertext, salt: data.salt }));
+
+ console.log(`Added: ${title}`);
+} catch (e) {
+ console.error('Error:', e.message);
+ process.exit(1);
+}
diff --git a/decrypt_bookmarks.js b/decrypt_bookmarks.js
new file mode 100644
index 0000000..e73c4e4
--- /dev/null
+++ b/decrypt_bookmarks.js
@@ -0,0 +1,34 @@
+#!/usr/bin/env node
+// Decrypt Floccus bookmarks
+// Usage: node decrypt_bookmarks.js [password]
+
+const crypto = require('crypto');
+const fs = require('fs');
+
+const BOOKMARKS_PATH = '/var/www/webdav/bookmarks.xbel';
+const password = process.argv[2] || process.env.BOOKMARKS_PASSWORD || JSON.parse(fs.readFileSync('/etc/automation/bookmarks.json', 'utf8')).password;
+
+const data = JSON.parse(fs.readFileSync(BOOKMARKS_PATH, 'utf8'));
+const ciphertext = Buffer.from(data.ciphertext, 'base64');
+const salt = data.salt; // Floccus uses salt as UTF-8 string, not hex-decoded
+
+// Floccus encryption: PBKDF2-SHA256, 250000 iterations
+const key = crypto.pbkdf2Sync(password, salt, 250000, 32, 'sha256');
+
+// 16-byte IV at start, then ciphertext, then 16-byte GCM tag at end
+const iv = ciphertext.slice(0, 16);
+const encrypted = ciphertext.slice(16, -16);
+const tag = ciphertext.slice(-16);
+
+try {
+ const decipher = crypto.createDecipheriv('aes-256-gcm', key, iv);
+ decipher.setAuthTag(tag);
+
+ let decrypted = decipher.update(encrypted);
+ decrypted = Buffer.concat([decrypted, decipher.final()]);
+
+ console.log(decrypted.toString('utf8'));
+} catch (e) {
+ console.error('Decryption failed:', e.message);
+ process.exit(1);
+}
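
The comments above pin down the Floccus container: a base64 blob of 16-byte IV, then ciphertext, then 16-byte GCM tag, with the key derived via PBKDF2-SHA256 (250,000 iterations) from the password and the UTF-8 salt string. A minimal Python equivalent, assuming the third-party `cryptography` package is installed, in case one of the Python scripts ever needs to read the bookmarks without shelling out to node:

```python
#!/usr/bin/env python3
# Sketch of the same Floccus decryption in Python; assumes the `cryptography` package.
import base64
import hashlib
import json

from cryptography.hazmat.primitives.ciphers.aead import AESGCM

data = json.load(open("/var/www/webdav/bookmarks.xbel"))
password = json.load(open("/etc/automation/bookmarks.json"))["password"]

# Same KDF parameters as the node scripts: PBKDF2-SHA256, 250k iterations, 32-byte key.
key = hashlib.pbkdf2_hmac("sha256", password.encode(), data["salt"].encode(), 250_000)

blob = base64.b64decode(data["ciphertext"])
iv, body = blob[:16], blob[16:]  # AESGCM.decrypt() wants ciphertext||tag, which is exactly blob[16:]
print(AESGCM(key).decrypt(iv, body, None).decode("utf-8"))
```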
diff --git a/generate_stats_page.py b/generate_stats_page.py
new file mode 100644
index 0000000..efbd423
--- /dev/null
+++ b/generate_stats_page.py
@@ -0,0 +1,285 @@
+#!/usr/bin/env python3
+"""Generate a retro stats page for Susan.
+
+90s CS professor homepage aesthetic. Pure HTML, no JS.
+Regenerate periodically via cron.
+"""
+
+import datetime
+import os
+import subprocess
+import re
+
+OUTPUT = "/var/www/webdav/obsidian/stats.html"
+
+def cmd(c):
+ try:
+ return subprocess.check_output(c, shell=True, stderr=subprocess.DEVNULL, timeout=10).decode().strip()
+ except:
+ return ""
+
+def get_uptime():
+ up = cmd("uptime -p")
+ return up.replace("up ", "") if up else "unknown"
+
+def get_uptime_since():
+ return cmd("uptime -s")
+
+def get_load():
+ load = cmd("cat /proc/loadavg")
+ return load.split()[:3] if load else ["?", "?", "?"]
+
+def get_memory():
+ mem = cmd("free -h | grep Mem")
+ parts = mem.split()
+ if len(parts) >= 7:
+ return {"total": parts[1], "used": parts[2], "free": parts[3], "available": parts[6]}
+ return {"total": "?", "used": "?", "free": "?", "available": "?"}
+
+def get_disk():
+ disks = []
+ for line in cmd("df -h /disks /home 2>/dev/null").split("\n")[1:]:
+ parts = line.split()
+ if len(parts) >= 6:
+ disks.append({"fs": parts[0], "size": parts[1], "used": parts[2], "avail": parts[3], "pct": parts[4], "mount": parts[5]})
+ return disks
+
+def get_services():
+ services = []
+ lines = cmd("systemctl list-units --type=service --state=running --no-pager --no-legend").split("\n")
+ targets = ["jellyfin", "navidrome", "qbittorrent", "sonarr", "radarr", "lidarr",
+ "readarr", "prowlarr", "slskd", "nginx", "audiobookshelf", "openclaw-gateway"]
+ for line in lines:
+ for t in targets:
+ if t in line.lower():
+ name = line.split()[0].replace(".service", "")
+ services.append(name)
+ return sorted(services)
+
+def get_media_counts():
+ films = int(cmd("find /disks/Plex/Films -maxdepth 1 -type d | wc -l") or 1) - 1
+ tv = int(cmd("find /disks/Plex/TV -maxdepth 1 -type d | wc -l") or 1) - 1
+ anime = int(cmd("find /disks/Plex/Anime -maxdepth 1 -type d | wc -l") or 1) - 1
+ tracks = int(cmd("find /disks/Plex/Music -type f \\( -name '*.flac' -o -name '*.mp3' -o -name '*.ogg' -o -name '*.opus' \\) | wc -l") or 0)
+ artists = int(cmd("ls -1d /disks/Plex/Music/*/ 2>/dev/null | grep -v -E 'venv|lib|bin|data|include|_|pyvenv' | wc -l") or 0)
+ return {"films": films, "tv": tv, "anime": anime, "tracks": tracks, "artists": artists}
+
+def get_cpu():
+ model = cmd("grep 'model name' /proc/cpuinfo | head -1 | cut -d: -f2").strip()
+ cores = cmd("nproc")
+ return model, cores
+
+def get_packages():
+ return cmd("dpkg -l | grep '^ii' | wc -l")
+
+def get_kernel():
+ return cmd("uname -r")
+
+def get_os():
+ return cmd("grep PRETTY_NAME /etc/os-release | cut -d'\"' -f2")
+
+def get_install_date():
+ d = cmd("stat -c %w /")
+ if d and d != "-":
+ return d.split(".")[0]
+ return "unknown"
+
+COUNTER_FILE = os.path.join(os.path.dirname(__file__), "..", "data", "visitor_counter.txt")
+
+def get_and_increment_counter():
+ """Read and increment a persistent visitor counter."""
+ os.makedirs(os.path.dirname(COUNTER_FILE), exist_ok=True)
+ count = 0
+ if os.path.exists(COUNTER_FILE):
+ try:
+ count = int(open(COUNTER_FILE).read().strip())
+        except Exception:
+ count = 0
+ count += 1
+ with open(COUNTER_FILE, "w") as f:
+ f.write(str(count))
+ return count
+
+now = datetime.datetime.now()
+uptime = get_uptime()
+uptime_since = get_uptime_since()
+load = get_load()
+mem = get_memory()
+disks = get_disk()
+services = get_services()
+media = get_media_counts()
+cpu_model, cpu_cores = get_cpu()
+packages = get_packages()
+kernel = get_kernel()
+os_name = get_os()
+install_date = get_install_date()
+visitor_count = get_and_increment_counter()
+
+html = f"""<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<meta http-equiv="refresh" content="300">
+<title>Susan - System Status</title>
+<style type="text/css">
+body {{
+ background-color: #e8e8e8;
+ background-image: url("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAMklEQVQYV2N89+7dfwYGBgZGRkYGBgYmBjIBE7mKR5UOPIUMDIyMjGQrHlU68BQCABPnC/0ZkzYUAAAAAElFTkSuQmCC");
+ font-family: "Times New Roman", Times, serif;
+ color: #333;
+ margin: 20px 40px;
+ font-size: 14pt;
+}}
+h1 {{
+ font-size: 22pt;
+ color: #003366;
+ border-bottom: 2px solid #003366;
+ padding-bottom: 4px;
+}}
+h2 {{
+ font-size: 16pt;
+ color: #003366;
+ margin-top: 24px;
+}}
+table {{
+ border-collapse: collapse;
+ margin: 8px 0;
+}}
+td, th {{
+ border: 2px solid #003366;
+ padding: 4px 10px;
+ text-align: left;
+ background-color: #f5f5f0;
+}}
+th {{
+ background-color: #d0d0c8;
+ font-weight: bold;
+ color: #003366;
+}}
+a {{
+ color: #003366;
+}}
+hr {{
+ border: none;
+ border-top: 2px solid #003366;
+ margin: 16px 0;
+}}
+.footer {{
+ margin-top: 30px;
+ font-size: 10pt;
+ color: #666;
+ border-top: 1px solid #999;
+ padding-top: 6px;
+}}
+.status-ok {{
+ color: green;
+ font-weight: bold;
+}}
+.status-warn {{
+ color: #cc7700;
+ font-weight: bold;
+}}
+.counter {{
+ font-family: "Courier New", monospace;
+ background-color: #000;
+ color: #0f0;
+ padding: 2px 6px;
+ font-size: 10pt;
+}}
+</style>
+</head>
+<body>
+
+<h1>Susan &mdash; System Status</h1>
+
+<hr>
+
+<h2>General Information</h2>
+
+<table>
+<tr><th>Hostname</th><td>susan</td></tr>
+<tr><th>Operating System</th><td>{os_name}</td></tr>
+<tr><th>Kernel</th><td>{kernel}</td></tr>
+<tr><th>Processor</th><td>{cpu_model} ({cpu_cores} cores)</td></tr>
+<tr><th>Installed Packages</th><td>{packages}</td></tr>
+<tr><th>First Installed</th><td>{install_date}</td></tr>
+</table>
+
+<h2>Uptime &amp; Load</h2>
+
+<table>
+<tr><th>Uptime</th><td>{uptime}</td></tr>
+<tr><th>Up Since</th><td>{uptime_since}</td></tr>
+<tr><th>Load Average</th><td>{load[0]}, {load[1]}, {load[2]} (1, 5, 15 min)</td></tr>
+</table>
+
+<h2>Memory</h2>
+
+<table>
+<tr><th>Total</th><th>Used</th><th>Free</th><th>Available</th></tr>
+<tr><td>{mem['total']}</td><td>{mem['used']}</td><td>{mem['free']}</td><td>{mem['available']}</td></tr>
+</table>
+
+<h2>Disk Usage</h2>
+
+<table>
+<tr><th>Mount</th><th>Size</th><th>Used</th><th>Available</th><th>Use%</th></tr>
+"""
+
+for d in disks:
+ pct = int(d["pct"].replace("%", "")) if d["pct"].replace("%", "").isdigit() else 0
+ cls = "status-warn" if pct > 85 else "status-ok"
+ html += f'<tr><td>{d["mount"]}</td><td>{d["size"]}</td><td>{d["used"]}</td><td>{d["avail"]}</td><td class="{cls}">{d["pct"]}</td></tr>\n'
+
+html += f"""</table>
+
+<h2>Running Services</h2>
+
+<table>
+<tr><th>Service</th><th>Status</th></tr>
+"""
+
+for s in services:
+ html += f'<tr><td>{s}</td><td class="status-ok">running</td></tr>\n'
+
+html += f"""</table>
+
+<h2>Media Library</h2>
+
+<table>
+<tr><th>Category</th><th>Count</th></tr>
+<tr><td>Films</td><td>{media['films']}</td></tr>
+<tr><td>TV Shows</td><td>{media['tv']}</td></tr>
+<tr><td>Anime</td><td>{media['anime']}</td></tr>
+<tr><td>Music Artists</td><td>{media['artists']}</td></tr>
+<tr><td>Music Tracks</td><td>{media['tracks']}</td></tr>
+</table>
+
+<hr>
+
+<div class="footer">
+<p>
+You are visitor number <span class="counter">{visitor_count:06,}</span> since December 2023.
+</p>
+<p>
+This page was last generated on <b>{now.strftime("%A, %d %B %Y at %H:%M:%S")}</b>.
+<br>
+No JavaScript was harmed in the making of this page.
+<br>
+Best viewed with any browser. Optimised for 800&times;600.
+</p>
+</div>
+
+</body>
+</html>
+"""
+
+os.makedirs(os.path.dirname(OUTPUT), exist_ok=True)
+with open(OUTPUT, "w") as f:
+ f.write(html)
+
+print(f"Generated stats page: {OUTPUT}")
+print(f" Uptime: {uptime}")
+print(f" Services: {len(services)}")
+print(f" Films: {media['films']}, TV: {media['tv']}, Anime: {media['anime']}")
+print(f" Music: {media['tracks']} tracks, {media['artists']} artists")
diff --git a/import_music.sh b/import_music.sh
new file mode 100755
index 0000000..bf6c1aa
--- /dev/null
+++ b/import_music.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# Import, transcode, and organize music from slskd downloads
+# Usage: ./import_music.sh [--dry-run] [--keep-flac]
+
+INGEST_DIR="/disks/Plex/Music/_ingest"
+LIBRARY_DIR="/disks/Plex/Music"
+GROUP="mediaserver"
+BITRATE="128k"
+
+DRY_RUN=false
+KEEP_FLAC=false
+
+for arg in "$@"; do
+ case $arg in
+ --dry-run) DRY_RUN=true ;;
+ --keep-flac) KEEP_FLAC=true ;;
+ esac
+done
+
+echo "=== Music Import Started ==="
+$DRY_RUN && echo "DRY RUN MODE"
+
+# Skip these directories
+shopt -s extglob
+
+cd "$INGEST_DIR" || exit 1
+
+for album_dir in */; do
+ [[ ! -d "$album_dir" ]] && continue
+ dir_name="${album_dir%/}"
+
+ # Skip special dirs
+ [[ "$dir_name" == "incomplete" ]] && continue
+ [[ "$dir_name" == "downloads" ]] && continue
+
+ # Count audio files
+ audio_count=$(ls -1 "$album_dir"*.flac "$album_dir"*.mp3 "$album_dir"*.opus 2>/dev/null | wc -l)
+ [[ $audio_count -lt 2 ]] && continue
+
+ echo ""
+ echo "Processing: $dir_name ($audio_count files)"
+
+ # Get first audio file for metadata
+ first_file=$(ls "$album_dir"*.flac "$album_dir"*.mp3 2>/dev/null | head -1)
+ [[ -z "$first_file" ]] && continue
+
+ # Extract metadata
+ artist=$(ffprobe -v quiet -show_entries format_tags=artist -of default=noprint_wrappers=1:nokey=1 "$first_file" 2>/dev/null)
+ album_artist=$(ffprobe -v quiet -show_entries format_tags=album_artist -of default=noprint_wrappers=1:nokey=1 "$first_file" 2>/dev/null)
+ album=$(ffprobe -v quiet -show_entries format_tags=album -of default=noprint_wrappers=1:nokey=1 "$first_file" 2>/dev/null)
+
+ # Prefer album_artist over artist
+ [[ -n "$album_artist" ]] && artist="$album_artist"
+
+ # Clean for filesystem
+ artist=$(echo "$artist" | tr -d '<>:"/\\|?*' | sed 's/\.$//')
+ album=$(echo "$album" | tr -d '<>:"/\\|?*' | sed 's/\.$//')
+
+ if [[ -z "$artist" ]] || [[ -z "$album" ]]; then
+ echo " ⚠ Missing metadata, skipping"
+ continue
+ fi
+
+ dest_dir="$LIBRARY_DIR/$artist/$album"
+ echo " → $artist / $album"
+
+ if $DRY_RUN; then
+ echo " [DRY RUN] Would transcode to $dest_dir"
+ continue
+ fi
+
+ mkdir -p "$dest_dir"
+
+ # Transcode FLACs
+ for flac in "$album_dir"*.flac "$album_dir"*.FLAC; do
+ [[ ! -f "$flac" ]] && continue
+
+ base=$(basename "$flac")
+ # Remove .flac extension properly
+ name="${base%.[Ff][Ll][Aa][Cc]}"
+ opus_out="$dest_dir/${name}.opus"
+
+ echo " Transcoding: $name"
+ ffmpeg -hide_banner -loglevel error -i "$flac" \
+ -c:a libopus -b:a "$BITRATE" -vbr on \
+ -map_metadata 0 -y "$opus_out"
+
+ [[ -f "$opus_out" ]] && chgrp "$GROUP" "$opus_out" 2>/dev/null
+ done
+
+ # Remove any FLACs from destination (we only want Opus there)
+ for flac in "$dest_dir"/*.flac "$dest_dir"/*.FLAC; do
+ [[ -f "$flac" ]] && rm "$flac"
+ done
+
+ # Copy non-FLAC audio
+ for audio in "$album_dir"*.mp3 "$album_dir"*.m4a "$album_dir"*.ogg; do
+ [[ -f "$audio" ]] && cp "$audio" "$dest_dir/"
+ done
+
+ # Copy artwork
+ for art in "$album_dir"*.jpg "$album_dir"*.png "$album_dir"cover.* "$album_dir"folder.*; do
+ [[ -f "$art" ]] && cp "$art" "$dest_dir/"
+ done
+
+ # Fix permissions
+ chgrp -R "$GROUP" "$dest_dir" 2>/dev/null
+ chmod -R g+rw "$dest_dir" 2>/dev/null
+
+ # Run beets for tagging
+ echo " Running beets..."
+ beet import -q "$dest_dir" 2>/dev/null
+ if [[ $? -eq 0 ]]; then
+ echo " ✓ Beets tagging complete"
+ else
+ echo " ⚠ Beets tagging skipped or failed (check manually)"
+ fi
+
+ # Verify
+ new_count=$(ls -1 "$dest_dir"/*.opus "$dest_dir"/*.mp3 2>/dev/null | wc -l)
+ if [[ $new_count -ge 3 ]]; then
+ echo " ✓ Imported $new_count files"
+ rm -rf "$INGEST_DIR/$dir_name"
+ echo " ✓ Cleaned source"
+ else
+ echo " ✗ Verification failed"
+ fi
+done
+
+echo ""
+echo "=== Done ==="
diff --git a/morning_report.py b/morning_report.py
new file mode 100644
index 0000000..a1e8103
--- /dev/null
+++ b/morning_report.py
@@ -0,0 +1,438 @@
+#!/usr/bin/env python3
+"""
+Susan Morning Report
+Generates a system health report and emails it to Tom.
+Run via cron at 06:45 (after transcoder finishes).
+"""
+
+import subprocess
+import json
+import os
+import re
+import smtplib
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
+from datetime import datetime, timedelta
+from pathlib import Path
+
+# Config
+EMAIL_CONFIG = "/etc/susan/email.json"
+TRANSCODER_LOG = "/var/log/transcoder.log"
+TRANSCODER_DB = "/var/lib/transcoder/cache.db"
+TO_EMAIL = "tom@tomflux.xyz"
+
+
+def run_cmd(cmd: str, timeout: int = 30) -> str:
+ """Run a shell command and return output."""
+ try:
+ result = subprocess.run(
+ cmd, shell=True, capture_output=True, text=True, timeout=timeout
+ )
+ return result.stdout.strip()
+ except Exception as e:
+ return f"Error: {e}"
+
+
+def get_uptime() -> dict:
+ """Get system uptime info."""
+ uptime_raw = run_cmd("uptime -p")
+ load = run_cmd("cat /proc/loadavg").split()[:3]
+ boot_time = run_cmd("uptime -s")
+
+ return {
+ "uptime": uptime_raw.replace("up ", ""),
+ "since": boot_time,
+ "load_avg": f"{load[0]} / {load[1]} / {load[2]}" if len(load) >= 3 else "unknown"
+ }
+
+
+def get_disk_space() -> list:
+ """Get disk usage for important mounts."""
+ disks = []
+ # Only show root and /disks* mounts, exclude virtual filesystems
+ df_output = run_cmd("df -h --output=target,size,used,avail,pcent -x tmpfs -x devtmpfs -x squashfs | tail -n +2")
+
+ for line in df_output.split('\n'):
+ if line.strip():
+ parts = line.split()
+ if len(parts) >= 5:
+ mount = parts[0]
+ # Only include / and /disks*, /home, /var
+ if mount in ['/', '/home', '/var'] or mount.startswith('/disks'):
+ disks.append({
+ "mount": mount,
+ "size": parts[1],
+ "used": parts[2],
+ "avail": parts[3],
+ "percent": parts[4]
+ })
+ return disks
+
+
+def get_raid_status() -> dict:
+ """Get 3ware RAID status using tw_cli."""
+ status = {"available": False, "details": None, "drives": [], "array": None}
+
+ # Try tw_cli first
+ tw_output = run_cmd("sudo tw_cli /c0 show 2>/dev/null")
+ if "Error" not in tw_output and tw_output and "Unit" in tw_output:
+ status["available"] = True
+ status["details"] = tw_output
+
+ # Parse array status
+ for line in tw_output.split('\n'):
+ if line.startswith('u0'):
+ parts = line.split()
+ if len(parts) >= 3:
+ status["array"] = {
+ "unit": parts[0],
+ "type": parts[1],
+ "status": parts[2],
+ "verify": parts[4] if len(parts) > 4 and '%' in parts[4] else None
+ }
+ # Parse drive lines (p4, p5, p6, etc)
+ if line.startswith('p'):
+ parts = line.split()
+ if len(parts) >= 3:
+ status["drives"].append({
+ "port": parts[0],
+ "status": parts[1],
+ "size": parts[3] if len(parts) > 3 else "unknown"
+ })
+ else:
+ # Fallback to checking for mdadm
+ md_output = run_cmd("cat /proc/mdstat 2>/dev/null")
+ if md_output and "Error" not in md_output and "md" in md_output:
+ status["available"] = True
+ status["details"] = md_output
+ status["type"] = "mdadm"
+
+ return status
+
+
+def get_smart_status() -> list:
+ """Get SMART status for drives. Note: requires sudo for smartctl."""
+ drives = []
+
+ # Find physical block devices (skip loop, ram, etc)
+ lsblk = run_cmd("lsblk -d -o NAME,SIZE,TYPE | grep disk | grep -v loop")
+ for line in lsblk.split('\n'):
+ if line.strip():
+ parts = line.split()
+ if parts and not parts[0].startswith('loop'):
+ dev = f"/dev/{parts[0]}"
+ size = parts[1] if len(parts) > 1 else "unknown"
+
+ # Try smartctl with sudo
+ smart = run_cmd(f"sudo smartctl -H {dev} 2>/dev/null | grep -iE 'overall-health|result|PASSED|FAILED'")
+ reallocated = run_cmd(f"sudo smartctl -A {dev} 2>/dev/null | grep -i 'Reallocated_Sector'")
+
+ # Determine health status
+ if "PASSED" in smart:
+ health = "PASSED"
+ elif "FAILED" in smart:
+ health = "FAILED"
+ elif "sudo:" in smart or not smart:
+ health = "needs sudo"
+ elif "Unable to detect" in run_cmd(f"sudo smartctl -i {dev} 2>&1"):
+ health = "RAID array (skip)"
+ else:
+ health = "unknown"
+
+ # Skip RAID virtual devices (they show as large and can't be queried)
+ if health == "RAID array (skip)":
+ continue
+
+ # Also skip if smartctl says it's not a physical device
+ if "unknown" in health and "T" in size: # Multi-TB device with unknown = likely RAID
+ continue
+
+ drive_info = {
+ "device": dev,
+ "size": size,
+ "health": health,
+ }
+
+ if reallocated:
+ # Extract reallocated sector count (last number on line)
+ match = re.search(r'(\d+)\s*$', reallocated.strip())
+ if match:
+ drive_info["reallocated_sectors"] = int(match.group(1))
+
+ drives.append(drive_info)
+
+ return drives
+
+
+def get_memory() -> dict:
+ """Get memory usage."""
+ mem_output = run_cmd("free -h | grep Mem")
+ parts = mem_output.split()
+
+ if len(parts) >= 4:
+ return {
+ "total": parts[1],
+ "used": parts[2],
+ "available": parts[6] if len(parts) > 6 else parts[3]
+ }
+ return {"total": "unknown", "used": "unknown", "available": "unknown"}
+
+
+def get_cpu_info() -> dict:
+ """Get CPU info and temperature."""
+ info = {}
+
+ # CPU model
+ model = run_cmd("grep 'model name' /proc/cpuinfo | head -1 | cut -d: -f2")
+ info["model"] = model.strip() if model else "unknown"
+
+ # Temperatures
+ temps = run_cmd("sensors 2>/dev/null | grep -E 'Core|temp' | head -5")
+ info["temps"] = temps if temps else "sensors not available"
+
+ return info
+
+
+def get_transcoder_status() -> dict:
+ """Get last night's transcoder results + queue stats from DB."""
+ import sqlite3
+
+ status = {"ran": False, "summary": None, "queue": None}
+
+ # Get queue stats from database
+ db_path = Path(TRANSCODER_DB)
+ if db_path.exists():
+ try:
+ conn = sqlite3.connect(TRANSCODER_DB)
+
+ # Pending files
+ cursor = conn.execute("""
+ SELECT COUNT(*), SUM(original_size)
+ FROM files WHERE is_hevc = 0 AND status = 'pending'
+ """)
+ row = cursor.fetchone()
+ pending_count = row[0] or 0
+ pending_size = row[1] or 0
+
+ # Already HEVC
+ cursor = conn.execute("SELECT COUNT(*) FROM files WHERE is_hevc = 1")
+ hevc_count = cursor.fetchone()[0] or 0
+
+ # Lifetime stats
+ cursor = conn.execute("SELECT total_files_transcoded, total_space_saved FROM stats WHERE id = 1")
+ row = cursor.fetchone()
+ lifetime_transcoded = row[0] if row else 0
+ lifetime_saved = row[1] if row else 0
+
+ # Failed count
+ cursor = conn.execute("SELECT COUNT(*) FROM files WHERE status = 'failed'")
+ failed_count = cursor.fetchone()[0] or 0
+
+ conn.close()
+
+ status["queue"] = {
+ "pending_count": pending_count,
+ "pending_size": pending_size,
+ "pending_size_human": f"{pending_size / (1024**3):.1f} GB" if pending_size else "0 GB",
+ "hevc_count": hevc_count,
+ "failed_count": failed_count,
+ "lifetime_transcoded": lifetime_transcoded,
+ "lifetime_saved": lifetime_saved,
+ "lifetime_saved_human": f"{lifetime_saved / (1024**3):.1f} GB" if lifetime_saved else "0 GB"
+ }
+ except Exception as e:
+ status["queue"] = {"error": str(e)}
+
+ # Check log for last night's run
+ log_path = Path(TRANSCODER_LOG)
+ if log_path.exists():
+ try:
+ log_content = log_path.read_text()
+ except PermissionError:
+ log_content = run_cmd(f"sudo cat {TRANSCODER_LOG} 2>/dev/null")
+ if "Error" in log_content:
+ log_content = ""
+
+ if log_content:
+ # Find the last SESSION COMPLETE block
+ sessions = re.findall(
+ r'SESSION COMPLETE.*?Transcoded:\s+(\d+).*?Failed:\s+(\d+).*?Space saved:\s+([\d.]+\s+\w+)',
+ log_content, re.DOTALL
+ )
+
+ if sessions:
+ last = sessions[-1]
+ status["ran"] = True
+ status["transcoded"] = int(last[0])
+ status["failed"] = int(last[1])
+ status["space_saved"] = last[2]
+ status["summary"] = f"{last[0]} transcoded, {last[1]} failed, {last[2]} saved"
+ else:
+ today = datetime.now().strftime("%Y-%m-%d")
+ yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
+ if today in log_content or yesterday in log_content:
+ status["ran"] = True
+ status["summary"] = "Ran (check log for details)"
+
+ return status
+
+
+def get_failed_services() -> list:
+ """Get any failed systemd services."""
+    output = run_cmd(r"systemctl --failed --no-pager --plain | grep -E '\.service|\.socket|\.mount' | awk '{print $1}'")
+ failed = []
+ for line in output.split('\n'):
+ name = line.strip()
+ if name and not name.startswith('UNIT') and '●' not in name:
+ failed.append(name)
+ return failed
+
+
+def generate_report() -> str:
+ """Generate the full morning report."""
+ now = datetime.now()
+
+ report = []
+ report.append("=" * 50)
+ report.append(f"🖥️ SUSAN MORNING REPORT")
+ report.append(f"📅 {now.strftime('%A, %B %d %Y at %H:%M')}")
+ report.append("=" * 50)
+ report.append("")
+
+ # Uptime
+ uptime = get_uptime()
+ report.append("⏱️ UPTIME")
+ report.append(f" Up: {uptime['uptime']}")
+ report.append(f" Since: {uptime['since']}")
+ report.append(f" Load: {uptime['load_avg']}")
+ report.append("")
+
+ # Memory
+ mem = get_memory()
+ report.append("🧠 MEMORY")
+ report.append(f" Used: {mem['used']} / {mem['total']}")
+ report.append(f" Available: {mem['available']}")
+ report.append("")
+
+ # Disk Space
+ disks = get_disk_space()
+ report.append("💾 DISK SPACE")
+ for disk in disks:
+ warn = " ⚠️" if disk['percent'].replace('%', '').isdigit() and int(disk['percent'].replace('%', '')) > 85 else ""
+ report.append(f" {disk['mount']}: {disk['used']}/{disk['size']} ({disk['percent']} used){warn}")
+ report.append("")
+
+ # RAID Status
+ raid = get_raid_status()
+ report.append("🔒 RAID STATUS")
+ if raid["available"]:
+ if raid.get("array"):
+ arr = raid["array"]
+ status_icon = "✅" if arr["status"] == "OK" else ("⚠️" if "VERIFY" in arr["status"] else "❌")
+ report.append(f" Array: {arr['type']} {status_icon} {arr['status']}")
+ if arr.get("verify"):
+ report.append(f" Verify progress: {arr['verify']}")
+
+ if raid.get("drives"):
+ report.append(f" Drives: {len(raid['drives'])} disks")
+ for drive in raid["drives"]:
+ d_icon = "✅" if drive["status"] == "OK" else "❌"
+ report.append(f" {drive['port']}: {d_icon} {drive['status']} ({drive['size']} TB)")
+ else:
+ report.append(" tw_cli not available - install 3ware tools for RAID monitoring")
+ report.append("")
+
+ # Drive Health (SMART)
+ drives = get_smart_status()
+ if drives:
+ report.append("🔧 DRIVE HEALTH (SMART)")
+ for drive in drives:
+ warn = ""
+ if drive.get("reallocated_sectors", 0) > 0:
+ warn = f" ⚠️ {drive['reallocated_sectors']} reallocated sectors!"
+ health_icon = "✅" if drive["health"] == "PASSED" else "❌"
+ report.append(f" {drive['device']} ({drive['size']}): {health_icon} {drive['health']}{warn}")
+ report.append("")
+
+ # Transcoder
+ transcoder = get_transcoder_status()
+ report.append("🎬 TRANSCODER")
+
+ # Last night's run
+ if transcoder.get("ran"):
+ report.append(f" Last run: ✅ {transcoder.get('summary', 'Completed')}")
+ elif transcoder.get("summary"):
+ report.append(f" Last run: ⏸️ {transcoder['summary']}")
+ else:
+ report.append(f" Last run: No data")
+
+ # Queue stats from database
+ if transcoder.get("queue") and not transcoder["queue"].get("error"):
+ q = transcoder["queue"]
+ report.append(f" Queue: {q['pending_count']} files ({q['pending_size_human']}) waiting")
+ if q['failed_count'] > 0:
+ report.append(f" ⚠️ Failed: {q['failed_count']} files need attention")
+ report.append(f" Library: {q['hevc_count']} files already HEVC")
+ if q['lifetime_transcoded'] > 0:
+ report.append(f" Lifetime: {q['lifetime_transcoded']} transcoded, {q['lifetime_saved_human']} saved")
+ elif transcoder.get("queue", {}).get("error"):
+ report.append(f" DB error: {transcoder['queue']['error']}")
+
+ report.append("")
+
+ # Failed Services
+ failed = get_failed_services()
+ report.append("🚨 SYSTEMD SERVICES")
+ if failed:
+ report.append(f" ❌ {len(failed)} failed: {', '.join(failed)}")
+ else:
+ report.append(" ✅ All services OK")
+ report.append("")
+
+ report.append("=" * 50)
+ report.append("End of report")
+ report.append("=" * 50)
+
+ return "\n".join(report)
+
+
+def send_email(subject: str, body: str):
+ """Send the report via email."""
+ with open(EMAIL_CONFIG) as f:
+ cfg = json.load(f)
+
+ smtp = cfg['smtp']
+
+ msg = MIMEMultipart()
+ msg['Subject'] = subject
+ msg['From'] = smtp['from']
+ msg['To'] = TO_EMAIL
+
+ # Plain text version
+ msg.attach(MIMEText(body, 'plain'))
+
+ server = smtplib.SMTP_SSL(smtp['server'], smtp['port'])
+ server.login(smtp['username'], smtp['password'])
+ server.sendmail(smtp['from'], [TO_EMAIL], msg.as_string())
+ server.quit()
+
+
+def main():
+ report = generate_report()
+
+ # Print to stdout (for logging)
+ print(report)
+
+ # Email it
+ today = datetime.now().strftime("%Y-%m-%d")
+ subject = f"🖥️ Susan Morning Report - {today}"
+
+ try:
+ send_email(subject, report)
+ print("\n✅ Report emailed successfully")
+ except Exception as e:
+ print(f"\n❌ Failed to send email: {e}")
+
+
+if __name__ == "__main__":
+ main()
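
`send_email()` above only uses five fields from `/etc/susan/email.json`. Sketched as the dict that `json.load()` has to produce (placeholder values only; the shape is assumed from the lookups in the code, the real file stays outside the repo):

```python
# Shape that send_email() expects from /etc/susan/email.json (placeholder values only).
cfg = {
    "smtp": {
        "server": "smtp.example.com",       # host passed to smtplib.SMTP_SSL
        "port": 465,                        # implicit-TLS port (assumption; any SMTP_SSL port works)
        "username": "susan@example.com",
        "password": "app-specific-password",
        "from": "susan@example.com",        # used for the From: header and the envelope sender
    }
}
```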
diff --git a/music_config.example.json b/music_config.example.json
new file mode 100644
index 0000000..ac8c622
--- /dev/null
+++ b/music_config.example.json
@@ -0,0 +1,15 @@
+{
+ "lastfm": {
+ "api_key": "YOUR_LASTFM_API_KEY",
+ "username": "tomflux"
+ },
+ "navidrome": {
+ "url": "https://navi.jihakuz.xyz",
+ "username": "YOUR_NAVIDROME_USERNAME",
+ "password": "YOUR_NAVIDROME_PASSWORD"
+ },
+ "slskd": {
+ "url": "https://music.jihakuz.xyz",
+ "api_key": "YOUR_SLSKD_API_KEY"
+ }
+}
diff --git a/music_recommender.py b/music_recommender.py
new file mode 100755
index 0000000..a366f9a
--- /dev/null
+++ b/music_recommender.py
@@ -0,0 +1,1038 @@
+#!/usr/bin/env python3
+"""
+Music Recommender - Finds music you might like based on Last.fm history,
+cross-referenced with your Navidrome collection. Can auto-download via slskd.
+
+Usage:
+ python music_recommender.py recommend # Get recommendations
+ python music_recommender.py gaps # Find library gaps
+ python music_recommender.py recent # Recent scrobbles
+ python music_recommender.py search "query" # Search Navidrome
+ python music_recommender.py surprise # Pick & download an album overnight
+ python music_recommender.py slskd "query" # Search Soulseek
+ python music_recommender.py download "artist - album" # Download from Soulseek
+"""
+
+import argparse
+import hashlib
+import json
+import os
+import random
+import re
+import string
+import sys
+import time
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Optional
+from urllib.parse import urlencode
+
+import requests
+
+# Default config path
+CONFIG_PATH = Path(__file__).parent / "music_config.json"
+STATE_PATH = Path(__file__).parent / "music_state.json"
+
+
+class LastFM:
+ """Last.fm API client."""
+
+ BASE_URL = "https://ws.audioscrobbler.com/2.0/"
+
+ def __init__(self, api_key: str, username: str):
+ self.api_key = api_key
+ self.username = username
+
+ def _request(self, method: str, **params) -> dict:
+ params.update({
+ "method": method,
+ "api_key": self.api_key,
+ "format": "json",
+ })
+ resp = requests.get(self.BASE_URL, params=params)
+ resp.raise_for_status()
+ return resp.json()
+
+ def get_recent_tracks(self, limit: int = 50) -> list[dict]:
+ """Get recently played tracks."""
+ data = self._request("user.getrecenttracks", user=self.username, limit=limit)
+ tracks = data.get("recenttracks", {}).get("track", [])
+ return [
+ {
+ "artist": t.get("artist", {}).get("#text", ""),
+ "album": t.get("album", {}).get("#text", ""),
+ "track": t.get("name", ""),
+ "now_playing": "@attr" in t and t["@attr"].get("nowplaying") == "true",
+ }
+ for t in tracks
+ ]
+
+ def get_top_artists(self, period: str = "3month", limit: int = 50) -> list[dict]:
+ """Get top artists. Period: overall, 7day, 1month, 3month, 6month, 12month."""
+ data = self._request("user.gettopartists", user=self.username, period=period, limit=limit)
+ artists = data.get("topartists", {}).get("artist", [])
+ return [
+ {
+ "name": a.get("name", ""),
+ "playcount": int(a.get("playcount", 0)),
+ "url": a.get("url", ""),
+ }
+ for a in artists
+ ]
+
+ def get_similar_artists(self, artist: str, limit: int = 10) -> list[dict]:
+ """Get artists similar to the given artist."""
+ try:
+ data = self._request("artist.getsimilar", artist=artist, limit=limit)
+ similar = data.get("similarartists", {}).get("artist", [])
+ return [
+ {
+ "name": a.get("name", ""),
+ "match": float(a.get("match", 0)),
+ "url": a.get("url", ""),
+ }
+ for a in similar
+ ]
+ except Exception:
+ return []
+
+ def get_artist_top_albums(self, artist: str, limit: int = 5) -> list[dict]:
+ """Get top albums for an artist."""
+ try:
+ data = self._request("artist.gettopalbums", artist=artist, limit=limit)
+ albums = data.get("topalbums", {}).get("album", [])
+ return [
+ {
+ "name": a.get("name", ""),
+ "artist": artist,
+ "playcount": int(a.get("playcount", 0)),
+ "url": a.get("url", ""),
+ }
+ for a in albums
+ if a.get("name") and a.get("name") != "(null)"
+ ]
+ except Exception:
+ return []
+
+ def get_recommendations(self, limit: int = 20) -> list[dict]:
+ """Get recommended artists based on listening history."""
+ top = self.get_top_artists(period="3month", limit=10)
+
+ recommendations = {}
+ for artist in top:
+ similar = self.get_similar_artists(artist["name"], limit=5)
+ for s in similar:
+ name = s["name"]
+ if name not in recommendations and name not in [a["name"] for a in top]:
+ recommendations[name] = {
+ "name": name,
+ "because_of": artist["name"],
+ "match": s["match"],
+ }
+
+ sorted_recs = sorted(recommendations.values(), key=lambda x: x["match"], reverse=True)
+ return sorted_recs[:limit]
+
+
+class Navidrome:
+ """Navidrome/Subsonic API client."""
+
+ def __init__(self, url: str, username: str, password: str):
+ self.base_url = url.rstrip("/")
+ self.username = username
+ self.password = password
+
+ def _auth_params(self) -> dict:
+ salt = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
+ token = hashlib.md5((self.password + salt).encode()).hexdigest()
+ return {
+ "u": self.username,
+ "t": token,
+ "s": salt,
+ "v": "1.16.1",
+ "c": "music_recommender",
+ "f": "json",
+ }
+
+ def _request(self, endpoint: str, **params) -> dict:
+ params.update(self._auth_params())
+ url = f"{self.base_url}/rest/{endpoint}"
+ resp = requests.get(url, params=params)
+ resp.raise_for_status()
+ data = resp.json()
+
+ sr = data.get("subsonic-response", {})
+ if sr.get("status") == "failed":
+ error = sr.get("error", {})
+ raise Exception(f"Subsonic error {error.get('code')}: {error.get('message')}")
+
+ return sr
+
+ def ping(self) -> bool:
+ try:
+ self._request("ping")
+ return True
+ except Exception:
+ return False
+
+ def get_artists(self) -> list[dict]:
+ data = self._request("getArtists")
+ artists = []
+ for index in data.get("artists", {}).get("index", []):
+ for artist in index.get("artist", []):
+ artists.append({
+ "id": artist.get("id"),
+ "name": artist.get("name"),
+ "album_count": artist.get("albumCount", 0),
+ })
+ return artists
+
+ def get_artist(self, artist_id: str) -> dict:
+ data = self._request("getArtist", id=artist_id)
+ artist = data.get("artist", {})
+ return {
+ "id": artist.get("id"),
+ "name": artist.get("name"),
+ "album_count": artist.get("albumCount", 0),
+ "albums": [
+ {
+ "id": a.get("id"),
+ "name": a.get("name"),
+ "year": a.get("year"),
+ "song_count": a.get("songCount", 0),
+ }
+ for a in artist.get("album", [])
+ ],
+ }
+
+ def search(self, query: str) -> dict:
+ data = self._request("search3", query=query)
+ result = data.get("searchResult3", {})
+ return {
+ "artists": [
+ {"id": a.get("id"), "name": a.get("name")}
+ for a in result.get("artist", [])
+ ],
+ "albums": [
+ {"id": a.get("id"), "name": a.get("name"), "artist": a.get("artist")}
+ for a in result.get("album", [])
+ ],
+ "songs": [
+ {"id": s.get("id"), "title": s.get("title"), "artist": s.get("artist"), "album": s.get("album")}
+ for s in result.get("song", [])
+ ],
+ }
+
+
+class Slskd:
+ """slskd API client for Soulseek searches and downloads."""
+
+ def __init__(self, url: str, api_key: str):
+ self.base_url = url.rstrip("/")
+ self.api_key = api_key
+ self.headers = {"X-API-Key": api_key}
+
+ def _request(self, method: str, endpoint: str, **kwargs) -> dict:
+ url = f"{self.base_url}/api/v0/{endpoint}"
+ kwargs.setdefault("headers", {}).update(self.headers)
+ resp = requests.request(method, url, **kwargs)
+ resp.raise_for_status()
+ if resp.content:
+ return resp.json()
+ return {}
+
+ def search(self, query: str, timeout: int = 30) -> str:
+ """Start a search and return the search ID."""
+ data = self._request("POST", "searches", json={"searchText": query})
+ return data.get("id")
+
+ def get_search_state(self, search_id: str) -> dict:
+ """Get search state."""
+ return self._request("GET", f"searches/{search_id}")
+
+ def wait_for_search(self, search_id: str, timeout: int = 60, min_results: int = 5, min_wait: int = 30) -> bool:
+ """Wait for search to complete or have enough results.
+
+ Args:
+ search_id: The search ID to wait for
+ timeout: Maximum time to wait
+ min_results: Return early if we have this many files
+ min_wait: Minimum time to wait before honoring isComplete with 0 results
+ """
+ start = time.time()
+ while time.time() - start < timeout:
+ state = self.get_search_state(search_id)
+ file_count = state.get("fileCount", 0)
+ elapsed = time.time() - start
+
+ # Got enough results - done
+ if file_count >= min_results:
+ return True
+
+ # Search complete with results - done
+ if state.get("isComplete") and file_count > 0:
+ return True
+
+ # Search complete with NO results - only trust after min_wait
+ # (Soulseek can mark complete before results arrive)
+ if state.get("isComplete") and elapsed >= min_wait:
+ return True
+
+ time.sleep(2)
+ return False
+
+ def get_search_responses(self, search_id: str, max_retries: int = 15, retry_delay: float = 2) -> list[dict]:
+ """Get search responses (results grouped by user).
+
+ Retries if response list is empty but state shows results exist.
+ slskd has a race condition where /responses can lag significantly
+ behind state.responseCount - sometimes 10+ seconds.
+ """
+ for attempt in range(max_retries):
+ responses = self._request("GET", f"searches/{search_id}/responses")
+ if responses:
+ return responses
+
+ # Check if state says there should be responses
+ state = self.get_search_state(search_id)
+ if state.get("responseCount", 0) == 0:
+ # Genuinely no results
+ return []
+
+ # State has responses but endpoint returned empty - wait and retry
+ time.sleep(retry_delay)
+
+ return []
+
+ def download_files(self, username: str, files: list[dict]) -> dict:
+ """Queue files for download from a user."""
+ return self._request("POST", f"transfers/downloads/{username}", json=files)
+
+ def get_downloads(self) -> list[dict]:
+ """Get current downloads."""
+ return self._request("GET", "transfers/downloads")
+
+ def search_and_find_album(self, artist: str, album: str, timeout: int = 60) -> Optional[dict]:
+ """Search for an album and return best result."""
+ query = f"{artist} {album}"
+ print(f" Searching Soulseek for: {query}")
+
+ search_id = self.search(query)
+ if not search_id:
+ return None
+
+ # Wait for results
+ self.wait_for_search(search_id, timeout=timeout, min_results=3)
+
+ responses = self.get_search_responses(search_id)
+ if not responses:
+ return None
+
+ # Score and rank results
+ best_match = None
+ best_score = 0
+
+ for response in responses:
+ username = response.get("username", "")
+ files = response.get("files", [])
+
+ # Skip users with no free slots
+ if not response.get("hasFreeUploadSlot", False):
+ continue
+
+ # Group files by directory
+ directories = {}
+ for f in files:
+ filename = f.get("filename", "")
+ # Extract directory
+ parts = filename.replace("\\", "/").rsplit("/", 1)
+ if len(parts) == 2:
+ dir_path, fname = parts
+ else:
+ dir_path, fname = "", parts[0]
+
+ if dir_path not in directories:
+ directories[dir_path] = []
+ directories[dir_path].append(f)
+
+ # Find directories that look like albums (multiple audio files)
+ for dir_path, dir_files in directories.items():
+ audio_files = [f for f in dir_files if any(
+ f.get("filename", "").lower().endswith(ext)
+ for ext in [".flac", ".mp3", ".m4a", ".ogg", ".opus"]
+ )]
+
+ if len(audio_files) < 3:
+ continue
+
+ # Score this directory
+ score = 0
+ dir_lower = dir_path.lower()
+
+ # Prefer FLAC
+ flac_count = sum(1 for f in audio_files if f.get("filename", "").lower().endswith(".flac"))
+ if flac_count == len(audio_files):
+ score += 100
+ elif flac_count > 0:
+ score += 50
+
+ # Check artist/album in path
+ if artist.lower() in dir_lower:
+ score += 30
+ if album.lower() in dir_lower:
+ score += 30
+
+ # Prefer more complete albums
+ score += min(len(audio_files), 20)
+
+ # Prefer faster users (higher upload speed)
+ score += min(response.get("uploadSpeed", 0) // 100000, 20)
+
+ if score > best_score:
+ best_score = score
+ best_match = {
+ "username": username,
+ "directory": dir_path,
+ "files": audio_files,
+ "file_count": len(audio_files),
+ "score": score,
+ "has_flac": flac_count > 0,
+ }
+
+ return best_match
+
+ def download_album(self, match: dict) -> bool:
+ """Download an album match."""
+ if not match:
+ return False
+
+ username = match["username"]
+ files = match["files"]
+
+ # Format files for download API
+ download_files = [
+ {"filename": f["filename"], "size": f.get("size", 0)}
+ for f in files
+ ]
+
+ try:
+ self.download_files(username, download_files)
+ return True
+ except Exception as e:
+ print(f" Download failed: {e}")
+ return False
+
+ def get_user_downloads(self, username: str) -> list[dict]:
+ """Get downloads from a specific user."""
+ try:
+ all_downloads = self._request("GET", "transfers/downloads")
+ # Filter by username - strip invisible Unicode chars for robust matching
+ clean = lambda s: ''.join(c for c in (s or '') if c.isprintable())
+ target = clean(username)
+ return [d for d in all_downloads if clean(d.get("username", "")) == target]
+ except Exception:
+ return []
+
+ def cancel_user_downloads(self, username: str) -> bool:
+ """Cancel all downloads from a user."""
+ try:
+ self._request("DELETE", f"transfers/downloads/{username}")
+ return True
+ except Exception:
+ return False
+
+ def wait_for_download(self, username: str, expected_files: int,
+ timeout_minutes: int = 30, check_interval: int = 60) -> bool:
+ """Wait for downloads from a user to complete."""
+ import time
+
+ start = time.time()
+ timeout_seconds = timeout_minutes * 60
+
+ while time.time() - start < timeout_seconds:
+ downloads = self.get_user_downloads(username)
+
+ if not downloads:
+ # No downloads found - might have completed and been removed
+ # Or never started
+ return False
+
+ # Check states
+ completed = sum(1 for d in downloads
+ for f in d.get("files", [])
+ if f.get("state", "").startswith("Completed"))
+ failed = sum(1 for d in downloads
+ for f in d.get("files", [])
+ if any(s in (f.get("state") or "") for s in ["Errored", "Cancelled", "TimedOut", "Rejected", "Aborted"]))
+ in_progress = sum(1 for d in downloads
+ for f in d.get("files", [])
+ if any(s in (f.get("state") or "") for s in ["Queued", "Initializing", "InProgress"]))
+
+ total = completed + failed + in_progress
+
+ print(f" Progress: {completed}/{total} complete, {failed} failed, {in_progress} in progress")
+
+ if completed >= expected_files:
+ return True
+
+ if failed > 0 and in_progress == 0:
+ # All either completed or failed, nothing in progress
+ return completed > 0
+
+ time.sleep(check_interval)
+
+ # Timeout
+ return False
+
+
+class MusicRecommender:
+ """Main recommender combining all sources."""
+
+ def __init__(self, config: dict):
+ self.config = config
+
+ self.lastfm = LastFM(
+ api_key=config["lastfm"]["api_key"],
+ username=config["lastfm"]["username"],
+ )
+
+ self.navidrome = None
+ if "navidrome" in config:
+ self.navidrome = Navidrome(
+ url=config["navidrome"]["url"],
+ username=config["navidrome"]["username"],
+ password=config["navidrome"]["password"],
+ )
+
+ self.slskd = None
+ if "slskd" in config:
+ self.slskd = Slskd(
+ url=config["slskd"]["url"],
+ api_key=config["slskd"]["api_key"],
+ )
+
+ def get_library_artists(self) -> set[str]:
+ if not self.navidrome:
+ return set()
+ artists = self.navidrome.get_artists()
+ return {a["name"].lower() for a in artists}
+
+ def get_library_albums(self) -> set[str]:
+ """Get set of 'artist - album' strings in library."""
+ if not self.navidrome:
+ return set()
+
+ albums = set()
+ artists = self.navidrome.get_artists()
+ for artist in artists:
+ try:
+ details = self.navidrome.get_artist(artist["id"])
+ for album in details.get("albums", []):
+ key = f"{artist['name'].lower()} - {album['name'].lower()}"
+ albums.add(key)
+ except Exception:
+ pass
+ return albums
+
+ def find_gaps(self) -> list[dict]:
+ """Find artists you listen to but don't have in your library."""
+ top_artists = self.lastfm.get_top_artists(period="6month", limit=50)
+ library = self.get_library_artists()
+
+ # Filter out YouTube auto-channels and other noise
+ noise_patterns = [" - topic", "official", "vevo", "cyber gardens"]
+
+ gaps = []
+ for artist in top_artists:
+ name_lower = artist["name"].lower()
+
+ # Skip noise
+ if any(p in name_lower for p in noise_patterns):
+ continue
+
+ if name_lower not in library:
+ gaps.append({
+ "name": artist["name"],
+ "playcount": artist["playcount"],
+ "reason": "listened to but not in library",
+ })
+
+ return gaps
+
+ def get_disliked_artists(self) -> set[str]:
+ """Get set of disliked artist names (lowercase) from config feedback."""
+ disliked = set()
+ feedback = self.config.get("feedback", {})
+ for entry in feedback.get("disliked", []):
+ if "artist" in entry:
+ disliked.add(entry["artist"].lower())
+ return disliked
+
+ def get_recommendations(self) -> list[dict]:
+ recs = self.lastfm.get_recommendations(limit=30)
+ library = self.get_library_artists()
+ disliked = self.get_disliked_artists()
+ filtered = [r for r in recs if r["name"].lower() not in library and r["name"].lower() not in disliked]
+ return filtered[:20]
+
+ def pick_surprise_album(self) -> Optional[dict]:
+ """Pick an album to download as a surprise."""
+ # Load state to avoid repeats
+ state = self._load_state()
+ downloaded = set(state.get("downloaded_albums", []))
+
+ # Strategy 1: Album from a gap artist (artist you listen to but don't own)
+ gaps = self.find_gaps()[:10]
+
+ # Strategy 2: Album from a recommended artist
+ recs = self.get_recommendations()[:10]
+
+ # Combine and shuffle
+ candidates = []
+
+ for gap in gaps:
+ albums = self.lastfm.get_artist_top_albums(gap["name"], limit=3)
+ for album in albums:
+ key = f"{gap['name'].lower()} - {album['name'].lower()}"
+ if key not in downloaded:
+ candidates.append({
+ "artist": gap["name"],
+ "album": album["name"],
+ "reason": f"You've played {gap['name']} {gap['playcount']} times but don't own this",
+ "priority": 2, # Higher priority for gaps
+ })
+
+ for rec in recs:
+ albums = self.lastfm.get_artist_top_albums(rec["name"], limit=2)
+ for album in albums:
+ key = f"{rec['name'].lower()} - {album['name'].lower()}"
+ if key not in downloaded:
+ candidates.append({
+ "artist": rec["name"],
+ "album": album["name"],
+ "reason": f"Similar to {rec['because_of']}",
+ "priority": 1,
+ })
+
+ if not candidates:
+ return None
+
+ # Sort by priority, then shuffle within priority
+ candidates.sort(key=lambda x: -x["priority"])
+ top_priority = candidates[0]["priority"]
+ top_candidates = [c for c in candidates if c["priority"] == top_priority]
+
+ return random.choice(top_candidates)
+
+ def surprise_download(self, max_attempts: int = 3, wait_for_completion: bool = False,
+ timeout_minutes: int = 30) -> dict:
+ """Pick and download a surprise album. Tries multiple albums if needed.
+
+ Args:
+ max_attempts: Maximum albums to try
+ wait_for_completion: If True, wait for download to complete (for overnight job)
+ timeout_minutes: How long to wait for each download before giving up
+ """
+ if not self.slskd:
+ return {"success": False, "error": "slskd not configured"}
+
+ # Get all candidates
+ state = self._load_state()
+ downloaded = set(state.get("downloaded_albums", []))
+ disliked = self.get_disliked_artists()
+
+ candidates = []
+
+ # From gaps (higher priority) - skip disliked artists
+ for gap in self.find_gaps()[:15]:
+ if gap["name"].lower() in disliked:
+ continue
+ albums = self.lastfm.get_artist_top_albums(gap["name"], limit=3)
+ for album in albums:
+ key = f"{gap['name'].lower()} - {album['name'].lower()}"
+ if key not in downloaded:
+ candidates.append({
+ "artist": gap["name"],
+ "album": album["name"],
+ "reason": f"You've played {gap['name']} {gap['playcount']} times but don't own this",
+ "priority": 2,
+ })
+
+ # From recommendations
+ for rec in self.get_recommendations()[:10]:
+ albums = self.lastfm.get_artist_top_albums(rec["name"], limit=2)
+ for album in albums:
+ key = f"{rec['name'].lower()} - {album['name'].lower()}"
+ if key not in downloaded:
+ candidates.append({
+ "artist": rec["name"],
+ "album": album["name"],
+ "reason": f"Similar to {rec['because_of']}",
+ "priority": 1,
+ })
+
+ # From Discogs labels (curated 90s labels)
+ discogs_path = Path(__file__).parent / "discogs_labels.json"
+ if discogs_path.exists():
+ try:
+ with open(discogs_path) as f:
+ discogs_data = json.load(f)
+ # Pick random releases from the pool
+ discogs_releases = discogs_data.get("releases", [])
+ random.shuffle(discogs_releases)
+ for release in discogs_releases[:20]:
+ if release['artist'].lower() in disliked:
+ continue
+ key = f"{release['artist'].lower()} - {release['album'].lower()}"
+ if key not in downloaded:
+ year_str = f" ({release['year']})" if release.get('year') else ""
+ candidates.append({
+ "artist": release["artist"],
+ "album": release["album"],
+ "reason": f"From {release['label']}{year_str}",
+ "priority": 1, # Same priority as recommendations
+ })
+ except Exception:
+ pass # Silently skip if file is corrupt
+
+ if not candidates:
+ return {"success": False, "error": "No suitable albums found"}
+
+ # Shuffle within priority groups
+ random.shuffle(candidates)
+ candidates.sort(key=lambda x: -x["priority"])
+
+ # Try up to max_attempts albums
+ tried = []
+ for pick in candidates[:max_attempts]:
+ print(f"\n🎲 Trying: {pick['artist']} - {pick['album']}")
+ print(f" Reason: {pick['reason']}\n")
+
+ match = self.slskd.search_and_find_album(pick["artist"], pick["album"])
+
+ if not match:
+ print(f" ❌ Not found on Soulseek, trying next...")
+ tried.append(f"{pick['artist']} - {pick['album']} (not found)")
+ continue
+
+ print(f" Found: {match['file_count']} files from {match['username']}")
+ print(f" FLAC: {'Yes' if match['has_flac'] else 'No'}")
+
+ if not self.slskd.download_album(match):
+ tried.append(f"{pick['artist']} - {pick['album']} (queue failed)")
+ continue
+
+ print(f" ✓ Download queued!")
+
+ # If not waiting for completion, return success after queuing
+ if not wait_for_completion:
+ self._save_downloaded(pick["artist"], pick["album"], pick.get("reason"))
+ return {
+ "success": True,
+ "artist": pick["artist"],
+ "album": pick["album"],
+ "reason": pick["reason"],
+ "files": match["file_count"],
+ "source": match["username"],
+ "has_flac": match["has_flac"],
+ "attempts": len(tried) + 1,
+ "status": "queued",
+ }
+
+ # Wait for download to complete
+ print(f" ⏳ Waiting up to {timeout_minutes} minutes for download...")
+
+ if self.slskd.wait_for_download(match["username"], match["file_count"],
+ timeout_minutes=timeout_minutes):
+ self._save_downloaded(pick["artist"], pick["album"], pick.get("reason"))
+ return {
+ "success": True,
+ "artist": pick["artist"],
+ "album": pick["album"],
+ "reason": pick["reason"],
+ "files": match["file_count"],
+ "source": match["username"],
+ "has_flac": match["has_flac"],
+ "attempts": len(tried) + 1,
+ "status": "completed",
+ }
+ else:
+ # Download timed out - cancel and try next
+ print(f" ⏱️ Timeout! Cancelling and trying next album...")
+ self.slskd.cancel_user_downloads(match["username"])
+ tried.append(f"{pick['artist']} - {pick['album']} (timeout after {timeout_minutes}min)")
+ continue
+
+ # Save failed attempts to wishlist for Discogs bookmarking
+ self._save_wishlist(tried)
+
+ return {
+ "success": False,
+ "error": f"Tried {len(tried)} albums, none worked",
+ "tried": tried,
+ }
+
+ def _save_wishlist(self, albums: list[str]):
+ """Save albums that couldn't be found to wishlist + bookmarks."""
+ wishlist_path = Path(__file__).parent / "music_wishlist.md"
+
+ # Read existing wishlist
+ existing = set()
+ if wishlist_path.exists():
+ with open(wishlist_path) as f:
+ for line in f:
+ if line.startswith("- [ ] "):
+ existing.add(line.strip()[6:].split(" — ")[0])
+
+ # Add new entries to markdown wishlist
+ with open(wishlist_path, "a") as f:
+            if wishlist_path.stat().st_size == 0:  # freshly created or empty file - write the header
+ f.write("# Music Wishlist\n\n")
+ f.write("Albums that couldn't be found on Soulseek.\n")
+ f.write("Search Discogs/Bandcamp manually or check back later.\n\n")
+
+ for album in albums:
+ clean = album.replace(" (download failed)", "").replace(" (not found)", "").replace(" (queue failed)", "")
+ clean = clean.split(" (timeout")[0] # Remove timeout messages
+ if clean not in existing:
+ query = clean.replace(" - ", " ").replace(" ", "+")
+ discogs_url = f"https://www.discogs.com/search/?q={query}&type=release"
+ f.write(f"- [ ] {clean} — [Discogs]({discogs_url})\n")
+
+ # Also add to Floccus bookmarks
+ self._add_to_bookmarks(clean, discogs_url)
+
+ def _add_to_bookmarks(self, title: str, url: str):
+ """Add album to Floccus bookmarks in Music/Wishlist folder."""
+ import subprocess
+
+ add_bookmark_script = Path(__file__).parent / "add_bookmark_to_wishlist.js"
+ if add_bookmark_script.exists():
+ try:
+ subprocess.run(
+ ["node", str(add_bookmark_script), url, title],
+ capture_output=True,
+ timeout=30
+ )
+ print(f" 📌 Added to bookmarks: {title}")
+ except Exception as e:
+ print(f" ⚠ Bookmark add failed: {e}")
+
+ def _load_state(self) -> dict:
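+        # State file schema (see _save_downloaded below):
+        #   {"downloaded_albums": ["artist - album", ...], "last_download": {...}}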
+ if STATE_PATH.exists():
+ with open(STATE_PATH) as f:
+ return json.load(f)
+ return {"downloaded_albums": []}
+
+ def _save_downloaded(self, artist: str, album: str, reason: str = None):
+ state = self._load_state()
+ key = f"{artist.lower()} - {album.lower()}"
+ if key not in state["downloaded_albums"]:
+ state["downloaded_albums"].append(key)
+ # Keep last 100
+ state["downloaded_albums"] = state["downloaded_albums"][-100:]
+ state["last_download"] = {
+ "artist": artist,
+ "album": album,
+ "reason": reason,
+ "timestamp": datetime.now().isoformat(),
+ }
+ with open(STATE_PATH, "w") as f:
+ json.dump(state, f, indent=2)
+
+ def format_recommendations(self, recs: list[dict]) -> str:
+ lines = ["# Music Recommendations\n"]
+ lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n")
+
+ for i, rec in enumerate(recs, 1):
+ lines.append(f"{i}. **{rec['name']}**")
+ if "because_of" in rec:
+ lines.append(f" Similar to: {rec['because_of']}")
+ lines.append("")
+
+ return "\n".join(lines)
+
+ def format_gaps(self, gaps: list[dict]) -> str:
+ lines = ["# Library Gaps\n"]
+ lines.append("Artists you listen to but don't have in Navidrome:\n")
+
+ for i, gap in enumerate(gaps, 1):
+ lines.append(f"{i}. **{gap['name']}** ({gap['playcount']} plays)")
+
+ return "\n".join(lines)
+
+
+def load_config(path: Path) -> dict:
+ if not path.exists():
+ print(f"Config not found: {path}")
+ print("Create it with your API keys. See music_config.example.json")
+ sys.exit(1)
+
+ with open(path) as f:
+ return json.load(f)
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Music recommendation tool")
+ parser.add_argument("command", nargs="?", default="recommend",
+ choices=["recommend", "gaps", "recent", "search", "test",
+ "surprise", "slskd", "download"],
+ help="Command to run")
+ parser.add_argument("query", nargs="?", help="Search query")
+ parser.add_argument("--config", type=Path, default=CONFIG_PATH,
+ help="Path to config file")
+ parser.add_argument("--json", action="store_true", help="Output as JSON")
+ parser.add_argument("--wait", action="store_true",
+ help="Wait for download to complete (for overnight jobs)")
+ parser.add_argument("--timeout", type=int, default=30,
+ help="Timeout in minutes for download completion (default: 30)")
+
+ args = parser.parse_args()
+ config = load_config(args.config)
+ recommender = MusicRecommender(config)
+
+ if args.command == "test":
+ print("Testing Last.fm...", end=" ")
+ try:
+ recent = recommender.lastfm.get_recent_tracks(limit=1)
+ print(f"OK (last track: {recent[0]['track'][:30] if recent else 'none'}...)")
+ except Exception as e:
+ print(f"FAILED: {e}")
+
+ if recommender.navidrome:
+ print("Testing Navidrome...", end=" ")
+ if recommender.navidrome.ping():
+ artists = recommender.navidrome.get_artists()
+ print(f"OK ({len(artists)} artists)")
+ else:
+ print("FAILED")
+
+ if recommender.slskd:
+ print("Testing slskd...", end=" ")
+ try:
+ recommender.slskd._request("GET", "server")
+ print("OK")
+ except Exception as e:
+ print(f"FAILED: {e}")
+
+ elif args.command == "recommend":
+ recs = recommender.get_recommendations()
+ if args.json:
+ print(json.dumps(recs, indent=2))
+ else:
+ print(recommender.format_recommendations(recs))
+
+ elif args.command == "gaps":
+ gaps = recommender.find_gaps()
+ if args.json:
+ print(json.dumps(gaps, indent=2))
+ else:
+ print(recommender.format_gaps(gaps))
+
+ elif args.command == "recent":
+ recent = recommender.lastfm.get_recent_tracks(limit=20)
+ if args.json:
+ print(json.dumps(recent, indent=2))
+ else:
+ print("# Recent Tracks\n")
+ for t in recent:
+ status = "🎵 NOW" if t["now_playing"] else " "
+ print(f"{status} {t['artist']} - {t['track']}")
+
+ elif args.command == "search":
+ if not args.query:
+ print("Search requires a query")
+ sys.exit(1)
+
+ if recommender.navidrome:
+ print(f"Searching Navidrome for '{args.query}'...")
+ results = recommender.navidrome.search(args.query)
+ if results["artists"]:
+ print("\nArtists in library:")
+ for a in results["artists"][:5]:
+ print(f" - {a['name']}")
+ if results["albums"]:
+ print("\nAlbums in library:")
+ for a in results["albums"][:5]:
+ print(f" - {a['artist']} - {a['name']}")
+
+ elif args.command == "surprise":
+ result = recommender.surprise_download(
+ wait_for_completion=args.wait,
+ timeout_minutes=args.timeout,
+ )
+ if args.json:
+ print(json.dumps(result, indent=2))
+ else:
+ if result["success"]:
+ status = result.get("status", "queued")
+ print(f"\n✅ {'Download complete!' if status == 'completed' else 'Queued for download!'}")
+ print(f" {result['artist']} - {result['album']}")
+ print(f" {result['files']} files {'(FLAC)' if result['has_flac'] else '(MP3)'}")
+ print(f" From: {result['source']}")
+ if result.get("attempts", 1) > 1:
+ print(f" (took {result['attempts']} attempts)")
+ else:
+ print(f"\n❌ {result['error']}")
+ if "tried" in result:
+ print(" Tried:")
+ for t in result["tried"]:
+ print(f" - {t}")
+
+ elif args.command == "slskd":
+ if not args.query:
+ print("slskd search requires a query")
+ sys.exit(1)
+
+ if not recommender.slskd:
+ print("slskd not configured")
+ sys.exit(1)
+
+ print(f"Searching Soulseek for '{args.query}'...")
+ search_id = recommender.slskd.search(args.query)
+ recommender.slskd.wait_for_search(search_id, timeout=30)
+ # Brief pause to ensure /responses endpoint is populated
+ time.sleep(2)
+ responses = recommender.slskd.get_search_responses(search_id)
+
+ print(f"\nFound {len(responses)} users with results:\n")
+ for resp in responses[:10]:
+ username = resp.get("username", "?")
+ files = resp.get("files", [])
+ has_slot = "✓" if resp.get("hasFreeUploadSlot") else "✗"
+ print(f" {has_slot} {username}: {len(files)} files")
+
+ elif args.command == "download":
+ if not args.query:
+ print("download requires 'artist - album'")
+ sys.exit(1)
+
+ if not recommender.slskd:
+ print("slskd not configured")
+ sys.exit(1)
+
+ # Parse "artist - album"
+ if " - " in args.query:
+ artist, album = args.query.split(" - ", 1)
+ else:
+ print("Format: 'Artist - Album'")
+ sys.exit(1)
+
+ print(f"Searching for: {artist} - {album}")
+ match = recommender.slskd.search_and_find_album(artist, album)
+
+ if match:
+ print(f"\nBest match: {match['file_count']} files from {match['username']}")
+ print(f"FLAC: {'Yes' if match['has_flac'] else 'No'}")
+ print(f"Directory: {match['directory'][:60]}...")
+
+ if recommender.slskd.download_album(match):
+ print("\n✅ Download queued!")
+ else:
+ print("\n❌ Download failed")
+ else:
+ print("\n❌ No suitable results found")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/overnight_transcoder.py b/overnight_transcoder.py
new file mode 100755
index 0000000..8a6d7d0
--- /dev/null
+++ b/overnight_transcoder.py
@@ -0,0 +1,1245 @@
+#!/usr/bin/env python3
+"""
+Overnight Video Transcoder
+Transcodes large video files to HEVC to save space.
+Runs via cron between 02:00-07:00.
+
+Usage:
+ overnight_transcoder.py # Normal run
+ overnight_transcoder.py --dry # Dry run - show what would be transcoded
+ overnight_transcoder.py --stats # Show statistics
+ overnight_transcoder.py --verbose # Extra debug output
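
Example crontab entry (illustrative - adjust the interpreter and script path to your setup):
    0 2 * * * /usr/bin/python3 /path/to/overnight_transcoder.py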
+"""
+
+import os
+import sys
+import json
+import subprocess
+import shutil
+import logging
+import argparse
+import sqlite3
+import traceback
+import re
+import signal
+from pathlib import Path
+from datetime import datetime, time
+from typing import Optional, List, Dict, Tuple
+from contextlib import contextmanager
+
+# Parse arguments
+parser = argparse.ArgumentParser(description="Overnight video transcoder")
+parser.add_argument("--dry", action="store_true", help="Dry run - don't transcode, just show what would happen")
+parser.add_argument("--stats", action="store_true", help="Show cache statistics and exit")
+parser.add_argument("--failed", action="store_true", help="Show failed transcodes and exit")
+parser.add_argument("--retry-failed", action="store_true", help="Reset failed files to pending for retry")
+parser.add_argument("--clear-cache", action="store_true", help="Clear the cache and exit")
+parser.add_argument("--verbose", "-v", action="store_true", help="Verbose debug output")
+parser.add_argument("--json-log", action="store_true", help="Output structured JSON logs (one per line)")
+args = parser.parse_args()
+
+DRY_RUN = args.dry
+VERBOSE = args.verbose
+JSON_LOG = args.json_log
+
+# Track current transcode for signal handling
+CURRENT_TRANSCODE = {"path": None, "pid": None, "start_time": None}
+
+# Configuration
+CONFIG = {
+ "source_dirs": [
+ "/disks/Plex/Anime",
+ "/disks/Plex/Films",
+ "/disks/Plex/TV",
+ ],
+ "cleanup_dir": "/disks/Plex/.cleanup",
+ "log_dir": "/var/log",
+ "log_file": "/var/log/transcoder.log",
+ "db_file": "/var/lib/transcoder/cache.db",
+ "video_extensions": [".mkv", ".mp4", ".avi", ".m4v", ".mov", ".wmv"],
+ "cutoff_time": time(6, 30), # Don't start new encodes after 06:30
+ "cleanup_days": 7, # Delete originals after 7 days
+
+ # FFmpeg settings
+ "crf": 20,
+ "preset": "slow",
+ "audio_codec": "ac3",
+ "audio_bitrate": "640k",
+ "threads": 12,
+}
+
+
+def init_db() -> sqlite3.Connection:
+ """Initialize the SQLite database for caching file info."""
+ db_path = Path(CONFIG["db_file"])
+ db_path.parent.mkdir(parents=True, exist_ok=True)
+
+ conn = sqlite3.connect(CONFIG["db_file"])
+
+ # Main files table - tracks all video files
+ conn.execute("""
+ CREATE TABLE IF NOT EXISTS files (
+ path TEXT PRIMARY KEY,
+ filename TEXT,
+ directory TEXT,
+ original_size INTEGER,
+ mtime REAL,
+ video_codec TEXT,
+ audio_codec TEXT,
+ is_hevc INTEGER,
+ first_seen REAL,
+ scanned_at REAL,
+
+ -- Transcode info (NULL if not transcoded)
+ status TEXT DEFAULT 'pending', -- pending, transcoding, done, failed, stalled
+ transcoded_at REAL,
+ transcoded_size INTEGER,
+ transcode_duration_secs REAL,
+ transcode_settings TEXT, -- JSON: {crf, preset, audio_codec, audio_bitrate}
+ failure_reason TEXT,
+
+ -- Computed (updated after transcode)
+ space_saved INTEGER,
+ compression_ratio REAL
+ )
+ """)
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_is_hevc ON files(is_hevc)")
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_status ON files(status)")
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_size ON files(original_size)")
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_directory ON files(directory)")
+
+ # Stats summary table - running totals
+ conn.execute("""
+ CREATE TABLE IF NOT EXISTS stats (
+ id INTEGER PRIMARY KEY CHECK (id = 1), -- Single row
+ total_files_transcoded INTEGER DEFAULT 0,
+ total_space_saved INTEGER DEFAULT 0,
+ total_transcode_time_secs REAL DEFAULT 0,
+ last_updated REAL
+ )
+ """)
+ conn.execute("INSERT OR IGNORE INTO stats (id) VALUES (1)")
+
+ conn.commit()
+ return conn
+
+
+def get_audio_codec(filepath: str) -> Optional[str]:
+ """Get the audio codec of a file using ffprobe."""
+ try:
+ result = subprocess.run([
+ "ffprobe", "-v", "quiet",
+ "-select_streams", "a:0",
+ "-show_entries", "stream=codec_name",
+ "-of", "json",
+ filepath
+ ], capture_output=True, text=True, timeout=30)
+
+ if result.returncode == 0:
+ data = json.loads(result.stdout)
+ streams = data.get("streams", [])
+ if streams:
+ return streams[0].get("codec_name", "").lower()
+ except Exception as e:
+ log.warning(f"Failed to probe audio {filepath}: {e}")
+ return None
+
+
+def get_cached_file(conn: sqlite3.Connection, filepath: str, size: int, mtime: float) -> Optional[dict]:
+ """Get cached file info if file hasn't changed. Returns dict or None."""
+ cursor = conn.execute(
+ """SELECT video_codec, audio_codec, is_hevc, status
+ FROM files WHERE path = ? AND original_size = ? AND mtime = ?""",
+ (filepath, size, mtime)
+ )
+ row = cursor.fetchone()
+ if row:
+ return {
+ "video_codec": row[0],
+ "audio_codec": row[1],
+ "is_hevc": bool(row[2]),
+ "status": row[3]
+ }
+ return None
+
+
+def cache_file(conn: sqlite3.Connection, filepath: str, size: int, mtime: float,
+ video_codec: str, audio_codec: str, is_hevc: bool):
+ """Cache file info."""
+ p = Path(filepath)
+ now = datetime.now().timestamp()
+
+ # Check if file already exists (to preserve first_seen)
+ cursor = conn.execute("SELECT first_seen FROM files WHERE path = ?", (filepath,))
+ row = cursor.fetchone()
+ first_seen = row[0] if row else now
+
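+    # Note: a plain INSERT OR REPLACE would reset status to 'pending' on every rescan;
+    # the COALESCE subquery below preserves the existing status (done/failed/stalled).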
+ conn.execute("""
+ INSERT OR REPLACE INTO files
+ (path, filename, directory, original_size, mtime, video_codec, audio_codec,
+ is_hevc, first_seen, scanned_at, status)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
+ COALESCE((SELECT status FROM files WHERE path = ?), 'pending'))
+ """, (filepath, p.name, str(p.parent), size, mtime, video_codec, audio_codec,
+ int(is_hevc), first_seen, now, filepath))
+ conn.commit()
+
+
+def update_transcode_result(conn: sqlite3.Connection, filepath: str, new_path: str,
+ new_size: int, duration_secs: float, settings: dict,
+ success: bool, failure_reason: str = None):
+ """Update database after transcode attempt."""
+ now = datetime.now().timestamp()
+
+ if success:
+ cursor = conn.execute("SELECT original_size FROM files WHERE path = ?", (filepath,))
+ row = cursor.fetchone()
+ original_size = row[0] if row else 0
+
+ space_saved = original_size - new_size
+ compression_ratio = new_size / original_size if original_size > 0 else 0
+
+ conn.execute("""
+ UPDATE files SET
+ status = 'done',
+ transcoded_at = ?,
+ transcoded_size = ?,
+ transcode_duration_secs = ?,
+ transcode_settings = ?,
+ space_saved = ?,
+ compression_ratio = ?,
+ failure_reason = NULL
+ WHERE path = ?
+ """, (now, new_size, duration_secs, json.dumps(settings),
+ space_saved, compression_ratio, filepath))
+
+ # Update running totals
+ conn.execute("""
+ UPDATE stats SET
+ total_files_transcoded = total_files_transcoded + 1,
+ total_space_saved = total_space_saved + ?,
+ total_transcode_time_secs = total_transcode_time_secs + ?,
+ last_updated = ?
+ WHERE id = 1
+ """, (space_saved, duration_secs, now))
+ else:
+ conn.execute("""
+ UPDATE files SET
+ status = 'failed',
+ failure_reason = ?
+ WHERE path = ?
+ """, (failure_reason, filepath))
+
+ conn.commit()
+
+
+def remove_from_cache(conn: sqlite3.Connection, filepath: str):
+ """Remove a file from the cache."""
+ conn.execute("DELETE FROM files WHERE path = ?", (filepath,))
+ conn.commit()
+
+
+def get_cache_stats(conn: sqlite3.Connection) -> dict:
+ """Get comprehensive stats from the cache."""
+ # File counts and sizes
+ cursor = conn.execute("""
+ SELECT
+ COUNT(*) as total,
+ SUM(CASE WHEN is_hevc = 1 THEN 1 ELSE 0 END) as hevc_count,
+ SUM(CASE WHEN is_hevc = 0 AND status = 'pending' THEN 1 ELSE 0 END) as pending_count,
+ SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as done_count,
+ SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed_count,
+ SUM(CASE WHEN is_hevc = 1 THEN original_size ELSE 0 END) as hevc_size,
+ SUM(CASE WHEN is_hevc = 0 AND status = 'pending' THEN original_size ELSE 0 END) as pending_size
+ FROM files
+ """)
+ row = cursor.fetchone()
+ file_stats = {
+ "total": row[0] or 0,
+ "hevc_count": row[1] or 0,
+ "pending_count": row[2] or 0,
+ "done_count": row[3] or 0,
+ "failed_count": row[4] or 0,
+ "hevc_size": row[5] or 0,
+ "pending_size": row[6] or 0,
+ }
+
+ # Running totals
+ cursor = conn.execute("SELECT * FROM stats WHERE id = 1")
+ row = cursor.fetchone()
+ if row:
+ file_stats["total_transcoded"] = row[1] or 0
+ file_stats["total_space_saved"] = row[2] or 0
+ file_stats["total_transcode_time"] = row[3] or 0
+
+ # By directory breakdown
+ cursor = conn.execute("""
+ SELECT directory,
+ COUNT(*) as count,
+ SUM(CASE WHEN is_hevc = 0 AND status = 'pending' THEN original_size ELSE 0 END) as pending_size
+ FROM files
+ GROUP BY directory
+ ORDER BY pending_size DESC
+ LIMIT 10
+ """)
+ file_stats["by_directory"] = cursor.fetchall()
+
+ return file_stats
+
+# Ensure log directory exists (skip for /var/log which requires root)
+if not CONFIG["log_dir"].startswith("/var/"):
+ Path(CONFIG["log_dir"]).mkdir(parents=True, exist_ok=True)
+
+# Set up logging
+log_level = logging.DEBUG if VERBOSE else logging.INFO
+logging.basicConfig(
+ level=log_level,
+ format='%(asctime)s [%(levelname)s] %(message)s',
+ handlers=[
+ logging.FileHandler(CONFIG["log_file"]),
+ logging.StreamHandler(sys.stdout)
+ ]
+)
+log = logging.getLogger(__name__)
+
+
+def log_json(event: str, **data):
+ """Write structured JSON log entry (for machine parsing)."""
+ if not JSON_LOG:
+ return
+ entry = {
+ "ts": datetime.now().isoformat(),
+ "event": event,
+ **data
+ }
+ print(json.dumps(entry), file=sys.stderr)
+
+
+def format_bytes(size: int) -> str:
+ """Human-readable byte size."""
+ for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
+ if abs(size) < 1024.0:
+ return f"{size:.2f} {unit}"
+ size /= 1024.0
+ return f"{size:.2f} PB"
+
+
+def format_duration(seconds: float) -> str:
+ """Human-readable duration."""
+ if seconds < 60:
+ return f"{seconds:.1f}s"
+ elif seconds < 3600:
+ return f"{seconds/60:.1f}m"
+ else:
+ return f"{seconds/3600:.1f}h"
+
+
+def get_disk_space(path: str) -> dict:
+ """Get disk space info for a path."""
+ try:
+ stat = os.statvfs(path)
+ total = stat.f_blocks * stat.f_frsize
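+        # f_bavail = blocks available to unprivileged users (matches df's "Avail" column)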
+ free = stat.f_bavail * stat.f_frsize
+ used = total - free
+ return {
+ "total": total,
+ "free": free,
+ "used": used,
+ "percent_used": (used / total * 100) if total > 0 else 0
+ }
+ except Exception as e:
+ log.warning(f"Failed to get disk space for {path}: {e}")
+ return None
+
+
+def check_disk_space():
+ """Log disk space for all source directories."""
+ log.info("=" * 50)
+ log.info("Disk Space Check")
+ log.info("=" * 50)
+
+ checked_mounts = set()
+ for source_dir in CONFIG["source_dirs"]:
+ # Get mount point to avoid duplicate checks
+ try:
+ mount = subprocess.run(
+ ["df", "--output=target", source_dir],
+ capture_output=True, text=True
+ ).stdout.strip().split('\n')[-1]
+        except Exception:
+ mount = source_dir
+
+ if mount in checked_mounts:
+ continue
+ checked_mounts.add(mount)
+
+ space = get_disk_space(source_dir)
+ if space:
+ log.info(f" {mount}:")
+ log.info(f" Total: {format_bytes(space['total'])}")
+ log.info(f" Free: {format_bytes(space['free'])} ({100-space['percent_used']:.1f}% available)")
+ log_json("disk_space", mount=mount, **space)
+
+ if space['percent_used'] > 90:
+ log.warning(f" ⚠️ LOW DISK SPACE on {mount}!")
+ log.info("")
+
+
+def get_video_duration(filepath: str) -> Optional[float]:
+ """Get video duration in seconds using ffprobe."""
+ try:
+ result = subprocess.run([
+ "ffprobe", "-v", "quiet",
+ "-show_entries", "format=duration",
+ "-of", "json",
+ filepath
+ ], capture_output=True, text=True, timeout=30)
+
+ if result.returncode == 0:
+ data = json.loads(result.stdout)
+ return float(data.get("format", {}).get("duration", 0))
+ except Exception as e:
+ log.debug(f"Failed to get duration for {filepath}: {e}")
+ return None
+
+
+def handle_interrupt(signum, frame):
+ """Handle SIGINT/SIGTERM gracefully."""
+ log.warning("=" * 50)
+ log.warning("INTERRUPT RECEIVED - cleaning up...")
+ log.warning("=" * 50)
+
+ if CURRENT_TRANSCODE["path"]:
+ log.warning(f"Interrupted transcode: {CURRENT_TRANSCODE['path']}")
+ if CURRENT_TRANSCODE["start_time"]:
+ elapsed = (datetime.now() - CURRENT_TRANSCODE["start_time"]).total_seconds()
+ log.warning(f"Elapsed time: {format_duration(elapsed)}")
+
+ # Try to kill ffmpeg if running
+ if CURRENT_TRANSCODE["pid"]:
+ try:
+ os.kill(CURRENT_TRANSCODE["pid"], signal.SIGTERM)
+ log.info("Sent SIGTERM to ffmpeg process")
+            except Exception:
+ pass
+
+ # Clean up partial output
+ temp_path = Path(CURRENT_TRANSCODE["path"]).with_suffix(".transcoding.mkv")
+ if temp_path.exists():
+ log.info(f"Removing partial output: {temp_path}")
+ temp_path.unlink()
+
+ log_json("interrupted",
+ file=CURRENT_TRANSCODE["path"],
+ elapsed=elapsed if CURRENT_TRANSCODE["start_time"] else None)
+ sys.exit(1)
+
+
+# Register signal handlers
+signal.signal(signal.SIGINT, handle_interrupt)
+signal.signal(signal.SIGTERM, handle_interrupt)
+
+
+def get_video_codec(filepath: str) -> Optional[str]:
+ """Get the video codec of a file using ffprobe."""
+ try:
+ result = subprocess.run([
+ "ffprobe", "-v", "quiet",
+ "-select_streams", "v:0",
+ "-show_entries", "stream=codec_name",
+ "-of", "json",
+ filepath
+ ], capture_output=True, text=True, timeout=30)
+
+ if result.returncode == 0:
+ data = json.loads(result.stdout)
+ streams = data.get("streams", [])
+ if streams:
+ return streams[0].get("codec_name", "").lower()
+ except Exception as e:
+ log.warning(f"Failed to probe {filepath}: {e}")
+ return None
+
+
+def is_hevc(filepath: str) -> bool:
+ """Check if a file is already HEVC encoded."""
+ codec = get_video_codec(filepath)
+ return codec in ["hevc", "h265"]
+
+
+def get_file_size_gb(filepath: str) -> float:
+ """Get file size in GB."""
+ return os.path.getsize(filepath) / (1024 ** 3)
+
+
+def find_videos_to_transcode(conn: sqlite3.Connection) -> List[Dict]:
+ """Find all non-HEVC videos, sorted by size (biggest first). Uses cache for speed."""
+ videos = []
+ cache_hits = 0
+ cache_misses = 0
+ files_scanned = 0
+ dirs_scanned = 0
+
+ scan_start = datetime.now()
+ log.info("Scanning source directories...")
+
+ for source_dir in CONFIG["source_dirs"]:
+ if not os.path.exists(source_dir):
+ log.warning(f" ⚠ Source directory not found: {source_dir}")
+ continue
+
+ dir_start = datetime.now()
+ dir_files = 0
+ dir_size = 0
+ log.info(f" 📁 Scanning: {source_dir}")
+
+ for root, dirs, files in os.walk(source_dir):
+ # Skip cleanup directory
+ if ".cleanup" in root:
+ continue
+
+ dirs_scanned += 1
+
+ for filename in files:
+ ext = os.path.splitext(filename)[1].lower()
+ if ext not in CONFIG["video_extensions"]:
+ continue
+
+ filepath = os.path.join(root, filename)
+ files_scanned += 1
+ dir_files += 1
+
+ try:
+ stat = os.stat(filepath)
+ size = stat.st_size
+ mtime = stat.st_mtime
+ dir_size += size
+
+ # Check cache first
+ cached = get_cached_file(conn, filepath, size, mtime)
+ if cached:
+ video_codec = cached["video_codec"]
+ audio_codec = cached["audio_codec"]
+ is_hevc = cached["is_hevc"]
+ status = cached["status"]
+ cache_hits += 1
+ else:
+ # Need to probe this file
+ log.debug(f" Probing: {filename}")
+ video_codec = get_video_codec(filepath)
+ audio_codec = get_audio_codec(filepath)
+ is_hevc = video_codec in ["hevc", "h265"] if video_codec else False
+ status = "done" if is_hevc else "pending"
+ cache_file(conn, filepath, size, mtime,
+ video_codec or "unknown", audio_codec or "unknown", is_hevc)
+ cache_misses += 1
+
+ videos.append({
+ "path": filepath,
+ "size": size,
+ "size_gb": size / (1024 ** 3),
+ "video_codec": video_codec,
+ "audio_codec": audio_codec,
+ "is_hevc": is_hevc,
+ "status": status
+ })
+ except OSError as e:
+ log.warning(f" ⚠ Cannot access {filepath}: {e}")
+
+ dir_elapsed = (datetime.now() - dir_start).total_seconds()
+ log.info(f" Found {dir_files} videos ({format_bytes(dir_size)}) in {format_duration(dir_elapsed)}")
+ log_json("scan_directory",
+ directory=source_dir,
+ files=dir_files,
+ size=dir_size,
+ elapsed=dir_elapsed)
+
+ scan_elapsed = (datetime.now() - scan_start).total_seconds()
+ cache_rate = (cache_hits / (cache_hits + cache_misses) * 100) if (cache_hits + cache_misses) > 0 else 0
+
+ log.info("")
+ log.info(f"Scan complete in {format_duration(scan_elapsed)}")
+ log.info(f" Files scanned: {files_scanned}")
+ log.info(f" Dirs traversed: {dirs_scanned}")
+ log.info(f" Cache hits: {cache_hits} ({cache_rate:.1f}%)")
+ log.info(f" Cache misses: {cache_misses} (probed)")
+ log.info("")
+
+ log_json("scan_complete",
+ files=files_scanned,
+ dirs=dirs_scanned,
+ cache_hits=cache_hits,
+ cache_misses=cache_misses,
+ elapsed=scan_elapsed)
+
+ # Sort by size descending (biggest first)
+ videos.sort(key=lambda x: x["size"], reverse=True)
+ return videos
+
+
+def is_past_cutoff() -> bool:
+ """Check if we're past the cutoff time for starting new encodes."""
+ now = datetime.now().time()
+ return now >= CONFIG["cutoff_time"]
+
+
+def verify_output(filepath: str) -> bool:
+    """Verify the transcoded file exists, has a plausible size, and is actually HEVC."""
+    if not os.path.exists(filepath):
+        return False
+
+    # Reject obviously broken outputs
+    size = os.path.getsize(filepath)
+    if size < 1024 * 1024:  # Less than 1MB is definitely wrong
+        return False
+
+ # Check it's valid video with ffprobe
+ codec = get_video_codec(filepath)
+ if codec != "hevc":
+ return False
+
+ return True
+
+
+def transcode_file(input_path: str, conn: sqlite3.Connection) -> Optional[dict]:
+ """Transcode a single file to HEVC. Returns result dict on success."""
+
+ input_file = Path(input_path)
+ output_path = input_file.with_suffix(".transcoding.mkv")
+ final_path = input_file.with_suffix(".mkv")
+
+ original_size = os.path.getsize(input_path)
+ video_duration = get_video_duration(input_path)
+
+ log.info("")
+ log.info("=" * 60)
+ log.info(f"TRANSCODE START: {input_file.name}")
+ log.info("=" * 60)
+ log.info(f" Input path: {input_path}")
+ log.info(f" Output path: {output_path}")
+ log.info(f" Original size: {format_bytes(original_size)}")
+ if video_duration:
+ log.info(f" Duration: {format_duration(video_duration)}")
+
+ log_json("transcode_start",
+ file=input_path,
+ size=original_size,
+ duration=video_duration)
+
+ # Mark as transcoding in DB
+ conn.execute("UPDATE files SET status = 'transcoding' WHERE path = ?", (input_path,))
+ conn.commit()
+
+ start_time = datetime.now()
+ CURRENT_TRANSCODE["path"] = input_path
+ CURRENT_TRANSCODE["start_time"] = start_time
+
+ settings = {
+ "crf": CONFIG["crf"],
+ "preset": CONFIG["preset"],
+ "audio_codec": CONFIG["audio_codec"],
+ "audio_bitrate": CONFIG["audio_bitrate"],
+ "threads": CONFIG["threads"]
+ }
+
+ # Build ffmpeg command
+ cmd = [
+ "ffmpeg", "-y",
+ "-threads", str(CONFIG["threads"]),
+ "-progress", "pipe:1", # Progress to stdout for parsing
+ "-i", input_path,
+ "-c:v", "libx265",
+ "-preset", CONFIG["preset"],
+ "-crf", str(CONFIG["crf"]),
+ "-x265-params", f"pools={CONFIG['threads']}",
+ "-c:a", CONFIG["audio_codec"],
+ "-b:a", CONFIG["audio_bitrate"],
+ "-c:s", "copy", # Copy subtitles
+ str(output_path)
+ ]
+
+ # Log the full command for debugging
+ cmd_str = ' '.join(f'"{c}"' if ' ' in c else c for c in cmd)
+ log.info(f" FFmpeg command:")
+ log.debug(f" {cmd_str}")
+ if not VERBOSE:
+ log.info(f" (use --verbose to see full command)")
+ log.info("")
+ log_json("ffmpeg_command", command=cmd_str)
+
+ # Log stderr to file to prevent pipe buffer deadlock
+ stderr_log_path = Path(CONFIG["log_dir"]) / "transcoder_ffmpeg_stderr.log"
+ stderr_file = open(stderr_log_path, "a")
+ stderr_file.write(f"\n{'='*60}\n{datetime.now()} - {input_path}\n{'='*60}\n")
+ stderr_file.flush()
+
+ try:
+ # Use Popen for progress monitoring
+ # stderr goes to file (not PIPE) to prevent buffer deadlock
+ process = subprocess.Popen(
+ cmd,
+ stdout=subprocess.PIPE,
+ stderr=stderr_file,
+ text=True
+ )
+ CURRENT_TRANSCODE["pid"] = process.pid
+ log.debug(f" FFmpeg PID: {process.pid}")
+
+ # Monitor progress
+ last_progress_log = datetime.now()
+ progress_interval = 60 # Log progress every 60 seconds
+ current_time_us = 0
+
+ import select
+ last_output_size = 0
+ last_output_growth = datetime.now()
+ stall_timeout = 600 # 10 minutes with no output growth = stalled
+
+ while True:
+ # Use select for non-blocking read with timeout
+ ready, _, _ = select.select([process.stdout], [], [], 30)
+
+ if ready:
+ line = process.stdout.readline()
+ if not line and process.poll() is not None:
+ break
+
+ # Parse progress output
+ if line.startswith("out_time_us="):
+ try:
+ current_time_us = int(line.split("=")[1])
+                    except ValueError:
+ pass
+ elif process.poll() is not None:
+ break
+
+ # Log progress periodically
+ now = datetime.now()
+ if (now - last_progress_log).total_seconds() >= progress_interval:
+ elapsed = (now - start_time).total_seconds()
+
+ if video_duration and current_time_us > 0:
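+                    # out_time_us is ffmpeg's encoded position in microseconds; the ETA assumes a roughly constant encode rate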
+ current_secs = current_time_us / 1_000_000
+ percent = (current_secs / video_duration) * 100
+ eta_secs = (elapsed / percent * 100) - elapsed if percent > 0 else 0
+
+ log.info(f" ⏳ Progress: {percent:.1f}% | Elapsed: {format_duration(elapsed)} | ETA: {format_duration(eta_secs)}")
+ log_json("transcode_progress",
+ file=input_path,
+ percent=percent,
+ elapsed=elapsed,
+ eta=eta_secs)
+ else:
+ log.info(f" ⏳ Elapsed: {format_duration(elapsed)}")
+
+ # Check output size growth and detect stalls
+ if output_path.exists():
+ current_size = os.path.getsize(output_path)
+ log.info(f" Output size: {format_bytes(current_size)}")
+
+ if current_size > last_output_size:
+ last_output_size = current_size
+ last_output_growth = now
+ elif (now - last_output_growth).total_seconds() > stall_timeout:
+ log.error(f" 🛑 STALL DETECTED: Output file hasn't grown in {stall_timeout}s")
+ log.error(f" Output stuck at {format_bytes(current_size)}")
+ log.error(f" Killing ffmpeg (PID {process.pid})")
+ process.kill()
+ process.wait(timeout=30)
+
+ # Mark as stalled in DB with timestamp for 7-day cooldown
+ conn.execute(
+ "UPDATE files SET status = 'stalled', failure_reason = ?, transcoded_at = ? WHERE path = ?",
+ (f"Output stalled at {format_bytes(current_size)} after {format_duration(elapsed)}",
+ datetime.now().timestamp(), input_path)
+ )
+ conn.commit()
+
+ # Clean up partial output
+ if output_path.exists():
+ output_path.unlink()
+ log.info(f" Cleaned up partial file: {output_path}")
+
+                        stderr_file.close()
+                        CURRENT_TRANSCODE["path"] = None
+                        return None
+
+ last_progress_log = now
+
+ # Wait for process to finish (stderr already going to file)
+ process.wait(timeout=60)
+ stderr_file.close()
+
+ if process.returncode != 0:
+ # Read last 1000 chars of stderr log for error reporting
+ try:
+ with open(stderr_log_path, 'r') as f:
+ f.seek(max(0, os.path.getsize(stderr_log_path) - 1000))
+ error_msg = f.read()
+        except Exception:
+ error_msg = "Unknown error (check transcoder_ffmpeg_stderr.log)"
+ log.error("=" * 60)
+ log.error("TRANSCODE FAILED")
+ log.error("=" * 60)
+ log.error(f" File: {input_path}")
+ log.error(f" Exit code: {process.returncode}")
+ log.error(f" FFmpeg error output:")
+ for line in error_msg.split('\n')[-20:]: # Last 20 lines
+ if line.strip():
+ log.error(f" {line}")
+ log.error("=" * 60)
+
+ log_json("transcode_failed",
+ file=input_path,
+ exit_code=process.returncode,
+ error=error_msg[-500:])
+
+ if output_path.exists():
+ output_path.unlink()
+ update_transcode_result(conn, input_path, None, 0, 0, settings, False, error_msg[-500:])
+ CURRENT_TRANSCODE["path"] = None
+ return None
+
+    except subprocess.TimeoutExpired:
+        log.error("=" * 60)
+        log.error(f"TRANSCODE TIMEOUT (ffmpeg did not exit in time): {input_path}")
+        log.error("=" * 60)
+        log_json("transcode_timeout", file=input_path)
+        process.kill()
+        stderr_file.close()
+        if output_path.exists():
+            output_path.unlink()
+        update_transcode_result(conn, input_path, None, 0, 0, settings, False, "Timeout waiting for ffmpeg to exit")
+        CURRENT_TRANSCODE["path"] = None
+        return None
+ except Exception as e:
+ log.error("=" * 60)
+ log.error(f"TRANSCODE ERROR: {input_path}")
+ log.error("=" * 60)
+ log.error(f" Exception: {type(e).__name__}: {e}")
+ log.error(f" Traceback:")
+ for line in traceback.format_exc().split('\n'):
+ if line.strip():
+ log.error(f" {line}")
+ log.error("=" * 60)
+
+ log_json("transcode_error",
+ file=input_path,
+ error=str(e),
+ traceback=traceback.format_exc())
+
+        stderr_file.close()
+        if output_path.exists():
+            output_path.unlink()
+        update_transcode_result(conn, input_path, None, 0, 0, settings, False, str(e))
+ CURRENT_TRANSCODE["path"] = None
+ return None
+
+ # Verify output
+ log.info(" Verifying output file...")
+ if not verify_output(str(output_path)):
+ log.error("=" * 60)
+ log.error("OUTPUT VERIFICATION FAILED")
+ log.error("=" * 60)
+ log.error(f" File: {output_path}")
+ if output_path.exists():
+ log.error(f" Output size: {format_bytes(os.path.getsize(output_path))}")
+ output_codec = get_video_codec(str(output_path))
+ log.error(f" Output codec: {output_codec}")
+ else:
+ log.error(" Output file does not exist!")
+ log.error("=" * 60)
+
+ log_json("transcode_verify_failed", file=input_path)
+ if output_path.exists():
+ output_path.unlink()
+ update_transcode_result(conn, input_path, None, 0, 0, settings, False, "Output verification failed")
+ CURRENT_TRANSCODE["path"] = None
+ return None
+
+ new_size = os.path.getsize(output_path)
+ duration_secs = (datetime.now() - start_time).total_seconds()
+ space_saved = original_size - new_size
+ compression_ratio = new_size / original_size if original_size > 0 else 0
+ encode_speed = video_duration / duration_secs if video_duration and duration_secs > 0 else 0
+
+ log.info("")
+ log.info("=" * 60)
+ log.info("✅ TRANSCODE COMPLETE")
+ log.info("=" * 60)
+ log.info(f" File: {input_file.name}")
+ log.info(f" Duration: {format_duration(duration_secs)}")
+ log.info(f" Original: {format_bytes(original_size)}")
+ log.info(f" New size: {format_bytes(new_size)}")
+ log.info(f" Saved: {format_bytes(space_saved)} ({100*(1-compression_ratio):.1f}% reduction)")
+ log.info(f" Ratio: {compression_ratio:.2f}x")
+ if encode_speed > 0:
+ log.info(f" Speed: {encode_speed:.2f}x realtime")
+ log.info("=" * 60)
+ log.info("")
+
+ log_json("transcode_complete",
+ file=input_path,
+ original_size=original_size,
+ new_size=new_size,
+ space_saved=space_saved,
+ duration_secs=duration_secs,
+ compression_ratio=compression_ratio,
+ encode_speed=encode_speed)
+
+ # Move original to cleanup directory
+ cleanup_dir = Path(CONFIG["cleanup_dir"])
+ cleanup_dir.mkdir(parents=True, exist_ok=True)
+
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ cleanup_name = f"{timestamp}_{input_file.name}"
+ cleanup_path = cleanup_dir / cleanup_name
+
+ log.info(f" Moving original to cleanup: {cleanup_path}")
+ try:
+ shutil.move(input_path, cleanup_path)
+ log.info(f" ✓ Original moved successfully")
+ log_json("original_moved", source=input_path, dest=str(cleanup_path))
+ except Exception as e:
+ log.error(f" ✗ Failed to move original: {e}")
+ log.error(f" Traceback: {traceback.format_exc()}")
+ # Don't fail the whole operation, but keep both files
+
+ # Rename output to final name
+ log.info(f" Renaming output: {output_path} -> {final_path}")
+ try:
+ output_path.rename(final_path)
+ log.info(f" ✓ Output renamed successfully")
+ except Exception as e:
+ log.error(f" ✗ Failed to rename output: {e}")
+ final_path = output_path
+
+ # Fix permissions - mediaserver group
+ log.debug(" Setting permissions (mediaserver:664)")
+ try:
+ subprocess.run(["chgrp", "mediaserver", str(final_path)], check=False)
+ subprocess.run(["chmod", "664", str(final_path)], check=False)
+ log.debug(" ✓ Permissions set")
+ except Exception as e:
+ log.warning(f" ✗ Failed to set permissions: {e}")
+
+ # Update database with success
+ update_transcode_result(conn, input_path, str(final_path), new_size, duration_secs, settings, True)
+
+ CURRENT_TRANSCODE["path"] = None
+ return {
+ "output_path": str(final_path),
+ "new_size": new_size,
+ "duration_secs": duration_secs,
+ "space_saved": space_saved
+ }
+
+
+def cleanup_old_files():
+ """Delete original files older than cleanup_days."""
+ cleanup_dir = Path(CONFIG["cleanup_dir"])
+ if not cleanup_dir.exists():
+ log.debug("Cleanup directory doesn't exist, skipping cleanup")
+ return
+
+ log.info("Checking cleanup directory for old originals...")
+ cutoff = datetime.now().timestamp() - (CONFIG["cleanup_days"] * 24 * 3600)
+ cutoff_date = datetime.fromtimestamp(cutoff).strftime('%Y-%m-%d %H:%M')
+ log.debug(f" Cutoff date: {cutoff_date} ({CONFIG['cleanup_days']} days ago)")
+
+ deleted_count = 0
+ deleted_size = 0
+ kept_count = 0
+ kept_size = 0
+
+ for filepath in cleanup_dir.iterdir():
+ if filepath.is_file():
+ mtime = filepath.stat().st_mtime
+ size = filepath.stat().st_size
+
+ if mtime < cutoff:
+ try:
+ filepath.unlink()
+ deleted_count += 1
+ deleted_size += size
+ log.info(f" 🗑️ Deleted: {filepath.name} ({format_bytes(size)})")
+ log_json("cleanup_deleted", file=str(filepath), size=size)
+ except Exception as e:
+ log.warning(f" ⚠ Failed to delete {filepath}: {e}")
+ else:
+ kept_count += 1
+ kept_size += size
+ days_old = (datetime.now().timestamp() - mtime) / 86400
+ log.debug(f" ⏳ Keeping: {filepath.name} ({days_old:.1f} days old)")
+
+ if deleted_count > 0:
+ log.info(f" Cleanup: deleted {deleted_count} files, freed {format_bytes(deleted_size)}")
+ log_json("cleanup_complete", deleted=deleted_count, freed=deleted_size)
+ else:
+ log.info(f" Cleanup: nothing to delete ({kept_count} files still in retention)")
+
+ if kept_count > 0:
+ log.debug(f" Retention: {kept_count} files ({format_bytes(kept_size)}) waiting)")
+
+
+def main():
+ # Initialize database
+ conn = init_db()
+
+ # Handle --stats flag
+ if args.stats:
+ stats = get_cache_stats(conn)
+ print("=" * 60)
+ print("Transcoder Statistics")
+ print("=" * 60)
+ print(f"Total files scanned: {stats['total']}")
+ print(f"Already HEVC: {stats['hevc_count']} ({stats['hevc_size']/(1024**3):.2f} GB)")
+ print(f"Pending transcode: {stats['pending_count']} ({stats['pending_size']/(1024**3):.2f} GB)")
+ print(f"Completed: {stats['done_count']}")
+ print(f"Failed: {stats['failed_count']}")
+ print("")
+ print("--- Lifetime Stats ---")
+ print(f"Total transcoded: {stats.get('total_transcoded', 0)} files")
+ print(f"Total space saved: {stats.get('total_space_saved', 0)/(1024**3):.2f} GB")
+ total_time = stats.get('total_transcode_time', 0)
+ print(f"Total transcode time: {total_time/3600:.1f} hours")
+ if stats['pending_count'] > 0:
+ est_savings = stats['pending_size'] * 0.5
+ print(f"\nEstimated remaining: ~{est_savings/(1024**3):.2f} GB savings")
+
+ if stats.get('by_directory'):
+ print("\n--- Top Directories (by pending size) ---")
+ for dir_path, count, pending in stats['by_directory'][:5]:
+ if pending > 0:
+ # Shorten path for display
+ short = dir_path.replace("/disks/Plex/", "")
+ print(f" {short}: {pending/(1024**3):.2f} GB pending")
+
+ print("=" * 60)
+ conn.close()
+ return
+
+ # Handle --clear-cache flag
+ if args.clear_cache:
+ conn.execute("DELETE FROM files")
+ conn.execute("UPDATE stats SET total_files_transcoded=0, total_space_saved=0, total_transcode_time_secs=0")
+ conn.commit()
+ print("Cache cleared.")
+ conn.close()
+ return
+
+ # Handle --failed flag
+ if args.failed:
+ cursor = conn.execute("""
+ SELECT path, original_size, failure_reason
+ FROM files WHERE status = 'failed'
+ ORDER BY original_size DESC
+ """)
+ rows = cursor.fetchall()
+ print("=" * 60)
+ print(f"Failed Transcodes ({len(rows)} files)")
+ print("=" * 60)
+ for path, size, reason in rows:
+ print(f"\n{path}")
+ print(f" Size: {size/(1024**3):.2f} GB")
+ print(f" Reason: {reason}")
+ if not rows:
+ print("No failed transcodes!")
+ print("=" * 60)
+ conn.close()
+ return
+
+ # Handle --retry-failed flag
+ if args.retry_failed:
+ cursor = conn.execute("UPDATE files SET status = 'pending', failure_reason = NULL WHERE status = 'failed'")
+ count = cursor.rowcount
+ conn.commit()
+ print(f"Reset {count} failed files to pending.")
+ conn.close()
+ return
+
+ session_start = datetime.now()
+
+ log.info("")
+ log.info("=" * 70)
+ log.info(" OVERNIGHT VIDEO TRANSCODER")
+ log.info("=" * 70)
+ log.info(f" Started: {session_start.strftime('%Y-%m-%d %H:%M:%S')}")
+ log.info(f" Mode: {'DRY RUN' if DRY_RUN else 'LIVE'}")
+ log.info(f" Verbose: {'YES' if VERBOSE else 'NO'}")
+ log.info(f" JSON log: {'YES' if JSON_LOG else 'NO'}")
+ log.info(f" Cutoff: {CONFIG['cutoff_time']}")
+ log.info(f" Settings: CRF={CONFIG['crf']}, preset={CONFIG['preset']}, audio={CONFIG['audio_codec']}@{CONFIG['audio_bitrate']}")
+ log.info("=" * 70)
+ log.info("")
+
+ log_json("session_start",
+ mode="dry_run" if DRY_RUN else "live",
+ config=CONFIG)
+
+ # Check disk space first
+ check_disk_space()
+
+ # Run cleanup first (skip in dry run)
+ if not DRY_RUN:
+ cleanup_old_files()
+
+ # Find videos to transcode
+ videos = find_videos_to_transcode(conn)
+
+ # Filter to non-HEVC pending only (using cached codec info)
+ to_transcode = []
+ hevc_count = 0
+ hevc_size = 0
+
+    for video in videos:
+        if video.get("is_hevc", False) or video.get("status") == "done":
+            hevc_count += 1
+            hevc_size += video["size"]
+        elif video.get("status") in ("pending", "stalled"):
+            # Keep stalled files in the queue so the 7-day retry cooldown below can apply to them
+            to_transcode.append(video)
+
+ log.info(f"Already HEVC: {hevc_count} files ({hevc_size / (1024**3):.2f} GB)")
+ log.info(f"Need transcoding: {len(to_transcode)} files")
+
+ if DRY_RUN:
+ log.info("")
+ log.info("=" * 60)
+ log.info("DRY RUN - Files that would be transcoded (biggest first):")
+ log.info("=" * 60)
+
+ total_size = 0
+ estimated_savings = 0
+
+ for i, video in enumerate(to_transcode[:20], 1): # Show top 20
+ size_gb = video["size_gb"]
+ total_size += video["size"]
+ # Estimate ~50% compression for HEVC
+ est_saved = video["size"] * 0.5
+ estimated_savings += est_saved
+
+ log.info(f"{i:2}. [{size_gb:6.2f} GB] {video['path']}")
+
+ if len(to_transcode) > 20:
+ log.info(f" ... and {len(to_transcode) - 20} more files")
+
+ log.info("")
+ log.info("=" * 60)
+ log.info(f"Total to transcode: {total_size / (1024**3):.2f} GB")
+ log.info(f"Estimated savings (~50%): {estimated_savings / (1024**3):.2f} GB")
+ log.info("=" * 60)
+ return
+
+ # Normal run
+ transcoded_count = 0
+ total_saved = 0
+ failed_count = 0
+ skipped_count = 0
+
+ log.info("")
+ log.info("=" * 60)
+ log.info("Starting transcode queue")
+ log.info("=" * 60)
+ log.info(f" Files to process: {len(to_transcode)}")
+ log.info(f" Total size: {format_bytes(sum(v['size'] for v in to_transcode))}")
+ log.info("")
+
+ for i, video in enumerate(to_transcode, 1):
+ # Check cutoff time
+ if is_past_cutoff():
+ remaining = len(to_transcode) - i + 1
+ log.info("")
+ log.info("=" * 60)
+ log.info(f"⏰ CUTOFF TIME REACHED ({CONFIG['cutoff_time']})")
+ log.info(f" Stopping with {remaining} files remaining in queue")
+ log.info("=" * 60)
+ log_json("cutoff_reached", remaining_files=remaining)
+ break
+
+ # Skip already done or failed
+ if video.get("status") in ["done", "failed"]:
+ log.debug(f" Skipping (status={video['status']}): {video['path']}")
+ skipped_count += 1
+ continue
+
+ # Skip stalled files unless 7 days have passed
+ if video.get("status") == "stalled":
+ row = conn.execute(
+ "SELECT transcoded_at FROM files WHERE path = ?", (video['path'],)
+ ).fetchone()
+ if row and row[0]:
+ stalled_at = datetime.fromtimestamp(row[0])
+ days_since = (datetime.now() - stalled_at).days
+ if days_since < 7:
+ log.debug(f" Skipping stalled file ({days_since}d ago, retry after 7d): {video['path']}")
+ skipped_count += 1
+ continue
+ else:
+ log.info(f" Retrying stalled file ({days_since}d cooldown passed): {video['path']}")
+ else:
+ log.debug(f" Skipping stalled file (no timestamp): {video['path']}")
+ skipped_count += 1
+ continue
+
+ log.info(f"[{i}/{len(to_transcode)}] Queued: {Path(video['path']).name}")
+ log.info(f" Size: {format_bytes(video['size'])}")
+ log.info(f" Codec: video={video.get('video_codec', 'unknown')}, audio={video.get('audio_codec', 'unknown')}")
+ log.info(f" Path: {video['path']}")
+
+ log_json("transcode_queued",
+ index=i,
+ total=len(to_transcode),
+ file=video['path'],
+ size=video['size'],
+ video_codec=video.get('video_codec'),
+ audio_codec=video.get('audio_codec'))
+
+ # Transcode
+ result = transcode_file(video["path"], conn)
+
+ if result:
+ total_saved += result["space_saved"]
+ transcoded_count += 1
+ log.info(f" Running totals: {transcoded_count} done, {format_bytes(total_saved)} saved")
+ else:
+ failed_count += 1
+ log.error(f" ❌ Transcode #{i} failed: {video['path']}")
+ log.info(f" Running totals: {transcoded_count} done, {failed_count} failed")
+
+ session_end = datetime.now()
+ session_duration = (session_end - session_start).total_seconds()
+
+ log.info("")
+ log.info("=" * 70)
+ log.info(" SESSION COMPLETE")
+ log.info("=" * 70)
+ log.info(f" Started: {session_start.strftime('%Y-%m-%d %H:%M:%S')}")
+ log.info(f" Ended: {session_end.strftime('%Y-%m-%d %H:%M:%S')}")
+ log.info(f" Duration: {format_duration(session_duration)}")
+ log.info(f" Transcoded: {transcoded_count} files")
+ log.info(f" Failed: {failed_count} files")
+ log.info(f" Skipped: {skipped_count} files")
+ log.info(f" Space saved: {format_bytes(total_saved)}")
+
+ # Get updated lifetime stats
+ stats = get_cache_stats(conn)
+ log.info("")
+ log.info(" --- Lifetime Totals ---")
+ log.info(f" Total transcoded: {stats.get('total_transcoded', 0)} files")
+ log.info(f" Total saved: {format_bytes(stats.get('total_space_saved', 0))}")
+ log.info(f" Pending: {stats.get('pending_count', 0)} files ({format_bytes(stats.get('pending_size', 0))})")
+ log.info("=" * 70)
+ log.info("")
+
+ log_json("session_complete",
+ started=session_start.isoformat(),
+ ended=session_end.isoformat(),
+ duration_secs=session_duration,
+ transcoded=transcoded_count,
+ space_saved=total_saved,
+ lifetime_transcoded=stats.get('total_transcoded', 0),
+ lifetime_saved=stats.get('total_space_saved', 0),
+ pending_count=stats.get('pending_count', 0))
+
+ conn.close()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scrape_discogs_labels.py b/scrape_discogs_labels.py
new file mode 100644
index 0000000..7f6c6ff
--- /dev/null
+++ b/scrape_discogs_labels.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+"""Scrape releases from Discogs labels and save to JSON for the music pipeline.
+
+Reads label URLs from Tom's bookmarks (Discogs folder) and scrapes releases.
+"""
+
+import json
+import re
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+import requests
+
+HEADERS = {
+ "User-Agent": "MusicRecommender/1.0 +https://github.com/openclaw"
+}
+
+SCRIPTS_DIR = Path(__file__).parent
+
+
+def get_labels_from_bookmarks() -> list[tuple[int, str]]:
+ """Parse bookmarks and extract Discogs label IDs and names.
+
+ Only looks in Music > Discog Labels folder.
+ """
+ labels = []
+
+ # Run the decrypt script
+ try:
+ result = subprocess.run(
+ ["node", str(SCRIPTS_DIR / "decrypt_bookmarks.js")],
+ capture_output=True,
+ text=True,
+ timeout=30
+ )
+ bookmarks_xml = result.stdout
+ except Exception as e:
+ print(f"Error reading bookmarks: {e}", file=sys.stderr)
+ return labels
+
+ # Find the "Discog Labels" folder section
+ # Look for <folder...><title>Discog Labels</title>...</folder>
+ folder_pattern = r'<folder[^>]*>\s*<title>Discog Labels</title>(.*?)</folder>'
+ folder_match = re.search(folder_pattern, bookmarks_xml, re.DOTALL | re.IGNORECASE)
+
+ if not folder_match:
+ print("Could not find 'Discog Labels' folder in bookmarks", file=sys.stderr)
+ return labels
+
+ folder_content = folder_match.group(1)
+
+ # Find Discogs label URLs within that folder
+ # Pattern: https://www.discogs.com/label/6458-Indochina
+ pattern = r'href="https://www\.discogs\.com/label/(\d+)-([^"?]+)'
+
+ for match in re.finditer(pattern, folder_content):
+ label_id = int(match.group(1))
+ label_name = match.group(2).replace("-", " ")
+ labels.append((label_id, label_name))
+
+ return labels
+
+def get_label_releases(label_id: int, label_name: str, max_pages: int = 5) -> list[dict]:
+ """Fetch releases from a Discogs label."""
+ releases = []
+
+ for page in range(1, max_pages + 1):
+ url = f"https://api.discogs.com/labels/{label_id}/releases?page={page}&per_page=100"
+ print(f" Fetching {label_name} page {page}...", file=sys.stderr)
+
+ try:
+ resp = requests.get(url, headers=HEADERS, timeout=30)
+ resp.raise_for_status()
+ data = resp.json()
+ except Exception as e:
+ print(f" Error: {e}", file=sys.stderr)
+ break
+
+ for r in data.get("releases", []):
+ # Skip compilations, singles, etc - focus on albums
+ if r.get("format") and "Album" not in str(r.get("format", "")):
+ # Still include if no format specified
+ pass
+
+ artist = r.get("artist", "Various")
+ title = r.get("title", "")
+ year = r.get("year", "")
+
+ # Clean up artist name
+ artist = re.sub(r'\s*\(\d+\)$', '', artist) # Remove disambiguation numbers
+
+ if artist and title and artist.lower() != "various":
+ releases.append({
+ "artist": artist,
+ "album": title,
+ "year": year,
+ "label": label_name,
+ "discogs_id": r.get("id"),
+ })
+
+ # Check if more pages
+ if page >= data.get("pagination", {}).get("pages", 1):
+ break
+
+ time.sleep(1) # Rate limit
+
+ return releases
+
+def get_labels_from_config() -> list[tuple[int, str]]:
+ """Get Discogs labels from music_config.json."""
+ labels = []
+ config_path = SCRIPTS_DIR / "music_config.json"
+
+ if not config_path.exists():
+ return labels
+
+ try:
+ with open(config_path) as f:
+ config = json.load(f)
+
+ for entry in config.get("discogs_labels", []):
+ url = entry.get("url", "")
+ name = entry.get("name", "")
+ # Extract ID from URL like https://www.discogs.com/label/6170-Tempa
+ match = re.search(r'/label/(\d+)', url)
+ if match and name:
+ labels.append((int(match.group(1)), name))
+ except Exception as e:
+ print(f"Error reading config: {e}", file=sys.stderr)
+
+ return labels
+
+
+def main():
+ # Get labels from bookmarks and config
+ labels = get_labels_from_bookmarks()
+ config_labels = get_labels_from_config()
+
+ # Merge, avoiding duplicates by ID
+ seen_ids = {lid for lid, _ in labels}
+ for lid, lname in config_labels:
+ if lid not in seen_ids:
+ labels.append((lid, lname))
+ seen_ids.add(lid)
+
+ if not labels:
+ print("No Discogs labels found in bookmarks or config!", file=sys.stderr)
+ sys.exit(1)
+
+ print(f"Found {len(labels)} labels in bookmarks:", file=sys.stderr)
+ for lid, lname in labels:
+ print(f" - {lname} ({lid})", file=sys.stderr)
+
+ all_releases = []
+
+ for label_id, label_name in labels:
+ print(f"Scraping {label_name}...", file=sys.stderr)
+ releases = get_label_releases(label_id, label_name)
+ all_releases.extend(releases)
+ print(f" Found {len(releases)} releases", file=sys.stderr)
+ time.sleep(2) # Be nice to Discogs
+
+ # Dedupe by artist+album
+ seen = set()
+ unique = []
+ for r in all_releases:
+ key = f"{r['artist'].lower()}|{r['album'].lower()}"
+ if key not in seen:
+ seen.add(key)
+ unique.append(r)
+
+ output = {
+ "labels": [{"id": lid, "name": lname} for lid, lname in labels],
+ "releases": unique,
+ "scraped_at": time.strftime("%Y-%m-%d %H:%M:%S"),
+ }
+
+ print(json.dumps(output, indent=2))
+
+if __name__ == "__main__":
+ main()
diff --git a/transcode_album.sh b/transcode_album.sh
new file mode 100755
index 0000000..d1ad1aa
--- /dev/null
+++ b/transcode_album.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# Transcode FLAC album to Opus, preserving metadata
+# Usage: ./transcode_album.sh <source_dir> [--delete-source]
+#
+# Converts all FLAC files to Opus 128kbps (transparent quality, ~10x smaller)
+# Preserves all metadata tags and album art
+
+set -e
+
+SOURCE_DIR="$1"
+DELETE_SOURCE=false
+BITRATE="128k"
+GROUP="mediaserver"
+
+if [[ "$2" == "--delete-source" ]]; then
+ DELETE_SOURCE=true
+fi
+
+if [[ -z "$SOURCE_DIR" ]] || [[ ! -d "$SOURCE_DIR" ]]; then
+ echo "Usage: $0 <source_directory> [--delete-source]"
+ exit 1
+fi
+
+# Count FLAC files
+flac_count=$(find "$SOURCE_DIR" -maxdepth 1 -iname "*.flac" | wc -l)
+if [[ $flac_count -eq 0 ]]; then
+ echo "No FLAC files found in $SOURCE_DIR"
+ exit 0
+fi
+
+echo "Transcoding $flac_count FLAC files to Opus ($BITRATE)..."
+echo "Source: $SOURCE_DIR"
+
+converted=0
+failed=0
+
+# Process each FLAC file.
+# Read the file list via process substitution rather than a pipe: a piped while-loop
+# runs in a subshell, which would discard the converted/failed counters.
+while read -r flac_file; do
+    filename=$(basename "$flac_file")
+    # Strip the .flac extension case-insensitively, then add .opus
+    opus_file="${flac_file%.[Ff][Ll][Aa][Cc]}.opus"
+
+ echo " Converting: $filename"
+
+    if ffmpeg -nostdin -hide_banner -loglevel warning -i "$flac_file" \
+ -c:a libopus -b:a "$BITRATE" -vbr on \
+ -map_metadata 0 \
+ -y "$opus_file" 2>&1; then
+
+ # Preserve original timestamp
+ touch -r "$flac_file" "$opus_file"
+
+ # Fix permissions
+ chgrp "$GROUP" "$opus_file" 2>/dev/null || true
+ chmod 664 "$opus_file" 2>/dev/null || true
+
+ if $DELETE_SOURCE; then
+ rm "$flac_file"
+ echo " ✓ Converted and removed FLAC"
+ else
+ echo " ✓ Converted (FLAC kept)"
+ fi
+ ((converted++)) || true
+ else
+ echo " ✗ Failed to convert"
+ ((failed++)) || true
+ fi
+done < <(find "$SOURCE_DIR" -maxdepth 1 -iname "*.flac")
+
+# MP3s are left alone - they are already lossy-compressed, so re-encoding gains nothing.
+
+# Cover art is kept: ffmpeg should carry embedded art over to the .opus files automatically.
+
+echo ""
+echo "Done! Converted $converted files."
+if $DELETE_SOURCE; then
+    echo "Original FLAC files deleted to save space."
+fi