#!/usr/bin/env python3
"""
Susan Morning Report
Generates a system health report and emails it to Tom.
Run via cron at 06:45 (after transcoder finishes).
"""

import subprocess
import json
import os
import re
import shlex
import smtplib
import sqlite3
from contextlib import closing
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime, timedelta
from pathlib import Path

# Config
EMAIL_CONFIG = "/etc/susan/email.json"
TRANSCODER_LOG = "/var/log/transcoder.log"
TRANSCODER_DB = "/var/lib/transcoder/cache.db"
TO_EMAIL = "tom@tomflux.xyz"

# Prefix run_cmd() puts on its return value when the command could not be run.
_CMD_ERROR_PREFIX = "Error:"

# Seconds to wait on the SMTP server before giving up.
_SMTP_TIMEOUT = 30


def run_cmd(cmd: str, timeout: int = 30) -> str:
    """Run a shell command and return its stripped stdout.

    Args:
        cmd: Command line, executed through the shell (pipes/redirects allowed).
        timeout: Seconds to wait before giving up.

    Returns:
        stdout with surrounding whitespace removed. stderr and the exit status
        are ignored. If the command cannot be started or times out, the string
        "Error: <reason>" is returned instead of raising.
    """
    try:
        result = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
            errors="replace",  # odd bytes in tool output must not abort the report
            timeout=timeout,
        )
    except (subprocess.SubprocessError, OSError, ValueError) as e:
        return f"Error: {e}"
    return result.stdout.strip()


def get_uptime() -> dict:
    """Get system uptime info.

    Returns:
        dict with "uptime" (output of `uptime -p` without the "up " prefix),
        "since" (output of `uptime -s`) and "load_avg" (the first three fields
        of /proc/loadavg joined with " / ", or "unknown").
    """
    uptime_raw = run_cmd("uptime -p")
    boot_time = run_cmd("uptime -s")
    # Read the file directly: going through run_cmd would turn a failure into
    # an "Error: ..." string whose words would be mistaken for load figures.
    try:
        load = Path("/proc/loadavg").read_text().split()[:3]
    except OSError:
        load = []
    return {
        "uptime": uptime_raw.replace("up ", ""),
        "since": boot_time,
        "load_avg": f"{load[0]} / {load[1]} / {load[2]}" if len(load) >= 3 else "unknown",
    }


def get_disk_space() -> list:
    """Get disk usage for important mounts.

    Returns:
        One dict per reported mount with the keys "mount", "size", "used",
        "avail" and "percent" (all strings as printed by `df -h`).
    """
    # Virtual filesystems are excluded by df itself; the mount filter is below.
    df_output = run_cmd(
        "df -h --output=target,size,used,avail,pcent -x tmpfs -x devtmpfs -x squashfs | tail -n +2"
    )
    disks = []
    for line in df_output.split('\n'):
        parts = line.split()
        if len(parts) < 5:
            continue
        mount = parts[0]
        # Only include /, /home, /var and /disks*
        if mount in ('/', '/home', '/var') or mount.startswith('/disks'):
            disks.append({
                "mount": mount,
                "size": parts[1],
                "used": parts[2],
                "avail": parts[3],
                "percent": parts[4],
            })
    return disks


def get_raid_status() -> dict:
    """Get 3ware RAID status using tw_cli, falling back to /proc/mdstat.

    Returns:
        dict with "available" (bool), "details" (raw tool output or None),
        "drives" (list of {"port", "status", "size"}) and "array"
        ({"unit", "type", "status", "verify"} or None). When the mdstat
        fallback is used, "type" is additionally set to "mdadm".
    """
    status = {"available": False, "details": None, "drives": [], "array": None}

    # Try tw_cli first
    tw_output = run_cmd("sudo tw_cli /c0 show 2>/dev/null")
    if tw_output and "Error" not in tw_output and "Unit" in tw_output:
        status["available"] = True
        status["details"] = tw_output
        for line in tw_output.split('\n'):
            parts = line.split()
            # Array status line for unit u0
            if line.startswith('u0') and len(parts) >= 3:
                # The 5th column is only kept when it looks like a percentage.
                has_progress = len(parts) > 4 and '%' in parts[4]
                status["array"] = {
                    "unit": parts[0],
                    "type": parts[1],
                    "status": parts[2],
                    "verify": parts[4] if has_progress else None,
                }
            # Drive lines (p4, p5, p6, etc)
            if line.startswith('p') and len(parts) >= 3:
                status["drives"].append({
                    "port": parts[0],
                    "status": parts[1],
                    "size": parts[3] if len(parts) > 3 else "unknown",
                })
    else:
        # Fallback to checking for mdadm
        md_output = run_cmd("cat /proc/mdstat 2>/dev/null")
        if md_output and "Error" not in md_output and "md" in md_output:
            status["available"] = True
            status["details"] = md_output
            status["type"] = "mdadm"

    return status


def get_smart_status() -> list:
    """Get SMART status for drives. Note: requires sudo for smartctl.

    Returns:
        One dict per reported disk with "device", "size" and "health"
        ("PASSED", "FAILED", "needs sudo" or "unknown"), plus
        "reallocated_sectors" (int) when that attribute could be parsed.
        Devices smartctl cannot identify (RAID virtual devices) are omitted.
    """
    drives = []

    # Find physical block devices (skip loop, ram, etc)
    lsblk = run_cmd("lsblk -d -o NAME,SIZE,TYPE | grep disk | grep -v loop")
    if lsblk.startswith(_CMD_ERROR_PREFIX):
        # Do not turn the words of an error message into device names.
        return drives

    for line in lsblk.split('\n'):
        parts = line.split()
        if not parts or parts[0].startswith('loop'):
            continue
        dev = f"/dev/{parts[0]}"
        size = parts[1] if len(parts) > 1 else "unknown"

        # Try smartctl with sudo
        smart = run_cmd(
            f"sudo smartctl -H {dev} 2>/dev/null | grep -iE 'overall-health|result|PASSED|FAILED'"
        )

        # Determine health status
        if "PASSED" in smart:
            health = "PASSED"
        elif "FAILED" in smart:
            health = "FAILED"
        else:
            # No verdict. The query above discards stderr and filters stdout,
            # so ask again unfiltered to learn why before labelling the drive.
            info = run_cmd(f"sudo smartctl -i {dev} 2>&1")
            if "Unable to detect" in info:
                # RAID virtual device: it cannot be queried, leave it out.
                continue
            if "sudo:" in info or not smart:
                health = "needs sudo"
            else:
                health = "unknown"

        # Multi-TB device with unknown health = likely RAID
        if "unknown" in health and "T" in size:
            continue

        drive_info = {
            "device": dev,
            "size": size,
            "health": health,
        }

        reallocated = run_cmd(f"sudo smartctl -A {dev} 2>/dev/null | grep -i 'Reallocated_Sector'")
        if reallocated:
            # Extract reallocated sector count (last number on line)
            match = re.search(r'(\d+)\s*$', reallocated.strip())
            if match:
                drive_info["reallocated_sectors"] = int(match.group(1))

        drives.append(drive_info)

    return drives


def get_memory() -> dict:
    """Get memory usage.

    Returns:
        dict with "total", "used" and "available" taken from the "Mem" line of
        `free -h`, or "unknown" for all three when that line cannot be parsed.
    """
    parts = run_cmd("free -h | grep Mem").split()
    if len(parts) < 4:
        return {"total": "unknown", "used": "unknown", "available": "unknown"}
    return {
        "total": parts[1],
        "used": parts[2],
        # Use the 7th column when present, otherwise fall back to the 4th.
        "available": parts[6] if len(parts) > 6 else parts[3],
    }


def get_cpu_info() -> dict:
    """Get CPU info and temperature.

    Returns:
        dict with "model" (first "model name" entry of /proc/cpuinfo or
        "unknown") and "temps" (up to five matching lines of `sensors` output,
        or "sensors not available").
    """
    # CPU model
    model = run_cmd("grep 'model name' /proc/cpuinfo | head -1 | cut -d: -f2")
    # Temperatures
    temps = run_cmd("sensors 2>/dev/null | grep -E 'Core|temp' | head -5")
    return {
        "model": model.strip() if model else "unknown",
        "temps": temps if temps else "sensors not available",
    }


def _read_queue_stats(db_file: Path) -> dict:
    """Read pending/HEVC/failed counts and lifetime totals from the transcoder DB."""
    # closing() guarantees the connection is released even if a query fails.
    with closing(sqlite3.connect(str(db_file))) as conn:
        # Pending files
        row = conn.execute("""
            SELECT COUNT(*), SUM(original_size)
            FROM files
            WHERE is_hevc = 0 AND status = 'pending'
        """).fetchone()
        pending_count = row[0] or 0
        pending_size = row[1] or 0  # SUM() is NULL when nothing is pending

        # Already HEVC
        hevc_count = conn.execute(
            "SELECT COUNT(*) FROM files WHERE is_hevc = 1"
        ).fetchone()[0] or 0

        # Lifetime stats; the row may be missing or hold NULLs
        row = conn.execute(
            "SELECT total_files_transcoded, total_space_saved FROM stats WHERE id = 1"
        ).fetchone()
        lifetime_transcoded = (row[0] if row else 0) or 0
        lifetime_saved = (row[1] if row else 0) or 0

        # Failed count
        failed_count = conn.execute(
            "SELECT COUNT(*) FROM files WHERE status = 'failed'"
        ).fetchone()[0] or 0

    return {
        "pending_count": pending_count,
        "pending_size": pending_size,
        "pending_size_human": f"{pending_size / (1024**3):.1f} GB" if pending_size else "0 GB",
        "hevc_count": hevc_count,
        "failed_count": failed_count,
        "lifetime_transcoded": lifetime_transcoded,
        "lifetime_saved": lifetime_saved,
        "lifetime_saved_human": f"{lifetime_saved / (1024**3):.1f} GB" if lifetime_saved else "0 GB",
    }


def _read_log(log_file: Path) -> str:
    """Return the log's text, retrying via `sudo cat` when it is not readable."""
    try:
        return log_file.read_text(errors="replace")
    except PermissionError:
        content = run_cmd(f"sudo cat {shlex.quote(str(log_file))} 2>/dev/null")
        # Only run_cmd's own failure marker voids the content; the word
        # "Error" inside a real log is ordinary log text.
        return "" if content.startswith(_CMD_ERROR_PREFIX) else content
    except OSError:
        return ""


def get_transcoder_status(db_path: str = TRANSCODER_DB, log_path: str = TRANSCODER_LOG) -> dict:
    """Get last night's transcoder results + queue stats from DB.

    Args:
        db_path: SQLite database holding the `files` and `stats` tables.
        log_path: Transcoder log file to scan for the last session summary.

    Returns:
        dict with "ran" (bool), "summary" (str or None) and "queue" (None when
        the database file does not exist, {"error": message} when it cannot be
        queried, otherwise the queue statistics). When a SESSION COMPLETE block
        is found in the log, "transcoded", "failed" and "space_saved" are added.
    """
    status = {"ran": False, "summary": None, "queue": None}

    # Get queue stats from database
    db_file = Path(db_path)
    if db_file.exists():
        try:
            status["queue"] = _read_queue_stats(db_file)
        except (sqlite3.Error, TypeError, ValueError) as e:
            status["queue"] = {"error": str(e)}

    # Check log for last night's run
    log_file = Path(log_path)
    if not log_file.exists():
        return status
    log_content = _read_log(log_file)
    if not log_content:
        return status

    # Find the last SESSION COMPLETE block
    sessions = re.findall(
        r'SESSION COMPLETE.*?Transcoded:\s+(\d+).*?Failed:\s+(\d+).*?Space saved:\s+([\d.]+\s+\w+)',
        log_content,
        re.DOTALL,
    )
    if sessions:
        transcoded, failed, saved = sessions[-1]
        status["ran"] = True
        status["transcoded"] = int(transcoded)
        status["failed"] = int(failed)
        status["space_saved"] = saved
        status["summary"] = f"{transcoded} transcoded, {failed} failed, {saved} saved"
    else:
        # No summary block: treat a mention of today's or yesterday's date as
        # evidence that the transcoder ran.
        now = datetime.now()
        today = now.strftime("%Y-%m-%d")
        yesterday = (now - timedelta(days=1)).strftime("%Y-%m-%d")
        if today in log_content or yesterday in log_content:
            status["ran"] = True
            status["summary"] = "Ran (check log for details)"

    return status


def get_failed_services() -> list:
    """Get any failed systemd services.

    Returns:
        Names of failed .service/.socket/.mount units as printed by
        `systemctl --failed`.
    """
    # Raw string: the backslashes belong to the grep pattern, not to Python.
    output = run_cmd(
        r"systemctl --failed --no-pager --plain | grep -E '\.service|\.socket|\.mount' | awk '{print $1}'"
    )
    failed = []
    for line in output.split('\n'):
        name = line.strip()
        if name and not name.startswith('UNIT') and '●' not in name:
            failed.append(name)
    return failed


def _disk_lines(disks: list) -> list:
    """Format one report line per disk, flagging usage above 85%."""
    lines = []
    for disk in disks:
        pct = disk['percent'].replace('%', '')
        warn = " ⚠️" if pct.isdigit() and int(pct) > 85 else ""
        lines.append(f" {disk['mount']}: {disk['used']}/{disk['size']} ({disk['percent']} used){warn}")
    return lines


def _raid_lines(raid: dict) -> list:
    """Format the RAID section body from get_raid_status() output."""
    if not raid["available"]:
        return [" tw_cli not available - install 3ware tools for RAID monitoring"]

    lines = []
    arr = raid.get("array")
    if arr:
        if arr["status"] == "OK":
            status_icon = "✅"
        elif "VERIFY" in arr["status"]:
            status_icon = "⚠️"
        else:
            status_icon = "❌"
        lines.append(f" Array: {arr['type']} {status_icon} {arr['status']}")
        if arr.get("verify"):
            lines.append(f" Verify progress: {arr['verify']}")

    drives = raid.get("drives")
    if drives:
        lines.append(f" Drives: {len(drives)} disks")
        for drive in drives:
            d_icon = "✅" if drive["status"] == "OK" else "❌"
            lines.append(f" {drive['port']}: {d_icon} {drive['status']} ({drive['size']} TB)")

    # The mdadm fallback has no parsed array/drives; show the raw mdstat text
    # so the section is not left empty.
    if raid.get("type") == "mdadm" and raid.get("details"):
        lines.append(" mdadm (/proc/mdstat):")
        lines.extend(f"  {raw.strip()}" for raw in raid["details"].split('\n') if raw.strip())

    return lines


def _smart_lines(drives: list) -> list:
    """Format one report line per drive from get_smart_status() output."""
    lines = []
    for drive in drives:
        warn = ""
        if drive.get("reallocated_sectors", 0) > 0:
            warn = f" ⚠️ {drive['reallocated_sectors']} reallocated sectors!"
        health_icon = "✅" if drive["health"] == "PASSED" else "❌"
        lines.append(f" {drive['device']} ({drive['size']}): {health_icon} {drive['health']}{warn}")
    return lines


def _transcoder_lines(transcoder: dict) -> list:
    """Format the transcoder section body from get_transcoder_status() output."""
    lines = []

    # Last night's run
    if transcoder.get("ran"):
        lines.append(f" Last run: ✅ {transcoder.get('summary') or 'Completed'}")
    elif transcoder.get("summary"):
        lines.append(f" Last run: ⏸️ {transcoder['summary']}")
    else:
        lines.append(" Last run: No data")

    # Queue stats from database. "queue" is None when the DB file is absent,
    # so it must be normalised before any lookup.
    q = transcoder.get("queue") or {}
    if "error" in q:
        lines.append(f" DB error: {q['error']}")
    elif q:
        lines.append(f" Queue: {q['pending_count']} files ({q['pending_size_human']}) waiting")
        if q['failed_count'] > 0:
            lines.append(f" ⚠️ Failed: {q['failed_count']} files need attention")
        lines.append(f" Library: {q['hevc_count']} files already HEVC")
        if q['lifetime_transcoded'] > 0:
            lines.append(f" Lifetime: {q['lifetime_transcoded']} transcoded, {q['lifetime_saved_human']} saved")

    return lines


def generate_report() -> str:
    """Generate the full morning report.

    Collects uptime, memory, disk, RAID, SMART, transcoder and systemd data
    and renders them as newline-joined plain text.
    """
    now = datetime.now()
    rule = "=" * 50

    report = [
        rule,
        "🖥️ SUSAN MORNING REPORT",
        f"📅 {now.strftime('%A, %B %d %Y at %H:%M')}",
        rule,
        "",
    ]

    # Uptime
    uptime = get_uptime()
    report += [
        "⏱️ UPTIME",
        f" Up: {uptime['uptime']}",
        f" Since: {uptime['since']}",
        f" Load: {uptime['load_avg']}",
        "",
    ]

    # Memory
    mem = get_memory()
    report += [
        "🧠 MEMORY",
        f" Used: {mem['used']} / {mem['total']}",
        f" Available: {mem['available']}",
        "",
    ]

    # Disk Space
    report.append("💾 DISK SPACE")
    report += _disk_lines(get_disk_space())
    report.append("")

    # RAID Status
    report.append("🔒 RAID STATUS")
    report += _raid_lines(get_raid_status())
    report.append("")

    # Drive Health (SMART) - the whole section is omitted when nothing is reported
    drives = get_smart_status()
    if drives:
        report.append("🔧 DRIVE HEALTH (SMART)")
        report += _smart_lines(drives)
        report.append("")

    # Transcoder
    report.append("🎬 TRANSCODER")
    report += _transcoder_lines(get_transcoder_status())
    report.append("")

    # Failed Services
    failed = get_failed_services()
    report.append("🚨 SYSTEMD SERVICES")
    if failed:
        report.append(f" ❌ {len(failed)} failed: {', '.join(failed)}")
    else:
        report.append(" ✅ All services OK")
    report.append("")

    report += [rule, "End of report", rule]

    return "\n".join(report)


def send_email(subject: str, body: str):
    """Send the report via email.

    Reads the SMTP settings ("server", "port", "username", "password", "from")
    from the "smtp" object of the JSON file at EMAIL_CONFIG and sends a plain
    text message to TO_EMAIL over SMTP-over-SSL.

    Raises:
        OSError, KeyError, json.JSONDecodeError: unreadable or incomplete config.
        smtplib.SMTPException, OSError: connection, login or delivery failure.
    """
    with open(EMAIL_CONFIG, encoding="utf-8") as f:
        smtp = json.load(f)['smtp']

    msg = MIMEMultipart()
    msg['Subject'] = subject
    msg['From'] = smtp['from']
    msg['To'] = TO_EMAIL

    # Plain text version
    msg.attach(MIMEText(body, 'plain'))

    # The context manager closes the connection even if login/send fails;
    # the timeout keeps an unresponsive server from hanging the cron job.
    with smtplib.SMTP_SSL(smtp['server'], smtp['port'], timeout=_SMTP_TIMEOUT) as server:
        server.login(smtp['username'], smtp['password'])
        server.sendmail(smtp['from'], [TO_EMAIL], msg.as_string())


def main():
    """Build the report, print it, and email it; email failures are printed, not raised."""
    report = generate_report()

    # Print to stdout (for logging)
    print(report)

    # Email it
    today = datetime.now().strftime("%Y-%m-%d")
    subject = f"🖥️ Susan Morning Report - {today}"

    try:
        send_email(subject, report)
        print("\n✅ Report emailed successfully")
    except Exception as e:
        print(f"\n❌ Failed to send email: {e}")


if __name__ == "__main__":
    main()