| """ |
| Dashboard - ASCII-formatted system status display for the Codette training lab. |
| |
| Shows: |
| - Latest training run stats |
| - Best adapter scores |
| - Dataset sizes and quality |
| - Failure rates |
| - Improvement trends |
| |
| No web framework required; pure terminal output. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import os |
| import sys |
| from datetime import datetime |
| from pathlib import Path |
| from typing import Any, Dict, List, Optional |
|
|
| _THIS_DIR = Path(__file__).resolve().parent |
| _PROJECT_ROOT = _THIS_DIR.parent |
| if str(_PROJECT_ROOT) not in sys.path: |
| sys.path.insert(0, str(_PROJECT_ROOT)) |
|
|
| from observatory.metrics_logger import MetricsLogger |
| from observatory.performance_tracker import PerformanceTracker |
| from observatory.dataset_quality_monitor import DatasetQualityMonitor |
|
|
|
|
| class Dashboard: |
| """ASCII dashboard for the Codette training lab.""" |
|
|
| WIDTH = 76 |
|
|
| def __init__( |
| self, |
| metrics_log: Optional[str] = None, |
| quality_log: Optional[str] = None, |
| eval_results: Optional[str] = None, |
| ): |
| self.logger = MetricsLogger(log_file=metrics_log) |
| self.tracker = PerformanceTracker(logger=self.logger) |
| self.quality_monitor = DatasetQualityMonitor(quality_file=quality_log) |
| self.eval_results_path = eval_results |
|
|
| |
|
|
| def _header(self) -> List[str]: |
| lines = [] |
| lines.append("") |
| lines.append("+" + "=" * (self.WIDTH - 2) + "+") |
| lines.append("|" + " CODETTE TRAINING LAB OBSERVATORY ".center(self.WIDTH - 2) + "|") |
| lines.append("|" + f" {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')} ".center(self.WIDTH - 2) + "|") |
| lines.append("+" + "=" * (self.WIDTH - 2) + "+") |
| return lines |
|
|
| def _section(self, title: str) -> List[str]: |
| lines = [] |
| lines.append("") |
| lines.append("+" + "-" * (self.WIDTH - 2) + "+") |
| lines.append("|" + f" {title} ".ljust(self.WIDTH - 2) + "|") |
| lines.append("+" + "-" * (self.WIDTH - 2) + "+") |
| return lines |
|
|
| def _row(self, label: str, value: str) -> str: |
| """Single label: value row.""" |
| content = f" {label:<30s} {value}" |
| return "|" + content.ljust(self.WIDTH - 2) + "|" |
|
|
| def _bar_row(self, label: str, value: float, max_width: int = 30) -> str: |
| """Row with ASCII progress bar.""" |
| filled = int(value * max_width) |
| bar = "[" + "#" * filled + "." * (max_width - filled) + "]" |
| content = f" {label:<22s} {value:>6.3f} {bar}" |
| return "|" + content.ljust(self.WIDTH - 2) + "|" |
|
|
| def _empty_row(self) -> str: |
| return "|" + " " * (self.WIDTH - 2) + "|" |
|
|
| def _footer(self) -> List[str]: |
| return ["+" + "=" * (self.WIDTH - 2) + "+", ""] |
|
|
| |
|
|
| def _latest_training_section(self) -> List[str]: |
| lines = self._section("LATEST TRAINING RUN") |
|
|
| latest = self.logger.get_latest() |
| if not latest: |
| lines.append(self._row("Status", "No training runs logged yet")) |
| return lines |
|
|
| lines.append(self._row("Adapter", latest.get("adapter", "N/A"))) |
| lines.append(self._row("Timestamp", latest.get("timestamp", "N/A"))) |
| lines.append(self._row("Dataset Version", latest.get("dataset_version", "N/A"))) |
| lines.append(self._row("Dataset Size", str(latest.get("dataset_size", 0)))) |
| lines.append(self._row("Epoch", str(latest.get("epoch", 0)))) |
| lines.append(self._bar_row("Reasoning Score", latest.get("reasoning_score", 0))) |
| lines.append(self._row("Loss", f"{latest.get('loss', 0):.6f}")) |
|
|
| params = latest.get("training_params", {}) |
| if params: |
| lines.append(self._empty_row()) |
| lines.append(self._row("Training Parameters", "")) |
| for k, v in list(params.items())[:6]: |
| lines.append(self._row(f" {k}", str(v))) |
|
|
| return lines |
|
|
| def _best_adapters_section(self) -> List[str]: |
| lines = self._section("TOP ADAPTERS") |
|
|
| best = self.tracker.best_adapters(top_n=5) |
| if not best: |
| lines.append(self._row("Status", "No adapter data available")) |
| return lines |
|
|
| |
| hdr = f" {'#':<3} {'Adapter':<26} {'Score':>7} {'Loss':>7} {'Epoch':>5}" |
| lines.append("|" + hdr.ljust(self.WIDTH - 2) + "|") |
| sep = f" {'--':<3} {'------':<26} {'-----':>7} {'----':>7} {'-----':>5}" |
| lines.append("|" + sep.ljust(self.WIDTH - 2) + "|") |
|
|
| for i, entry in enumerate(best, 1): |
| name = entry.get("adapter", "?")[:25] |
| score = entry.get("reasoning_score", 0) |
| loss = entry.get("loss", 0) |
| epoch = entry.get("epoch", 0) |
| row = f" {i:<3} {name:<26} {score:>7.4f} {loss:>7.4f} {epoch:>5}" |
| lines.append("|" + row.ljust(self.WIDTH - 2) + "|") |
|
|
| return lines |
|
|
| def _dataset_quality_section(self) -> List[str]: |
| lines = self._section("DATASET QUALITY") |
|
|
| latest = self.quality_monitor.get_latest() |
| if not latest: |
| lines.append(self._row("Status", "No quality data recorded")) |
| return lines |
|
|
| lines.append(self._row("Dataset Version", latest.get("dataset_version", "N/A"))) |
| lines.append(self._row("Total Examples", str(latest.get("total_examples", 0)))) |
| lines.append(self._row("Valid Examples", str(latest.get("valid_examples", 0)))) |
| lines.append(self._bar_row("Validity Rate", latest.get("validity_rate", 0))) |
| lines.append(self._row("Avg Response Length", f"{latest.get('avg_response_length', 0):.1f} words")) |
| lines.append(self._row("Duplicate Rate", f"{latest.get('duplicate_rate', 0):.2%}")) |
| lines.append(self._row("Near-Duplicate Rate", f"{latest.get('near_duplicate_rate', 0):.2%}")) |
| lines.append(self._bar_row("Topic Diversity", min(latest.get("topic_diversity", 0) * 10, 1.0))) |
| lines.append(self._row("Topic Concentration", f"{latest.get('topic_concentration', 0):.2%}")) |
|
|
| |
| regressions = self.quality_monitor.check_latest_regressions() |
| if regressions: |
| lines.append(self._empty_row()) |
| for r in regressions: |
| sev = r["severity"].upper() |
| msg = f" [{sev}] {r['metric']}: {r['percent_change']:+.1f}%" |
| lines.append("|" + msg.ljust(self.WIDTH - 2) + "|") |
|
|
| return lines |
|
|
| def _improvement_trends_section(self) -> List[str]: |
| lines = self._section("IMPROVEMENT TRENDS") |
|
|
| trends = self.tracker.improvement_trends() |
| if not trends: |
| lines.append(self._row("Status", "Insufficient data for trends")) |
| return lines |
|
|
| for t in trends[:5]: |
| name = t["adapter"][:22] |
| delta = t["delta"] |
| pct = t["percent_change"] |
| runs = t["num_runs"] |
| sign = "+" if delta >= 0 else "" |
| indicator = "^" if delta > 0.01 else ("v" if delta < -0.01 else "=") |
|
|
| row = (f" {indicator} {name:<22} " |
| f"delta: {sign}{delta:.4f} " |
| f"({sign}{pct:.1f}%) " |
| f"[{runs} runs]") |
| lines.append("|" + row.ljust(self.WIDTH - 2) + "|") |
|
|
| return lines |
|
|
| def _failure_rates_section(self) -> List[str]: |
| lines = self._section("EVALUATION FAILURE RATES") |
|
|
| if not self.eval_results_path or not os.path.exists(self.eval_results_path): |
| lines.append(self._row("Status", "No evaluation results file specified")) |
| return lines |
|
|
| try: |
| with open(self.eval_results_path, "r", encoding="utf-8") as f: |
| results = json.load(f) |
| except (json.JSONDecodeError, OSError): |
| lines.append(self._row("Status", "Could not load evaluation results")) |
| return lines |
|
|
| |
| overall = results.get("overall", {}) |
| if overall: |
| overall_score = overall.get("overall", 0) |
| lines.append(self._bar_row("Overall Score", overall_score)) |
| lines.append(self._empty_row()) |
|
|
| |
| categories = results.get("categories", {}) |
| if categories: |
| hdr = f" {'Category':<20} {'Score':>7} {'Prompts':>8}" |
| lines.append("|" + hdr.ljust(self.WIDTH - 2) + "|") |
| sep = f" {'--------':<20} {'-----':>7} {'-------':>8}" |
| lines.append("|" + sep.ljust(self.WIDTH - 2) + "|") |
|
|
| for cat, data in sorted(categories.items()): |
| avg = data.get("average_scores", {}).get("overall", 0) |
| n = data.get("prompts_scored", 0) |
| status = "*" if avg < 0.4 else ("~" if avg < 0.55 else " ") |
| row = f" {status}{cat:<19} {avg:>7.4f} {n:>8}" |
| lines.append("|" + row.ljust(self.WIDTH - 2) + "|") |
|
|
| lines.append(self._empty_row()) |
| lines.append("|" + " * = failing, ~ = weak".ljust(self.WIDTH - 2) + "|") |
|
|
| return lines |
|
|
| def _sparkline_section(self) -> List[str]: |
| lines = self._section("SCORE HISTORY") |
|
|
| adapters = self.logger.get_unique_adapters() |
| if not adapters: |
| lines.append(self._row("Status", "No history data")) |
| return lines |
|
|
| for adapter in adapters[:6]: |
| progression = self.tracker.score_progression(adapter) |
| if not progression: |
| continue |
| scores = [p["reasoning_score"] for p in progression] |
| spark = PerformanceTracker._sparkline(scores, width=30) |
| name = adapter[:20] |
| row = f" {name:<21} {spark} [{scores[0]:.3f}->{scores[-1]:.3f}]" |
| lines.append("|" + row.ljust(self.WIDTH - 2) + "|") |
|
|
| return lines |
|
|
| |
|
|
| def render(self) -> str: |
| """Render the complete dashboard.""" |
| all_lines: List[str] = [] |
| all_lines.extend(self._header()) |
| all_lines.extend(self._latest_training_section()) |
| all_lines.extend(self._best_adapters_section()) |
| all_lines.extend(self._dataset_quality_section()) |
| all_lines.extend(self._improvement_trends_section()) |
| all_lines.extend(self._failure_rates_section()) |
| all_lines.extend(self._sparkline_section()) |
| all_lines.extend(self._footer()) |
| return "\n".join(all_lines) |
|
|
|
|
| |
| |
| |
|
|
| def main() -> None: |
| parser = argparse.ArgumentParser( |
| description="Codette Observatory Dashboard - ASCII system status display" |
| ) |
| parser.add_argument( |
| "--metrics-log", "-m", |
| default=None, |
| help="Path to observatory_metrics.json", |
| ) |
| parser.add_argument( |
| "--quality-log", "-q", |
| default=None, |
| help="Path to dataset_quality_log.json", |
| ) |
| parser.add_argument( |
| "--eval-results", "-e", |
| default=None, |
| help="Path to benchmark evaluation results JSON", |
| ) |
| parser.add_argument( |
| "--section", "-s", |
| choices=["training", "adapters", "quality", "trends", "failures", "history", "all"], |
| default="all", |
| help="Show only a specific section (default: all)", |
| ) |
|
|
| args = parser.parse_args() |
|
|
| dashboard = Dashboard( |
| metrics_log=args.metrics_log, |
| quality_log=args.quality_log, |
| eval_results=args.eval_results, |
| ) |
|
|
| if args.section == "all": |
| print(dashboard.render()) |
| else: |
| section_map = { |
| "training": dashboard._latest_training_section, |
| "adapters": dashboard._best_adapters_section, |
| "quality": dashboard._dataset_quality_section, |
| "trends": dashboard._improvement_trends_section, |
| "failures": dashboard._failure_rates_section, |
| "history": dashboard._sparkline_section, |
| } |
| func = section_map.get(args.section) |
| if func: |
| lines = dashboard._header() |
| lines.extend(func()) |
| lines.extend(dashboard._footer()) |
| print("\n".join(lines)) |
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|