Spaces:

KarlQuant
/

Quasar-Executo

Running

App Files Files Community

KarlQuant committed 24 days ago

Commit

6578cff

verified ·

1 Parent(s): a41e1f5

Upload 2 files

Browse files

Files changed (2) hide show

http_diagnostic.py +250 -0
test_websocket.py +165 -0

http_diagnostic.py ADDED Viewed

	@@ -0,0 +1,250 @@

+#!/usr/bin/env python3
+"""
+QUASAR Hub HTTP Diagnostic — Check metrics without WebSocket
+This is the FASTEST way to diagnose what's happening.
+No wscat, no WebSocket needed — just HTTP polling.
+Usage:
+    python3 http_diagnostic.py [--watch]
+"""
+import json
+import sys
+import time
+import requests
+from datetime import datetime
def get_hub_state(hub_url: str = "http://127.0.0.1:7860"):
    """Return the hub's ``/api/state`` payload as parsed JSON, or ``None``.

    A ``requests`` ConnectionError yields ``None`` without any output; every
    other exception (bad HTTP status, invalid JSON, ...) is printed first and
    then also yields ``None``.
    """
    endpoint = f"{hub_url}/api/state"
    try:
        response = requests.get(endpoint, timeout=2)
        response.raise_for_status()
        payload = response.json()
    except requests.exceptions.ConnectionError:
        # Unreachable hub: stay quiet and let the caller report it.
        return None
    except Exception as exc:
        print(f"[!] Error: {exc}")
        return None
    return payload
def get_hub_health(hub_url: str = "http://127.0.0.1:7860"):
    """Fetch the hub's ``/api/health`` payload.

    Args:
        hub_url: Base HTTP URL of the hub (no trailing slash).

    Returns:
        The parsed JSON body on success, otherwise ``None``.

    A connection failure returns ``None`` silently (the caller prints its own
    "not responding" message). Any other failure — HTTP error status, invalid
    JSON, read timeout — is printed before returning ``None`` so that a hub
    which is reachable but unhealthy is not mistaken for one that is down.
    """
    try:
        resp = requests.get(f"{hub_url}/api/health", timeout=2)
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.ConnectionError:
        return None
    except Exception as e:
        # Previously swallowed silently; surface the reason for the failure.
        print(f"[!] Error: {e}")
        return None
def format_value(key: str, value):
    """Format a metric value for display, colour-coding losses and accuracies.

    Args:
        key: Metric name. A name containing "loss" or "accuracy"
            (case-insensitive, "loss" checked first) gets a traffic-light
            prefix when the value is numeric.
        value: Metric value of any type.

    Returns:
        A display string:
          * loss:     green below 0.3, red above 0.7, yellow otherwise;
          * accuracy: green above 0.7, red below 0.3, yellow otherwise;
          * NaN loss/accuracy: always red (it fails every comparison and
            would otherwise be shown as a neutral yellow);
          * any other float: six decimal places;
          * everything else: ``str(value)``.
    """
    name = key.lower()
    is_loss = "loss" in name
    is_accuracy = not is_loss and "accuracy" in name

    if is_loss or is_accuracy:
        if not isinstance(value, (int, float)):
            return str(value)
        if value != value:  # NaN is the only value not equal to itself
            return f"🔴 {value:.4f}"
        if is_loss:
            good, bad = value < 0.3, value > 0.7
        else:
            good, bad = value > 0.7, value < 0.3
        marker = "🟢" if good else "🔴" if bad else "🟡"
        return f"{marker} {value:.4f}"

    if isinstance(value, float):
        return f"{value:.6f}"
    return str(value)
def _metric_section(asset: dict, key: str) -> dict:
    """Return ``asset[key]`` when it is a dict, else ``{}`` (missing or null)."""
    section = asset.get(key)
    return section if isinstance(section, dict) else {}


def print_diagnostic(hub_url: str = "http://127.0.0.1:7860"):
    """Print a complete diagnostic report for the hub at *hub_url*.

    The report has four parts: hub health, per-asset rankings (first 8),
    metric history (first 3 assets) and a final diagnosis.

    Args:
        hub_url: Base HTTP URL of the hub.

    Returns:
        ``True`` when the hub responds, at least one asset is listed and at
        least one asset reports a non-zero ``actor_loss`` or ``avn_accuracy``;
        ``False`` in every other case.
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    print("\n" + heavy_rule)
    print(f"QUASAR Hub Diagnostic — {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(heavy_rule)

    # ---- [1] Health -----------------------------------------------------
    print("\n[1] HUB HEALTH")
    print(light_rule)
    health = get_hub_health(hub_url)
    if not health:
        print(f"✗ Hub not responding at {hub_url}")
        # Point the hint at the hub actually being checked, not the default.
        print(f"  Try: curl {hub_url}/api/health")
        return False
    print("✓ Hub is online")
    print(f"  Status:           {health.get('status', '?')}")
    print(f"  Service:          {health.get('service', '?')}")
    print(f"  Spaces connected: {health.get('spaces_connected', 0)}")
    print(f"  Messages RX:      {health.get('messages_rx', 0)}")
    # A null or non-numeric uptime must not abort the whole report.
    uptime = health.get("uptime_seconds") or 0
    uptime_text = f"{uptime:.0f}" if isinstance(uptime, (int, float)) else str(uptime)
    print(f"  Uptime:           {uptime_text}s")

    # ---- [2] Rankings ---------------------------------------------------
    print("\n[2] ASSET RANKINGS")
    print(light_rule)
    state = get_hub_state(hub_url)
    if not state:
        print("✗ Could not fetch state")
        return False
    rankings = state.get("rankings") or []
    if not rankings:
        print("⚠ NO ASSETS CONNECTED YET")
        print("  → Asset spaces need to connect and send metrics")
        print("  → Check if asset spaces are running")
        return False
    print(f"✓ {len(rankings)} assets connected\n")

    for i, asset in enumerate(rankings[:8], 1):
        name = asset.get("space_name", "?")
        print(f"{i}. {name}")
        training = _metric_section(asset, "training")
        voting = _metric_section(asset, "voting")

        # Training metrics
        actor_loss = training.get("actor_loss", 0)
        avn_acc = training.get("avn_accuracy", 0)
        training_steps = training.get("training_steps", 0)
        # JSON may deliver the step count as a float or null; ``:5d`` would
        # raise on either, so coerce and fall back to the raw text.
        try:
            steps_text = f"{int(training_steps):5d}"
        except (TypeError, ValueError, OverflowError):
            steps_text = f"{training_steps!s:>5}"
        print(f"   Training:  {steps_text} steps")
        print(f"   Actor Loss: {format_value('actor_loss', actor_loss)} (expect < 0.3)")
        print(f"   AVN Acc:    {format_value('avn_accuracy', avn_acc)} (expect > 0.7)")

        # Voting metrics
        buy = voting.get("buy_count", 0)
        sell = voting.get("sell_count", 0)
        signal = voting.get("dominant_signal", "?")
        print(f"   Signal:    {signal} ({buy}B / {sell}S)")

        # Freshness: last_updated is compared against time.time().
        last_updated = asset.get("last_updated", 0)
        if isinstance(last_updated, (int, float)) and last_updated:
            age = time.time() - last_updated
            if age < 10:
                print(f"   ✓ Updated {age:.0f}s ago")
            else:
                print(f"   ⚠ Stale ({age:.0f}s old) — space may have disconnected")
        print()

    # ---- [3] Metric history --------------------------------------------
    print("[3] METRIC HISTORY")
    print(light_rule)
    history = state.get("metric_history") or {}
    if history:
        print(f"✓ Collecting metric history for {len(history)} assets")
        for name, points in list(history.items())[:3]:
            if points:
                latest = points[-1]
                print(f"  {name}: {len(points)} data points (latest: {latest})")
    else:
        print("⚠ No metric history yet")
        print("  → Metrics will be recorded once spaces send non-zero values")

    # ---- Diagnosis ------------------------------------------------------
    # The "no assets" case already returned above, so only the metric check
    # remains here (the former second ``if not rankings`` branch was dead).
    print("\n" + heavy_rule)
    print("DIAGNOSIS")
    print(heavy_rule)

    # Missing, null and zero values all count as "no metric".
    has_nonzero_metrics = any(
        _metric_section(asset, "training").get(metric) not in (None, 0)
        for asset in rankings
        for metric in ("actor_loss", "avn_accuracy")
    )
    if not has_nonzero_metrics:
        print("""
⚠ WARNING: Assets connected but no training metrics being sent
   → Asset spaces are sending voting data but NOT training metrics
   → They need to include "training" field with loss/accuracy
   → Expected format:
     {
       "training": {
         "actor_loss": 0.234,
         "critic_loss": 0.567,
         "avn_loss": 0.123,
         "avn_accuracy": 0.87
       }
     }
""")
        return False

    print("""
✅ EVERYTHING LOOKS GOOD!
   → Assets are connected
   → Training metrics are flowing
   → Dashboard should display metrics in real-time
""")
    return True
def _render_bar(value, width: int = 10) -> str:
    """Render *value* as a bar of exactly *width* cells, clamping to [0, 1]."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        number = 0.0
    if number != number:  # NaN: draw an empty bar rather than crash int()
        number = 0.0
    filled = int(min(max(number, 0.0), 1.0) * width)
    return "█" * filled + "░" * (width - filled)


def _format_metric(value) -> str:
    """Format a metric to three decimals, or as plain text if non-numeric."""
    return f"{value:.3f}" if isinstance(value, (int, float)) else str(value)


def watch_mode(hub_url: str = "http://127.0.0.1:7860", interval: int = 5):
    """Continuously monitor hub state.

    Polls the hub every *interval* seconds and prints, for the first five
    ranked assets, a bar for ``actor_loss`` and ``avn_accuracy``. The loop
    ends when the hub stops responding or on Ctrl+C.

    Args:
        hub_url: Base HTTP URL of the hub.
        interval: Seconds to sleep between polls.
    """
    print(f"[*] Watching hub at {hub_url} (update every {interval}s)")
    print("    Press Ctrl+C to stop\n")
    rule = "=" * 70
    iteration = 0
    try:
        while True:
            iteration += 1
            print("\n" + rule)
            print(f"Update #{iteration} — {datetime.now().strftime('%H:%M:%S')}")
            print(rule + "\n")
            state = get_hub_state(hub_url)
            if not state:
                print("✗ Hub not responding")
                break
            rankings = state.get("rankings") or []
            print(f"Connected: {len(rankings)} assets\n")
            for asset in rankings[:5]:
                name = asset.get("space_name", "?")
                training = asset.get("training") or {}
                actor_loss = training.get("actor_loss", 0)
                avn_acc = training.get("avn_accuracy", 0)
                # Bars are clamped so losses outside [0, 1] (negative or
                # large) keep the fixed 10-cell width instead of distorting it.
                loss_bar = _render_bar(actor_loss)
                acc_bar = _render_bar(avn_acc)
                print(f"{str(name):15} | Loss: [{loss_bar}] {_format_metric(actor_loss)}")
                print(f"{'':15} | Acc:  [{acc_bar}] {_format_metric(avn_acc)}\n")
            time.sleep(interval)
    except KeyboardInterrupt:
        print("\n[*] Stopped.")
if __name__ == "__main__":
    # ``--watch`` anywhere on the command line selects continuous monitoring;
    # otherwise run the one-shot report and mirror its verdict in the exit code.
    if "--watch" in sys.argv:
        watch_mode()
    else:
        sys.exit(0 if print_diagnostic() else 1)

test_websocket.py ADDED Viewed

	@@ -0,0 +1,165 @@

+#!/usr/bin/env python3
+"""
+WebSocket Test Client — Monitor what the hub is broadcasting
+No external dependencies beyond websockets (already installed)
+Usage:
+    python3 test_websocket.py [--subscribe|--publish] [--space ASSET_NAME]
+"""
+import asyncio
+import json
+import sys
+import websockets
+from datetime import datetime
async def test_subscribe(hub_url: str):
    """Listen to what the hub is broadcasting and print every message.

    Connects to ``{hub_url}/ws/subscribe`` and prints each received message
    with a timestamp and running counter. JSON payloads are pretty-printed;
    a payload that is not valid JSON is shown raw and listening continues.

    Args:
        hub_url: Base WebSocket URL of the hub (``ws://`` or ``wss://``).

    Exits the process with status 1 when no message arrives for 10 seconds
    or when the connection fails.
    """
    print(f"[*] Connecting to hub subscriber at {hub_url}/ws/subscribe")
    try:
        async with websockets.connect(f"{hub_url}/ws/subscribe") as ws:
            print("[✓] Connected! Listening for metrics updates...\n")
            count = 0
            while True:
                try:
                    msg = await asyncio.wait_for(ws.recv(), timeout=10.0)
                except asyncio.TimeoutError:
                    print("[!] No messages received for 10 seconds...")
                    print("    → Asset spaces may not be connected yet")
                    sys.exit(1)
                count += 1
                ts = datetime.now().strftime("%H:%M:%S")
                print(f"[{ts}] Message #{count}:")
                try:
                    data = json.loads(msg)
                except (TypeError, ValueError):
                    # A malformed frame is not a connection failure: show it
                    # as-is and keep monitoring.
                    print(f"  (non-JSON payload) {msg!r}\n")
                    continue
                print(f"  {json.dumps(data, indent=2)}\n")
    except Exception as e:
        print(f"[✗] Connection failed: {e}")
        print(f"    Make sure hub is running and accessible at {hub_url}")
        sys.exit(1)
async def test_publish(hub_url: str, space_name: str):
    """Send one fixed test metric message to the hub as *space_name*.

    Connects to ``{hub_url}/ws/publish/<space_name>``, sends a sample payload
    with ``training`` and ``voting`` sections, keeps the connection open for
    five seconds and then closes it.

    Args:
        hub_url: Base WebSocket URL of the hub (``ws://`` or ``wss://``).
        space_name: Name to publish under. It is percent-encoded before being
            placed in the URI path, so spaces or slashes in the name cannot
            break the URI or change the route.

    Exits the process with status 1 on any error.
    """
    from urllib.parse import quote  # local import: only this function needs it

    print(f"[*] Connecting to hub publisher for space: {space_name}")
    test_message = {
        "training": {
            "training_steps": 9999,
            "actor_loss": 0.123,
            "critic_loss": 0.456,
            "avn_loss": 0.789,
            "avn_accuracy": 0.95,
        },
        "voting": {
            "dominant_signal": "BUY",
            "buy_count": 42,
            "sell_count": 18,
        },
    }
    try:
        uri = f"{hub_url}/ws/publish/{quote(space_name, safe='')}"
        print(f"[*] Connecting to {uri}")
        async with websockets.connect(uri) as ws:
            print("[✓] Connected! Sending test message...")
            await ws.send(json.dumps(test_message))
            print(f"[✓] Sent:\n{json.dumps(test_message, indent=2)}")
            # Keep connection open for 5 seconds
            print("[*] Keeping connection open for 5 seconds...")
            await asyncio.sleep(5)
            print("[✓] Done!")
    except Exception as e:
        print(f"[✗] Error: {e}")
        sys.exit(1)
async def main():
    """Dispatch to subscribe or publish mode based on command-line flags.

    ``--subscribe`` takes precedence over ``--publish``; with neither flag
    (or no arguments at all) the usage text is printed. In publish mode an
    optional ``--space NAME`` overrides the default space name.
    """
    # Default hub URL (adjust if needed)
    hub_url = "ws://127.0.0.1:7860"
    argv = sys.argv
    banner = "=" * 60

    if len(argv) <= 1:
        print_usage()
        return

    if "--subscribe" in argv:
        print(banner)
        print("QUASAR Hub WebSocket Monitor (Subscribe Mode)")
        print(banner)
        await test_subscribe(hub_url)
        return

    if "--publish" not in argv:
        print_usage()
        return

    space_name = "TEST_ASSET"
    if "--space" in argv:
        value_pos = argv.index("--space") + 1
        # A trailing ``--space`` with no value keeps the default name.
        if value_pos < len(argv):
            space_name = argv[value_pos]
    print(banner)
    print("QUASAR Hub WebSocket Test (Publish Mode)")
    print(banner)
    await test_publish(hub_url, space_name)
def print_usage():
    """Print the tool banner, usage examples and troubleshooting hints."""
    usage_text = """
╔════════════════════════════════════════════════════════════════╗
║            QUASAR WebSocket Test Tool v1.0                     ║
╚════════════════════════════════════════════════════════════════╝
USAGE:
    # Monitor what hub is broadcasting (metrics from all spaces)
    python3 test_websocket.py --subscribe
    # Send a test metric to hub (publish as a space)
    python3 test_websocket.py --publish --space TEST_ASSET
    # Send test metrics with a different space name
    python3 test_websocket.py --publish --space V100_1h
EXAMPLE WORKFLOW:
  Terminal 1 (Monitor hub):
    $ python3 test_websocket.py --subscribe
    [✓] Connected! Listening for metrics updates...
  Terminal 2 (Send test data):
    $ python3 test_websocket.py --publish --space V100_1h
    [✓] Connected! Sending test message...
    [✓] Sent:
    {
      "training": {...},
      "voting": {...}
    }
  Terminal 1 (should see the message):
    [12:34:56] Message #1:
    {
      "space_name": "V100_1h",
      "training": {...},
      "voting": {...}
    }
TROUBLESHOOTING:
  "Connection refused" → Hub not running on port 7860
    $ curl http://127.0.0.1:7860/api/health
  No messages on subscribe → Asset spaces not connected
    Check if asset spaces are running and sending data
  "Module not found: websockets" → Install it
    $ pip install websockets
"""
    print(usage_text)
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Subscribe mode listens until interrupted; Ctrl+C is the normal way
        # to stop it, so exit cleanly instead of dumping a traceback.
        print("\n[*] Stopped.")