# Codette-Reasoning / tests / validate_phase7_realtime.py
# Jonathan Harrison
# Full Codette codebase sync — transparency release
# 74f2af5
#!/usr/bin/env python3
"""Phase 7 Real-Time Validation Against Running Web Server
Tests all three routing paths (SIMPLE/MEDIUM/COMPLEX) against the running web server.
Compares actual latencies versus estimates and validates component activation.
Usage:
python validate_phase7_realtime.py
Prerequisites:
- codette_web.bat must be running at http://localhost:7860
- Web server must show "Phase 7 Executive Controller initialized"
"""
import requests
import time
import json
import sys
from datetime import datetime
from typing import Optional, Dict, Any
from pathlib import Path
# Test queries organized by complexity
# Validation cases, keyed by the routing path they are expected to exercise.
#
# Fields of each case:
#   query               - text sent to the server
#   expected_latency_ms - inclusive (min, max) bounds for the round trip, in ms
#   expect_components   - True when some components are expected to be active,
#                         False when none should be
#   min_components      - optional; minimum number of active components
#   expect_all          - optional; True when every component must be active
TEST_QUERIES = {
    # SIMPLE path: quick answers with no components activated.
    "SIMPLE": [
        {
            "query": "What is the speed of light?",
            "expected_latency_ms": (150, 250),
            "expect_components": False,
        },
        {
            "query": "Define entropy",
            "expected_latency_ms": (150, 250),
            "expect_components": False,
        },
    ],
    # MEDIUM path: at least three components active.
    "MEDIUM": [
        {
            "query": "How does quantum mechanics relate to consciousness?",
            "expected_latency_ms": (800, 1200),
            "expect_components": True,
            "min_components": 3,
        },
        {
            "query": "What are the implications of artificial intelligence for society?",
            "expected_latency_ms": (800, 1200),
            "expect_components": True,
            "min_components": 3,
        },
    ],
    # COMPLEX path: every component active.
    "COMPLEX": [
        {
            "query": "Can machines be truly conscious? And how should we ethically govern AI?",
            "expected_latency_ms": (2000, 3500),
            "expect_components": True,
            "expect_all": True,
        },
    ],
}
class Phase7Validator:
    """Validate Phase 7 routing in real time against a running web server.

    Each case in the module-level ``TEST_QUERIES`` table is POSTed to
    ``<server_url>/api/chat``. The measured round-trip latency and the
    ``phase7_routing`` metadata found in the JSON reply are then compared
    with the expectations recorded for that case.

    Attributes:
        server_url: Base URL of the server under test.
        results: Maps each complexity level to the list of outcome dicts
            appended by ``run_validation``.
        validation_start: Set when the query phase begins; stays ``None``
            if the health check fails.
        validation_end: Set when the query phase has finished.
    """

    # Routing paths, in the order they are tested and reported.
    COMPLEXITY_LEVELS = ("SIMPLE", "MEDIUM", "COMPLEX")
    # Request timeouts, in seconds.
    HEALTH_TIMEOUT_S = 2
    QUERY_TIMEOUT_S = 10

    def __init__(self, server_url: str = "http://localhost:7860"):
        self.server_url = server_url
        self.results = {
            "SIMPLE": [],
            "MEDIUM": [],
            "COMPLEX": [],
        }
        self.validation_start: Optional[datetime] = None
        self.validation_end: Optional[datetime] = None

    def is_server_running(self) -> bool:
        """Return True when ``GET <server_url>/api/health`` answers with 200."""
        try:
            response = requests.get(
                f"{self.server_url}/api/health",
                timeout=self.HEALTH_TIMEOUT_S,
            )
        except requests.exceptions.RequestException:
            # Only transport-level failures mean "not running"; a bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            return False
        return response.status_code == 200

    @staticmethod
    def _extract_routing(data: Any) -> Any:
        """Return the ``phase7_routing`` entry of a decoded reply, or None.

        The entry is looked up at the top level first, then inside a
        ``metadata`` dict.
        """
        if not isinstance(data, dict):
            return None
        if "phase7_routing" in data:
            return data.get("phase7_routing")
        metadata = data.get("metadata")
        if isinstance(metadata, dict):
            return metadata.get("phase7_routing")
        return None

    def query_server(
        self, query: str, complexity: str
    ) -> Optional[Dict[str, Any]]:
        """Send one query to ``/api/chat`` and describe the outcome.

        Args:
            query: Message text to send.
            complexity: Sent to the server as ``complexity_hint``.

        Returns:
            A dict that always contains ``success`` and
            ``actual_latency_ms``. On HTTP 200 it also carries ``response``
            (the decoded JSON), ``phase7_routing`` and ``status_code``;
            otherwise it carries ``error`` (plus ``status_code`` when the
            server did answer). Exceptions are reported through the dict
            rather than raised.
        """
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments the way time.time() can.
        started = time.perf_counter()

        def elapsed_ms() -> float:
            return (time.perf_counter() - started) * 1000

        try:
            response = requests.post(
                f"{self.server_url}/api/chat",
                json={"message": query, "complexity_hint": complexity},
                timeout=self.QUERY_TIMEOUT_S,
            )
            # Latency is taken before JSON decoding so that client-side
            # parsing time is not counted.
            actual_latency_ms = elapsed_ms()

            if response.status_code != 200:
                return {
                    "success": False,
                    "status_code": response.status_code,
                    "actual_latency_ms": actual_latency_ms,
                    "error": response.text,
                }

            data = response.json()
            return {
                "success": True,
                "response": data,
                "actual_latency_ms": actual_latency_ms,
                "phase7_routing": self._extract_routing(data),
                "status_code": response.status_code,
            }
        except requests.exceptions.Timeout:
            return {
                "success": False,
                "error": f"Request timeout ({self.QUERY_TIMEOUT_S}s)",
                "actual_latency_ms": elapsed_ms(),
            }
        except Exception as e:
            # Deliberately broad: any failure (connection error, invalid
            # JSON, ...) is reported as a failed test case, not a crash.
            return {
                "success": False,
                "error": str(e),
                "actual_latency_ms": elapsed_ms(),
            }

    def validate_latency(
        self, actual_ms: float, expected_range: tuple, complexity: str
    ) -> tuple[bool, str]:
        """Check whether a latency lies inside an inclusive (min, max) range.

        Args:
            actual_ms: Measured latency in milliseconds.
            expected_range: ``(min_ms, max_ms)`` bounds, both inclusive.
            complexity: Unused; kept so existing callers keep working.

        Returns:
            ``(ok, message)`` where the message starts with OK, FAST or SLOW.
        """
        min_ms, max_ms = expected_range
        if actual_ms < min_ms:
            return False, f"FAST ({actual_ms:.0f}ms < {min_ms}ms expected)"
        if actual_ms > max_ms:
            return False, f"SLOW ({actual_ms:.0f}ms > {max_ms}ms expected)"
        return True, f"OK ({actual_ms:.0f}ms within {min_ms}-{max_ms}ms)"

    def validate_components(
        self,
        phase7_routing: Optional[Dict],
        expect_components: bool,
        expect_all: bool = False,
        min_components: int = 0,
    ) -> tuple[bool, str]:
        """Validate that component activation matches expectations.

        Args:
            phase7_routing: Routing metadata from the server reply; its
                ``components_activated`` entry must map component names to
                truthy/falsy flags.
            expect_components: When False, no component may be active.
            expect_all: When True, every reported component must be active
                (takes precedence over ``expect_components``).
            min_components: Minimum active count when ``expect_components``
                is True.

        Returns:
            ``(ok, message)``. Missing or malformed metadata yields
            ``(False, ...)`` instead of raising.
        """
        if not phase7_routing:
            return False, "phase7_routing metadata missing"
        if not isinstance(phase7_routing, dict):
            return False, "phase7_routing metadata is not an object"
        if "components_activated" not in phase7_routing:
            return False, "components_activated missing from metadata"

        components = phase7_routing["components_activated"]
        if not isinstance(components, dict):
            return (
                False,
                "components_activated is not a mapping "
                f"(got {type(components).__name__})",
            )

        active_count = sum(1 for v in components.values() if v)
        total_count = len(components)

        if expect_all:
            if total_count == 0:
                # 0 == 0 must not count as "all components activated".
                return False, "NOT OK (no components reported, expected all)"
            if active_count == total_count:
                return True, f"OK (all {total_count} components activated)"
            return (
                False,
                f"NOT OK ({active_count}/{total_count} activated, expected all)",
            )

        if expect_components:
            if active_count >= min_components:
                return (
                    True,
                    f"OK ({active_count}/{total_count} activated, >= {min_components} required)",
                )
            return (
                False,
                f"NOT OK ({active_count}/{total_count} activated, < {min_components} required)",
            )

        # expect_components = False (SIMPLE)
        if active_count == 0:
            return True, f"OK (all {total_count} components skipped)"
        return False, f"NOT OK ({active_count}/{total_count} activated, expected none)"

    def print_header(self, title: str):
        """Print a formatted header."""
        print(f"\n{'='*75}")
        print(f" {title}")
        print(f"{'='*75}\n")

    def _run_test_case(self, complexity: str, test_case: Dict[str, Any]) -> bool:
        """Run one test case, print its outcome and record it in ``results``.

        Returns True only when both the latency and the component checks
        passed.
        """
        query = test_case["query"]
        print(f"Query: {query}")

        result = self.query_server(query, complexity)
        if not result["success"]:
            print(f" [FAIL] Server error: {result.get('error')}")
            # Record the failure so the report cannot show a vacuous pass
            # for a complexity level whose requests all failed.
            self.results[complexity].append(
                {
                    "query": query,
                    "success": False,
                    "latency_ok": False,
                    "actual_latency_ms": result.get("actual_latency_ms"),
                    "components_ok": False,
                    "phase7_routing": None,
                    "error": result.get("error"),
                }
            )
            print()
            return False

        latency_ok, latency_msg = self.validate_latency(
            result["actual_latency_ms"],
            test_case["expected_latency_ms"],
            complexity,
        )
        latency_status = "[OK]" if latency_ok else "[SLOW/FAST]"
        print(f" Latency: {latency_status} {latency_msg}")

        routing = result["phase7_routing"]
        components_ok, components_msg = self.validate_components(
            routing,
            test_case.get("expect_components", False),
            test_case.get("expect_all", False),
            test_case.get("min_components", 0),
        )
        components_status = "[OK]" if components_ok else "[FAIL]"
        print(f" Components: {components_status} {components_msg}")

        # Show the server's routing explanation when it supplied one.
        if isinstance(routing, dict) and "reasoning" in routing:
            print(f" Routing: {routing['reasoning']}")

        self.results[complexity].append(
            {
                "query": query,
                "success": True,
                "latency_ok": latency_ok,
                "actual_latency_ms": result["actual_latency_ms"],
                "components_ok": components_ok,
                "phase7_routing": routing,
            }
        )
        print()
        return latency_ok and components_ok

    def run_validation(self) -> bool:
        """Run the full Phase 7 validation suite.

        Returns:
            True when the server was reachable and every test case passed
            both checks. False otherwise; when the health check fails no
            query is sent and ``validation_start`` stays ``None``.
        """
        self.print_header("Phase 7 Real-Time Validation")

        print("Step 1: Checking if web server is running...")
        if not self.is_server_running():
            # Report the URL actually probed, not the default one.
            print(f"[ERROR] Web server not responding at {self.server_url}")
            print(" Please start codette_web.bat first")
            return False
        print("[OK] Web server is running\n")

        self.validation_start = datetime.now()
        all_passed = True
        for complexity in self.COMPLEXITY_LEVELS:
            self.print_header(f"Testing {complexity} Routing Path")
            for test_case in TEST_QUERIES[complexity]:
                if not self._run_test_case(complexity, test_case):
                    all_passed = False
        self.validation_end = datetime.now()
        return all_passed

    def _all_ok(self, complexity: str, key: str) -> bool:
        """Return True when results exist for ``complexity`` and all have ``key`` truthy."""
        results = self.results[complexity]
        # all() over an empty list is True; "nothing ran" must not pass.
        return bool(results) and all(r[key] for r in results)

    def generate_report(self) -> str:
        """Build the validation report as a single string.

        The report holds the start time and duration, per-complexity
        latency statistics and pass rates, and a checklist. Building the
        checklist calls ``is_server_running()``, so this method issues one
        HTTP request.
        """
        report_lines = []
        report_lines.append("\n" + "=" * 75)
        report_lines.append(" PHASE 7 VALIDATION REPORT")
        report_lines.append("=" * 75)

        # Summary
        report_lines.append(f"\nValidation Time: {self.validation_start}")
        if self.validation_start is None or self.validation_end is None:
            # No completed run: avoid subtracting None values.
            duration = "n/a"
        else:
            duration = self.validation_end - self.validation_start
        report_lines.append(f"Duration: {duration}")

        # Results by complexity
        for complexity in self.COMPLEXITY_LEVELS:
            results = self.results[complexity]
            if not results:
                continue
            report_lines.append(f"\n{complexity} Queries:")
            report_lines.append("-" * 75)
            report_lines.append(f" Count: {len(results)}")

            failed_requests = sum(
                1 for r in results if not r.get("success", True)
            )
            if failed_requests:
                report_lines.append(f" Failed requests: {failed_requests}")

            # Statistics cover successful requests only, so a timeout does
            # not distort min/avg/max.
            latencies = [
                r["actual_latency_ms"]
                for r in results
                if r.get("success", True)
                and r.get("actual_latency_ms") is not None
            ]
            if latencies:
                avg_latency = sum(latencies) / len(latencies)
                report_lines.append(
                    f" Latencies: min={min(latencies):.0f}ms, avg={avg_latency:.0f}ms, max={max(latencies):.0f}ms"
                )
            else:
                report_lines.append(" Latencies: n/a (no successful requests)")

            latency_passed = sum(1 for r in results if r["latency_ok"]) / len(results)
            components_passed = sum(
                1 for r in results if r["components_ok"]
            ) / len(results)
            report_lines.append(
                f" Latency Validation: {latency_passed:.0%} passed"
            )
            report_lines.append(
                f" Components Validation: {components_passed:.0%} passed"
            )

        # Validation checklist
        report_lines.append("\n" + "=" * 75)
        report_lines.append("VALIDATION CHECKLIST")
        report_lines.append("=" * 75 + "\n")
        checklist = [
            (
                "Server launches with Phase 7 initialized",
                self.is_server_running(),
            ),
            (
                "SIMPLE queries run in 150-250ms range",
                self._all_ok("SIMPLE", "latency_ok"),
            ),
            (
                "MEDIUM queries run in 800-1200ms range",
                self._all_ok("MEDIUM", "latency_ok"),
            ),
            (
                "COMPLEX queries run in 2000-3500ms range",
                self._all_ok("COMPLEX", "latency_ok"),
            ),
            (
                "SIMPLE queries have zero components activated",
                self._all_ok("SIMPLE", "components_ok"),
            ),
            (
                "MEDIUM queries have selective components activated",
                self._all_ok("MEDIUM", "components_ok"),
            ),
            (
                "COMPLEX queries have all components activated",
                self._all_ok("COMPLEX", "components_ok"),
            ),
        ]
        for check, passed in checklist:
            status = "[OK]" if passed else "[FAIL]"
            report_lines.append(f" {status} {check}")

        # Overall result
        all_passed = all(passed for _, passed in checklist)
        report_lines.append("\n" + "=" * 75)
        if all_passed:
            report_lines.append("RESULT: ALL VALIDATION CHECKS PASSED [OK]")
        else:
            report_lines.append("RESULT: SOME VALIDATION CHECKS FAILED [FAIL]")
        report_lines.append("=" * 75 + "\n")
        return "\n".join(report_lines)
def main():
    """Run Phase 7 real-time validation, then print and save the report.

    The report is produced whenever the query phase actually ran, including
    when some checks failed, so the failure details are not lost. The
    process exits with status 1 when the server was unreachable or any
    check failed.
    """
    validator = Phase7Validator()
    passed = validator.run_validation()

    if validator.validation_start is None:
        # The health check failed before any query was sent, so there is
        # nothing to report on.
        print("[ERROR] Validation encountered issues")
        sys.exit(1)

    # Generate and print report
    report = validator.generate_report()
    print(report)

    # Save report to file; an explicit encoding keeps the output independent
    # of the platform's default locale.
    report_path = Path("phase7_validation_report.txt")
    report_path.write_text(report, encoding="utf-8")
    print(f"Validation report saved to: {report_path}")

    if not passed:
        print("[ERROR] Validation encountered issues")
        sys.exit(1)
# Script entry point: run the validation and turn an interruption or an
# unexpected error into exit status 1.
if __name__ == "__main__":
    import traceback

    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C while waiting on the server.
        print("\n\nValidation interrupted by user")
        sys.exit(1)
    except Exception as exc:
        # Print the message first, then the full stack for diagnosis.
        print(f"\nERROR: {exc}")
        traceback.print_exc()
        sys.exit(1)