File size: 9,760 Bytes
"""Tests for the Phase 7 Executive Controller.

Validates:
1. Routing decisions for SIMPLE/MEDIUM/COMPLEX queries
2. Component activation correctness
3. Transparency metadata generation
4. Latency and correctness estimates
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from reasoning_forge.query_classifier import QueryComplexity
from reasoning_forge.executive_controller import (
ExecutiveController,
ExecutiveControllerWithLearning,
ComponentDecision,
)
def test_simple_routing():
    """Check the route chosen for a SIMPLE query.

    Expects debate, semantic tension and the preflight predictor to be
    switched off, with an estimated latency under 200 ms, an estimated
    correctness above 0.90 and an estimated compute cost under 10.
    """
    controller = ExecutiveController(verbose=True)
    route = controller.route_query(
        "What is the speed of light?",
        QueryComplexity.SIMPLE,
    )

    assert route.query_complexity == QueryComplexity.SIMPLE

    # None of the heavy components may be switched on for this tier.
    flags = route.component_activation
    for component in ('debate', 'semantic_tension', 'preflight_predictor'):
        assert flags[component] == False

    # Estimates: fast, accurate, cheap.
    assert route.estimated_latency_ms < 200
    assert route.estimated_correctness > 0.90
    assert route.estimated_compute_cost < 10

    print("[OK] SIMPLE routing correct")
def test_medium_routing():
    """Check the route chosen for a MEDIUM query.

    Expects debate, semantic tension and specialization tracking to be on,
    the preflight predictor to be off, a single debate round, and estimates
    inside the medium bands asserted below.
    """
    controller = ExecutiveController(verbose=True)
    question = "How does quantum mechanics relate to consciousness?"
    route = controller.route_query(question, QueryComplexity.MEDIUM)

    assert route.query_complexity == QueryComplexity.MEDIUM

    expected_activation = {
        'debate': True,
        'semantic_tension': True,
        'specialization_tracking': True,
        'preflight_predictor': False,  # skipped at this tier
    }
    for component, should_run in expected_activation.items():
        assert route.component_activation[component] == should_run

    assert route.component_config.get('debate_rounds') == 1

    # Medium band: latency in (800, 1000) ms, cost in (20, 30).
    assert 800 < route.estimated_latency_ms < 1000
    assert route.estimated_correctness > 0.70
    assert 20 < route.estimated_compute_cost < 30

    print("[OK] MEDIUM routing correct")
def test_complex_routing():
    """Check the route chosen for a COMPLEX query.

    Expects debate, semantic tension, the preflight predictor and
    specialization tracking to all be on, three debate rounds, an estimated
    latency above 2000 ms and an estimated compute cost between 40 and 60.
    """
    controller = ExecutiveController(verbose=True)
    route = controller.route_query(
        "Can machines be truly conscious?",
        QueryComplexity.COMPLEX,
    )

    assert route.query_complexity == QueryComplexity.COMPLEX

    # Every listed component must be switched on for this tier.
    flags = route.component_activation
    for component in (
        'debate',
        'semantic_tension',
        'preflight_predictor',
        'specialization_tracking',
    ):
        assert flags[component] == True

    assert route.component_config.get('debate_rounds') == 3

    # Slow but thorough: high latency, compute cost in the (40, 60) band.
    assert route.estimated_latency_ms > 2000
    assert 40 < route.estimated_compute_cost < 60

    print("[OK] COMPLEX routing correct")
def test_route_transparency_metadata():
    """Check the metadata built by ``ExecutiveController.create_route_metadata``.

    Routes a SIMPLE query, then builds metadata for a run measured at 145 ms
    with no conflicts and gamma 0.95, and verifies the ``phase7_routing``
    section echoes the complexity, component flags, latency figures and
    metrics.
    """
    controller = ExecutiveController()
    route = controller.route_query("What is entropy?", QueryComplexity.SIMPLE)

    # Stand-in for a real execution: the test expects 145 ms to beat the
    # estimate, which is why savings_ms must come out positive below.
    metadata = ExecutiveController.create_route_metadata(
        decision=route,
        actual_latency_ms=145,
        actual_conflicts=0,
        gamma=0.95,
    )

    assert 'phase7_routing' in metadata
    report = metadata['phase7_routing']

    assert report['query_complexity'] == 'simple'
    assert 'components_activated' in report
    activated = report['components_activated']
    assert activated['debate'] == False
    assert activated['semantic_tension'] == False

    # Latency analysis: estimate echoed, measurement recorded, savings > 0.
    latency = report['latency_analysis']
    assert latency['estimated_ms'] == route.estimated_latency_ms
    assert latency['actual_ms'] == 145
    assert latency['savings_ms'] > 0

    # Metrics are passed through unchanged.
    metrics = report['metrics']
    assert metrics['conflicts_detected'] == 0
    assert metrics['gamma_coherence'] == 0.95

    print("[OK] Transparency metadata correct")
def test_routing_statistics():
    """Check that routed queries show up in the controller's statistics.

    Routes four queries (two SIMPLE, one MEDIUM, one COMPLEX) and expects
    ``total_queries_routed`` to be 4 and a ``component_activation_counts``
    entry to be present.
    """
    controller = ExecutiveController()

    # (query text, complexity) pairs, routed in this order.
    workload = (
        ("What is light?", QueryComplexity.SIMPLE),
        ("What is light?", QueryComplexity.SIMPLE),
        ("How does light work?", QueryComplexity.MEDIUM),
        ("Can light be conscious?", QueryComplexity.COMPLEX),
    )
    for text, complexity in workload:
        controller.route_query(text, complexity)

    summary = controller.get_routing_statistics()
    assert summary['total_queries_routed'] == 4
    assert 'component_activation_counts' in summary

    print(f" Stats: {summary}")
    print("[OK] Routing statistics tracked")
def test_component_activation_counts():
    """Check per-component activation counts after a mixed batch of queries.

    Routes 3 SIMPLE, 2 MEDIUM and 1 COMPLEX query, then expects debate,
    semantic tension and specialization tracking to each be counted 3 times
    (the 2 MEDIUM plus the 1 COMPLEX query).
    """
    controller = ExecutiveController()

    # (query text, complexity, number of times to route it)
    batches = (
        ("What?", QueryComplexity.SIMPLE, 3),
        ("How?", QueryComplexity.MEDIUM, 2),
        ("Why?", QueryComplexity.COMPLEX, 1),
    )
    for text, complexity, repeats in batches:
        for _ in range(repeats):
            controller.route_query(text, complexity)

    counts = controller.get_routing_statistics()['component_activation_counts']

    # The SIMPLE queries are expected to contribute nothing to these
    # counters, leaving MEDIUM (2) + COMPLEX (1) = 3 activations each.
    for component in ('debate', 'semantic_tension', 'specialization_tracking'):
        assert counts.get(component, 0) == 3

    print(f" Component activation counts: {counts}")
    print("[OK] Component activation counts correct")
def test_learning_routing():
    """Check that the learning controller routes and reports confidences.

    Routes one SIMPLE query, then injects a ``learned_routes`` mapping by hand
    and verifies that ``get_route_confidence`` returns the injected value for
    SIMPLE queries.
    """
    controller = ExecutiveControllerWithLearning(verbose=False)  # quiet run

    # First route, made before any learned confidences are injected.
    first_route = controller.route_query("What's the speed?", QueryComplexity.SIMPLE)
    assert first_route.query_complexity == QueryComplexity.SIMPLE

    # Inject confidences directly. Keys are lowercase to line up with
    # QueryComplexity.value; presumably this mirrors what
    # update_routes_from_history would populate (not verified here).
    controller.learned_routes = {
        'simple': 0.95,
        'medium': 0.80,
        'complex': 0.85,
    }

    for tier in ('simple', 'medium', 'complex'):
        assert tier in controller.learned_routes

    # SIMPLE must be at least as confident as MEDIUM.
    learned = controller.learned_routes
    assert learned['simple'] >= learned['medium']

    simple_confidence = controller.get_route_confidence(QueryComplexity.SIMPLE)
    assert simple_confidence == 0.95, f"Expected 0.95, got {simple_confidence}"

    print(f" Learned routes: {controller.learned_routes}")
    print("[OK] Learning router works")
def test_compute_cost_ranking():
    """Check that estimated compute cost rises with query complexity.

    Routes one SIMPLE, one MEDIUM and one COMPLEX query through the same
    controller and asserts SIMPLE < MEDIUM < COMPLEX on
    ``estimated_compute_cost``.
    """
    ctrl = ExecutiveController()

    simple_decision = ctrl.route_query("Q1?", QueryComplexity.SIMPLE)
    medium_decision = ctrl.route_query("Q2?", QueryComplexity.MEDIUM)
    complex_decision = ctrl.route_query("Q3?", QueryComplexity.COMPLEX)

    # Read the costs only after all three routes have been computed.
    simple_cost = simple_decision.estimated_compute_cost
    medium_cost = medium_decision.estimated_compute_cost
    complex_cost = complex_decision.estimated_compute_cost

    assert simple_cost < medium_cost
    assert medium_cost < complex_cost

    print(f" Cost ranking: {simple_cost} < {medium_cost} < {complex_cost}")
    print("[OK] Compute cost ranking correct")
def test_latency_ranking():
    """Check that estimated latency rises with query complexity.

    Routes one SIMPLE, one MEDIUM and one COMPLEX query through the same
    controller and asserts SIMPLE < MEDIUM < COMPLEX on
    ``estimated_latency_ms``.
    """
    ctrl = ExecutiveController()

    # Names carry a suffix so that none of them shadows the builtin `complex`.
    simple_route = ctrl.route_query("Q1?", QueryComplexity.SIMPLE)
    medium_route = ctrl.route_query("Q2?", QueryComplexity.MEDIUM)
    complex_route = ctrl.route_query("Q3?", QueryComplexity.COMPLEX)

    assert simple_route.estimated_latency_ms < medium_route.estimated_latency_ms
    assert medium_route.estimated_latency_ms < complex_route.estimated_latency_ms

    print(f" Latency ranking: {simple_route.estimated_latency_ms}ms < "
          f"{medium_route.estimated_latency_ms}ms < "
          f"{complex_route.estimated_latency_ms}ms")
    print("[OK] Latency ranking correct")
def test_component_decision_asdict():
    """Check that a routing decision's fields can be collected into a dict.

    Builds the dict by hand from the decision's attributes and verifies the
    complexity serializes to ``'simple'`` and the reasoning is non-empty.
    """
    controller = ExecutiveController()
    route = controller.route_query("Test query", QueryComplexity.SIMPLE)

    # The enum is stored by its value; the remaining fields are copied as-is.
    serialized = {'query_complexity': route.query_complexity.value}
    for field_name in (
        'component_activation',
        'reasoning',
        'estimated_latency_ms',
        'estimated_correctness',
        'estimated_compute_cost',
    ):
        serialized[field_name] = getattr(route, field_name)

    assert serialized['query_complexity'] == 'simple'
    assert serialized['reasoning'] != ""

    print("[OK] ComponentDecision serializable")
if __name__ == '__main__':
    # Script entry point: run every test in order, framed by a banner.
    banner = "=" * 70

    print("\n" + banner)
    print("Phase 7 Executive Controller Tests")
    print(banner + "\n")

    for run_test in (
        test_simple_routing,
        test_medium_routing,
        test_complex_routing,
        test_route_transparency_metadata,
        test_routing_statistics,
        test_component_activation_counts,
        test_learning_routing,
        test_compute_cost_ranking,
        test_latency_ranking,
        test_component_decision_asdict,
    ):
        run_test()

    # Only reached when no test raised.
    print("\n" + banner)
    print("PASS: All Phase 7 Executive Controller tests passed!")
    print(banner + "\n")
|