File size: 9,760 Bytes
74f2af5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
"""Tests for Phase 7 Executive Controller

Validates:
1. Routing decisions for SIMPLE/MEDIUM/COMPLEX queries
2. Component activation correctness
3. Transparency metadata generation
4. Latency and correctness estimates
"""

import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

from reasoning_forge.query_classifier import QueryComplexity
from reasoning_forge.executive_controller import (
    ExecutiveController,
    ExecutiveControllerWithLearning,
    ComponentDecision,
)


def test_simple_routing():
    """Check that a SIMPLE query is routed around the heavy components.

    Routes one factual question as ``QueryComplexity.SIMPLE`` and asserts that
    the debate, semantic-tension and preflight-predictor flags are all off and
    that the latency, correctness and compute-cost estimates fall inside the
    bounds asserted below.
    """
    ctrl = ExecutiveController(verbose=True)
    decision = ctrl.route_query("What is the speed of light?", QueryComplexity.SIMPLE)

    assert decision.query_complexity == QueryComplexity.SIMPLE

    # Activation flags are checked by truthiness instead of `== False`.
    activation = decision.component_activation
    assert not activation['debate']
    assert not activation['semantic_tension']
    assert not activation['preflight_predictor']

    assert decision.estimated_latency_ms < 200  # Fast
    assert decision.estimated_correctness > 0.90
    assert decision.estimated_compute_cost < 10  # Low cost
    print("[OK] SIMPLE routing correct")


def test_medium_routing():
    """Check that a MEDIUM query activates only a selective set of components.

    Routes one question as ``QueryComplexity.MEDIUM`` and asserts that debate,
    semantic tension and specialization tracking are on, the preflight
    predictor is off, exactly one debate round is configured, and the
    latency, correctness and compute-cost estimates fall inside the bounds
    asserted below.
    """
    ctrl = ExecutiveController(verbose=True)
    decision = ctrl.route_query(
        "How does quantum mechanics relate to consciousness?",
        QueryComplexity.MEDIUM,
    )

    assert decision.query_complexity == QueryComplexity.MEDIUM

    # Activation flags are checked by truthiness instead of `== True/False`.
    activation = decision.component_activation
    assert activation['debate']
    assert activation['semantic_tension']
    assert activation['specialization_tracking']
    assert not activation['preflight_predictor']  # Skipped

    assert decision.component_config.get('debate_rounds') == 1
    assert 800 < decision.estimated_latency_ms < 1000  # Medium latency
    assert decision.estimated_correctness > 0.70
    assert 20 < decision.estimated_compute_cost < 30
    print("[OK] MEDIUM routing correct")


def test_complex_routing():
    """Check that a COMPLEX query activates the full machinery.

    Routes one question as ``QueryComplexity.COMPLEX`` and asserts that
    debate, semantic tension, the preflight predictor and specialization
    tracking are all on, three debate rounds are configured, and the latency
    and compute-cost estimates fall inside the bounds asserted below.
    """
    ctrl = ExecutiveController(verbose=True)
    decision = ctrl.route_query(
        "Can machines be truly conscious?",
        QueryComplexity.COMPLEX,
    )

    assert decision.query_complexity == QueryComplexity.COMPLEX

    # Activation flags are checked by truthiness instead of `== True`.
    activation = decision.component_activation
    assert activation['debate']
    assert activation['semantic_tension']
    assert activation['preflight_predictor']
    assert activation['specialization_tracking']

    assert decision.component_config.get('debate_rounds') == 3
    assert decision.estimated_latency_ms > 2000  # Slow but thorough
    assert 40 < decision.estimated_compute_cost < 60
    print("[OK] COMPLEX routing correct")


def test_route_transparency_metadata():
    """Check the transparency metadata built from a routing decision.

    Routes a SIMPLE query, then calls ``create_route_metadata`` on the class
    with a measured latency of 145 ms, zero conflicts and a gamma of 0.95.
    The resulting ``phase7_routing`` section must echo the complexity, the
    component flags, the estimated and actual latency (with a positive
    saving) and the conflict / gamma metrics.
    """
    ctrl = ExecutiveController()
    decision = ctrl.route_query(
        "What is entropy?",
        QueryComplexity.SIMPLE,
    )

    # Simulate execution with measured latency
    metadata = ExecutiveController.create_route_metadata(
        decision=decision,
        actual_latency_ms=145,  # Slightly faster than estimated
        actual_conflicts=0,
        gamma=0.95,
    )

    assert 'phase7_routing' in metadata
    routing = metadata['phase7_routing']

    assert routing['query_complexity'] == 'simple'
    assert 'components_activated' in routing
    # Flags are checked by truthiness instead of `== False`.
    activated = routing['components_activated']
    assert not activated['debate']
    assert not activated['semantic_tension']

    # Check latency analysis
    latency = routing['latency_analysis']
    assert latency['estimated_ms'] == decision.estimated_latency_ms
    assert latency['actual_ms'] == 145
    assert latency['savings_ms'] > 0  # Faster than estimated

    # Check metrics
    metrics = routing['metrics']
    assert metrics['conflicts_detected'] == 0
    assert metrics['gamma_coherence'] == 0.95

    print("[OK] Transparency metadata correct")


def test_routing_statistics():
    """Verify the controller counts every routed query in its statistics.

    Four queries (two SIMPLE, one MEDIUM, one COMPLEX) are routed through a
    single controller; the statistics must then report a total of four and
    expose a ``component_activation_counts`` entry.
    """
    controller = ExecutiveController()

    # Queries are routed in the order listed.
    workload = (
        ("What is light?", QueryComplexity.SIMPLE),
        ("What is light?", QueryComplexity.SIMPLE),
        ("How does light work?", QueryComplexity.MEDIUM),
        ("Can light be conscious?", QueryComplexity.COMPLEX),
    )
    for question, level in workload:
        controller.route_query(question, level)

    stats = controller.get_routing_statistics()

    assert stats['total_queries_routed'] == 4
    assert 'component_activation_counts' in stats
    print(f"  Stats: {stats}")
    print("[OK] Routing statistics tracked")


def test_component_activation_counts():
    """Verify the per-component activation counters after a mixed workload.

    Three SIMPLE, two MEDIUM and one COMPLEX query are routed through one
    controller. Debate, semantic tension and specialization tracking must
    each then show a count of exactly three.
    """
    controller = ExecutiveController()

    # (query text, complexity, number of times it is routed)
    batches = (
        ("What?", QueryComplexity.SIMPLE, 3),
        ("How?", QueryComplexity.MEDIUM, 2),
        ("Why?", QueryComplexity.COMPLEX, 1),
    )
    for question, level, repeats in batches:
        for _ in range(repeats):
            controller.route_query(question, level)

    counts = controller.get_routing_statistics()['component_activation_counts']

    # Expected total of 3 = MEDIUM (2) + COMPLEX (1); for the assertions to
    # hold, the three SIMPLE queries must not add to any of these counters.
    for component in ('debate', 'semantic_tension', 'specialization_tracking'):
        assert counts.get(component, 0) == 3

    print(f"  Component activation counts: {counts}")
    print("[OK] Component activation counts correct")


def test_learning_routing():
    """Exercise the learning controller's routing and confidence lookup.

    A first SIMPLE query is routed before any learned routes are set. The
    test then assigns ``learned_routes`` directly (standing in for what
    update_routes_from_history would do) and checks that all three keys are
    present, that 'simple' is at least as confident as 'medium', and that
    ``get_route_confidence`` returns the stored SIMPLE value.
    """
    router = ExecutiveControllerWithLearning(verbose=False)  # Quieter for test

    # Routing must work before any learned routes have been set.
    first_decision = router.route_query("What's the speed?", QueryComplexity.SIMPLE)
    assert first_decision.query_complexity == QueryComplexity.SIMPLE

    # Inject learned routes by hand; keys are lowercase to match
    # QueryComplexity.value.
    router.learned_routes = {
        'simple': 0.95,
        'medium': 0.80,
        'complex': 0.85,
    }

    # Every complexity level must be present after the assignment.
    for level in ('simple', 'medium', 'complex'):
        assert level in router.learned_routes

    # Simple routes should carry the highest confidence.
    assert router.learned_routes['simple'] >= router.learned_routes['medium']

    # The accessor must return the stored SIMPLE value.
    confidence = router.get_route_confidence(QueryComplexity.SIMPLE)
    assert confidence == 0.95, f"Expected 0.95, got {confidence}"

    print(f"  Learned routes: {router.learned_routes}")
    print("[OK] Learning router works")


def test_compute_cost_ranking():
    """Test that compute costs are ranked correctly: SIMPLE < MEDIUM < COMPLEX.

    Routes one query per complexity level through the same controller and
    asserts that ``estimated_compute_cost`` is strictly increasing from
    SIMPLE to MEDIUM to COMPLEX.
    """
    ctrl = ExecutiveController()

    simple_decision = ctrl.route_query("Q1?", QueryComplexity.SIMPLE)
    medium_decision = ctrl.route_query("Q2?", QueryComplexity.MEDIUM)
    complex_decision = ctrl.route_query("Q3?", QueryComplexity.COMPLEX)

    # Only the captured decisions are compared; the controller is local to
    # this test, so no state needs resetting afterwards.
    assert simple_decision.estimated_compute_cost < medium_decision.estimated_compute_cost
    assert medium_decision.estimated_compute_cost < complex_decision.estimated_compute_cost

    print(f"  Cost ranking: {simple_decision.estimated_compute_cost} < "
          f"{medium_decision.estimated_compute_cost} < "
          f"{complex_decision.estimated_compute_cost}")
    print("[OK] Compute cost ranking correct")


def test_latency_ranking():
    """Test that latencies are ranked correctly: SIMPLE < MEDIUM < COMPLEX.

    Routes one query per complexity level through the same controller and
    asserts that ``estimated_latency_ms`` is strictly increasing from SIMPLE
    to MEDIUM to COMPLEX.
    """
    ctrl = ExecutiveController()

    # Locals carry a `_decision` suffix so none of them shadows the builtin
    # `complex`.
    simple_decision = ctrl.route_query("Q1?", QueryComplexity.SIMPLE)
    medium_decision = ctrl.route_query("Q2?", QueryComplexity.MEDIUM)
    complex_decision = ctrl.route_query("Q3?", QueryComplexity.COMPLEX)

    assert simple_decision.estimated_latency_ms < medium_decision.estimated_latency_ms
    assert medium_decision.estimated_latency_ms < complex_decision.estimated_latency_ms

    print(f"  Latency ranking: {simple_decision.estimated_latency_ms}ms < "
          f"{medium_decision.estimated_latency_ms}ms < "
          f"{complex_decision.estimated_latency_ms}ms")
    print("[OK] Latency ranking correct")


def test_component_decision_asdict():
    """Verify a routing decision can be flattened into a plain dict.

    Routes a SIMPLE query, copies the decision's fields into a dict (using
    the enum's ``.value`` for the complexity) and checks that the complexity
    reads 'simple' and the reasoning text is not empty.
    """
    decision = ExecutiveController().route_query("Test query", QueryComplexity.SIMPLE)

    # Attributes copied verbatim; query_complexity is handled separately
    # because it is stored via its enum value.
    plain_fields = (
        'component_activation',
        'reasoning',
        'estimated_latency_ms',
        'estimated_correctness',
        'estimated_compute_cost',
    )
    decision_dict = {'query_complexity': decision.query_complexity.value}
    for field_name in plain_fields:
        decision_dict[field_name] = getattr(decision, field_name)

    assert decision_dict['query_complexity'] == 'simple'
    assert decision_dict['reasoning'] != ""
    print("[OK] ComponentDecision serializable")


if __name__ == '__main__':
    # Script entry point: run every test in declaration order, framed by a
    # banner. The first failing assertion aborts the run with its traceback.
    rule = "=" * 70

    print("\n" + rule)
    print("Phase 7 Executive Controller Tests")
    print(rule + "\n")

    ordered_tests = (
        test_simple_routing,
        test_medium_routing,
        test_complex_routing,
        test_route_transparency_metadata,
        test_routing_statistics,
        test_component_activation_counts,
        test_learning_routing,
        test_compute_cost_ranking,
        test_latency_ranking,
        test_component_decision_asdict,
    )
    for run_test in ordered_tests:
        run_test()

    print("\n" + rule)
    print("PASS: All Phase 7 Executive Controller tests passed!")
    print(rule + "\n")