"""Phase 5 End-to-End Integration Tests

Tests the complete Phase 5 system:
1. ReinforcementConfig tunable coefficients
2. AdapterRouter with MemoryWeighting integration
3. CodetteOrchestrator routing with memory context
4. Gamma stabilization field health monitoring
5. RoutingMetrics observability

Run with: python test_phase5_e2e.py
"""
|
|
| import sys |
| import os |
| from pathlib import Path |
|
|
| |
| sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) |
|
|
| from reasoning_forge.memory_weighting import MemoryWeighting, ReinforcementConfig |
| from reasoning_forge.coherence_field import CoherenceFieldGamma, GammaHealthMetrics, InterventionType |
| from reasoning_forge.routing_metrics import RoutingMetrics, AdapterSelectionRecord |
| from inference.adapter_router import AdapterRouter, RouteResult |
|
|
|
|
def test_reinforcement_config():
    """Check ReinforcementConfig defaults, dict loading, export and partial overrides.

    Returns:
        True when every assertion holds; a failed check raises AssertionError.
    """
    print("\n=== Test 1: Reinforcement Config ===")

    # A bare constructor must yield the default coefficients this test expects.
    defaults = ReinforcementConfig()
    expected_defaults = (
        ("boost_successful", 0.08, "Default boost should be 0.08"),
        ("penalize_failed", 0.08, "Default penalize should be 0.08"),
        ("reward_soft_consensus", 0.03, "Default soft_consensus should be 0.03"),
    )
    for field_name, expected, message in expected_defaults:
        assert getattr(defaults, field_name) == expected, message
    print("[OK] Default coefficients loaded")

    # Every coefficient supplied: from_dict must honour the overrides.
    tuned = ReinforcementConfig.from_dict(
        {
            "boost_successful": 0.12,
            "penalize_failed": 0.10,
            "reward_soft_consensus": 0.05,
        }
    )
    assert tuned.boost_successful == 0.12, "Custom boost not applied"
    assert tuned.penalize_failed == 0.10, "Custom penalize not applied"
    print("[OK] Custom coefficients loaded from dict")

    # Exporting the tuned config must expose the overridden value.
    round_trip = tuned.to_dict()
    assert round_trip["boost_successful"] == 0.12, "Export failed"
    print("[OK] Coefficients exported to dict")

    # Only one key supplied: the missing ones must fall back to defaults.
    sparse = ReinforcementConfig.from_dict({"boost_successful": 0.15})
    assert sparse.boost_successful == 0.15, "Partial override failed"
    assert sparse.penalize_failed == 0.08, "Default not used for missing key"
    print("[OK] Partial config with defaults works")

    return True
|
|
|
|
def test_adapter_router_with_memory():
    """Exercise AdapterRouter routing and explanation with no memory attached.

    Only the memory-less path is covered: the router is built without a
    ``memory_weighting`` argument and the explanation is expected to report
    ``memory_aware`` as False.

    Returns:
        True when every assertion holds; a failed check raises AssertionError.
    """
    print("\n=== Test 2: AdapterRouter with Memory ===")

    # Construct the router without passing any memory weighting.
    adapters = ["newton", "davinci", "empathy"]
    plain_router = AdapterRouter(available_adapters=adapters)
    assert plain_router.memory_weighting is None, "Router should not have memory"
    print("[OK] Router created without memory")

    # Keyword routing of a physics question should land on the newton adapter.
    decision = plain_router.route(
        "Explain the physics of gravity",
        strategy="keyword",
    )
    assert decision.primary == "newton", "Should select newton for physics query"
    assert decision.confidence > 0.0, "Confidence should be set"
    print(f"[OK] Routed to {decision.primary} with confidence {decision.confidence:.2f}")

    # The explanation must still be produced and flag the absence of memory.
    details = plain_router.explain_routing(decision)
    assert "primary" in details, "Explanation missing primary"
    assert details["memory_aware"] is False, "Should show memory not available"
    print("[OK] Routing explanation works without memory")

    return True
|
|
|
|
def test_gamma_health_monitoring():
    """Exercise CoherenceFieldGamma on a three-voice state and a single-voice state.

    The first measurement must be in range, recorded in the history and
    reported stable. The collapse and intervention checks for the second
    measurement are conditional: they only assert when the computed gamma is
    below 0.4 / when an intervention is actually returned.

    Returns:
        True when every assertion holds; a failed check raises AssertionError.
    """
    print("\n=== Test 3: Gamma Health Monitoring ===")

    class MockConflict:
        """Stand-in conflict object exposing only a ``strength`` attribute."""

        def __init__(self):
            self.strength = 0.25

    field = CoherenceFieldGamma()

    # Three equally weighted perspectives with two conflicts of strength 0.25.
    balanced = field.compute_health(
        conflicts=[MockConflict(), MockConflict()],
        responses={
            "newton": "Physics perspective",
            "davinci": "Creative perspective",
            "empathy": "Emotional perspective",
        },
        adapter_weights={"newton": 1.0, "davinci": 1.0, "empathy": 1.0},
    )

    assert 0.0 <= balanced.gamma <= 1.0, "Gamma should be in [0, 1]"
    assert len(field.health_history) == 1, "Should record health metric"
    print(f"[OK] Healthy state: Gamma = {balanced.gamma:.3f}")
    assert balanced.is_stable(), "Should be in stable zone"
    print("[OK] Status correctly identified as stable")

    # A single heavily weighted adapter and no conflicts at all.
    collapsed = field.compute_health(
        conflicts=[],
        responses={"newton": "Only newton perspective"},
        adapter_weights={"newton": 2.0},
    )

    print(f"[OK] Collapsed state: Gamma = {collapsed.gamma:.3f}")
    if collapsed.gamma < 0.4:
        assert collapsed.is_collapsing(), "Should detect collapse"
        print("[OK] Collapse correctly detected")

    # Ask which intervention (if any) is suggested given two spare adapters.
    remedy = field.get_intervention(collapsed, ["davinci", "empathy"])
    if remedy:
        assert (
            remedy.intervention_type == InterventionType.DIVERSITY_INJECTION
        ), "Should inject diversity on collapse"
        print(f"[OK] Intervention recommended: {remedy.intervention_type.value}")

    return True
|
|
|
|
def test_routing_metrics():
    """Verify RoutingMetrics counting, summary, per-adapter stats and recent routes.

    One memory-boosted "quantum" route is recorded, followed by four
    un-boosted "newton" routes; the aggregate counters and query helpers are
    then checked against those five records.

    Returns:
        True when every assertion holds; a failed check raises AssertionError.
    """
    print("\n=== Test 4: Routing Metrics ===")

    metrics = RoutingMetrics()
    assert metrics.total_queries == 0, "Should start at 0"
    print("[OK] RoutingMetrics initialized")

    # First route: quantum adapter with a memory boost applied.
    boosted = RoutingMetrics.create_record(
        query="What is quantum mechanics?",
        primary_adapter="quantum",
        secondary_adapters=["physics"],
        strategy="keyword",
        confidence_before_boost=0.75,
        confidence_after_boost=0.85,
        memory_boost_applied=True,
    )
    metrics.record_route(boosted)

    assert metrics.total_queries == 1, "Should count query"
    assert metrics.adapter_selection_counts["quantum"] == 1, "Should count selection"
    assert metrics.memory_boost_count == 1, "Should count boost"
    print("[OK] Route recorded and metrics updated")

    # Four more routes, all newton and un-boosted. The loop index is not
    # needed, so it is named ``_``; a fresh record is built on each pass.
    for _ in range(4):
        metrics.record_route(
            RoutingMetrics.create_record(
                query="Another query",
                primary_adapter="newton",
                secondary_adapters=[],
                strategy="keyword",
                confidence_before_boost=0.6,
                confidence_after_boost=0.6,
                memory_boost_applied=False,
            )
        )

    assert metrics.total_queries == 5, "Should have 5 queries"
    assert metrics.adapter_selection_counts["newton"] == 4, "Newton selected 4 times"
    # Plain string: there is nothing to interpolate here.
    print("[OK] Recorded 5 queries total")

    # The summary must cover both adapters seen so far.
    summary = metrics.get_summary()
    assert summary["total_queries"] == 5, "Summary should show total queries"
    assert "quantum" in summary["adapter_stats"], "Should have quantum stats"
    assert "newton" in summary["adapter_stats"], "Should have newton stats"
    print(f"[OK] Summary generated with {len(summary['adapter_stats'])} adapters")

    # Per-adapter view for newton: four selections, none boosted.
    newton_stats = metrics.get_adapter_stats("newton")
    assert newton_stats["total_selections"] == 4, "Newton should have 4 selections"
    assert newton_stats["memory_boost_hits"] == 0, "Newton had no boosts"
    print(f"[OK] Adapter stats: {newton_stats['total_selections']} selections")

    # Recent-route query is capped by ``limit``.
    recent = metrics.get_recent_routes(limit=3)
    assert len(recent) == 3, "Should return 3 recent routes"
    assert recent[0]["primary"] == "newton", "Most recent should be newton"
    print("[OK] Recent routes retrieved")

    return True
|
|
|
|
def test_phase5_integration():
    """Wire the router, gamma field and routing metrics together for one query.

    A single query is routed, the route is logged in RoutingMetrics, a health
    reading is taken from CoherenceFieldGamma, and the exported data of both
    trackers is checked.

    Returns:
        True when every assertion holds; a failed check raises AssertionError.
    """
    print("\n=== Test 5: Phase 5 Complete Integration ===")

    class MockConflict:
        """Stand-in conflict between two named agents with a fixed strength."""

        def __init__(self, agent_a, agent_b):
            self.agent_a = agent_a
            self.agent_b = agent_b
            self.strength = 0.15

    # Router built with memory weighting explicitly disabled.
    router = AdapterRouter(
        available_adapters=["newton", "davinci", "empathy", "philosophy"],
        memory_weighting=None,
    )
    print("[OK] Router created with Phase 5 integration ready")

    field = CoherenceFieldGamma()
    print("[OK] Gamma stabilization field initialized")

    tracker = RoutingMetrics()
    print("[OK] Routing metrics tracker initialized")

    # Route one query and log the outcome.
    question = "How should society balance freedom and security?"
    decision = router.route(question, strategy="keyword", max_adapters=2)

    # NOTE(review): the confidences are fixed at 0.7 rather than taken from
    # the route result - confirm this is intentional.
    tracker.record_route(
        RoutingMetrics.create_record(
            query=question,
            primary_adapter=decision.primary,
            secondary_adapters=decision.secondary,
            strategy=decision.strategy,
            confidence_before_boost=0.7,
            confidence_after_boost=0.7,
            memory_boost_applied=False,
        )
    )

    # Health reading for three responses and a single mild conflict.
    snapshot = field.compute_health(
        [MockConflict("newton", "philosophy")],
        {
            "newton": "Mathematical security metrics",
            "philosophy": "Ethical freedom considerations",
            "davinci": "Innovative balance approaches",
        },
    )

    # Collapse is checked first; groupthink is only consulted otherwise.
    status = (
        "collapsing"
        if snapshot.is_collapsing()
        else "groupthinking"
        if snapshot.is_groupthinking()
        else "stable"
    )
    print(f"[OK] Health computed: Gamma = {snapshot.gamma:.3f} ({status})")

    # Both trackers must expose what was recorded above.
    routing_summary = tracker.get_summary()
    field_export = field.export_metrics()

    assert routing_summary["total_queries"] == 1, "Should have recorded 1 query"
    assert "health_history" in field_export, "Should export health history"
    print("[OK] All Phase 5 components working together")

    return True
|
|
|
|
def main(tests=None):
    """Run the Phase 5 tests and print a pass/fail summary.

    Args:
        tests: Optional iterable of ``(name, callable)`` pairs to run instead
            of the built-in Phase 5 suite. Each callable takes no arguments
            and signals success by returning a truthy value; a falsy return
            or any raised ``Exception`` counts as a failure.

    Returns:
        0 if no test failed, otherwise 1 (suitable for ``sys.exit``).
    """
    # Imported once up front instead of inside the except handler on every
    # failing iteration of the loop.
    import traceback

    banner = "=" * 70
    print(banner)
    print("PHASE 5 END-TO-END INTEGRATION TESTS")
    print(banner)

    if tests is None:
        # Default suite, in the order the tests are defined in this file.
        tests = [
            ("Reinforcement Config", test_reinforcement_config),
            ("AdapterRouter Memory", test_adapter_router_with_memory),
            ("Gamma Health Monitoring", test_gamma_health_monitoring),
            ("Routing Metrics", test_routing_metrics),
            ("Phase 5 Integration", test_phase5_integration),
        ]

    passed = 0
    failed = 0

    for test_name, test_func in tests:
        try:
            succeeded = bool(test_func())
        except Exception as exc:
            # Top-level boundary: report the error and carry on with the
            # remaining tests rather than aborting the whole run.
            failed += 1
            print(f"\n[FAIL] {test_name} ERROR: {exc}")
            traceback.print_exc()
            continue

        if succeeded:
            passed += 1
            print(f"\n[PASS] {test_name} PASSED")
        else:
            failed += 1
            print(f"\n[FAIL] {test_name} FAILED")

    print("\n" + banner)
    print(f"RESULTS: {passed} passed, {failed} failed")
    print(banner)

    return 0 if failed == 0 else 1
|
|
|
|
if __name__ == "__main__":
    # Hand the suite's status (0 = all passed, 1 = something failed) to the
    # shell as the process exit code.
    exit_status = main()
    sys.exit(exit_status)
|
|