| |
| """ |
| Phase 4 Test: Self-Correcting Feedback Loops |
| Validates adaptive conflict strength, dynamic rerouting, and memory reinforcement. |
| """ |
| import sys |
| from pathlib import Path |
| sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) |
|
|
| from reasoning_forge.forge_engine import ForgeEngine |
| from reasoning_forge.living_memory import LivingMemoryKernel |
| from reasoning_forge.conflict_engine import adjust_conflict_strength_with_memory |
|
|
|
|
def test_phase4_feedback_loop():
    """Test Phase 4 self-correcting capability."""
    rule = "=" * 80
    print(f"\n{rule}")
    print("PHASE 4 TEST: Self-Correcting Feedback Loops")
    print(f"{rule}\n")

    memory = LivingMemoryKernel(max_memories=100)
    forge = ForgeEngine(living_memory=memory, enable_memory_weighting=True)

    print("1. Running initial 2-round debate (Phase 4 active)...")
    test_query = "Is complexity in systems a feature or a bug?"

    try:
        outcome = forge.forge_with_debate(test_query, debate_rounds=2)
        meta = outcome.get("metadata", {})

        # Engine should report that Phase 4 machinery was engaged.
        print(f"\n[OK] Phase 4 active: {meta.get('phase_4_active', False)}")

        # Round-0 conflict count, as recorded in the debate metadata.
        r0_conflicts = meta.get("conflicts_round_0_count", 0)
        print(f"[OK] Conflicts detected (R0): {r0_conflicts}")

        # Phase 3 evolution-tracking counters carried along in metadata.
        p3 = meta.get("phase_3_metrics", {})
        print(f"\n[OK] Phase 3 Evolution Tracking:")
        improving = p3.get("hard_victory", 0) + p3.get("soft_consensus", 0)
        print(
            f" - Total tracked: {p3.get('total_tracked', 0)}, "
            f"Resolved: {p3.get('resolved', 0)}, "
            f"Improving: {improving}"
        )

        # Show up to three learned adapter weights (empty on a cold start).
        learned = meta.get("adapter_weights", {})
        print(f"\n[OK] Adapter Weights (Phase 4 learning):")
        if not learned:
            print(" - (No memory history yet)")
        else:
            for name, info in list(learned.items())[:3]:
                print(
                    f" - {name}: weight={info['weight']:.3f}, "
                    f"coherence={info['coherence']:.3f}"
                )

        # Count conflict-evolution entries attached to debate log records.
        evolutions = sum(
            len(entry.get("conflict_evolution", []))
            for entry in meta.get("debate_log", [])
            if entry.get("type") == "debate" and "conflict_evolution" in entry
        )
        print(f"\n[OK] Phase 4 actions logged: {evolutions} conflict evolutions")

        # Inspect what the debate left behind in living memory.
        print(f"\n[OK] Memory state after debate:")
        print(f" - Total memories: {len(memory.memories)}")
        if memory.memories:
            tension_count = sum(
                1 for m in memory.memories if m.emotional_tag == "tension"
            )
            print(f" - Tension memories: {tension_count}")

        return True

    except Exception as e:
        # Broad catch is deliberate: this is a standalone smoke test that
        # reports failure rather than crashing the suite.
        print(f"[FAIL] Error: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
def test_memory_aware_conflict_adjustment():
    """Test that conflict strength is adjusted by adapter performance."""
    rule = "=" * 80
    print(f"\n{rule}")
    print("PHASE 4 TEST: Memory-Aware Conflict Strength")
    print(f"{rule}\n")

    from reasoning_forge.conflict_engine import Conflict
    from reasoning_forge.memory_weighting import MemoryWeighting, AdapterWeight

    weighting = MemoryWeighting(LivingMemoryKernel(max_memories=100))

    # Seed two adapters with contrasting track records: newton strong (1.6),
    # davinci weaker (0.9), so the average weight sits above 1.0.
    for spec in (
        dict(adapter="newton", base_coherence=0.85, conflict_success_rate=0.75,
             interaction_count=10, recency_score=0.9, weight=1.6),
        dict(adapter="davinci", base_coherence=0.55, conflict_success_rate=0.40,
             interaction_count=8, recency_score=0.7, weight=0.9),
    ):
        weighting.adapter_weights[spec["adapter"]] = AdapterWeight(**spec)

    # A mild "emphasis" disagreement between the two seeded adapters.
    conflict = Conflict(
        agent_a="newton",
        agent_b="davinci",
        claim_a="Deterministic systems are better",
        claim_b="Creative approaches yield better results",
        conflict_type="emphasis",
        conflict_strength=0.20,
        confidence_a=0.8,
        confidence_b=0.7,
        semantic_overlap=0.65,
        opposition_score=0.7,
    )

    adjusted = adjust_conflict_strength_with_memory(conflict, weighting)

    print(f"Original conflict strength: {conflict.conflict_strength:.3f}")
    print(f"Adjusted conflict strength: {adjusted:.3f}")
    print(f"Adjustment reason: Newton (weight=1.6) + DaVinci (weight=0.9) avg = 1.25")
    print(f" → Amplified because both adapters involved are reasonably strong\n")

    if adjusted > conflict.conflict_strength:
        print("[OK] Conflict strength correctly amplified for capable adapters")
    else:
        print(
            f"[WARN] Expected amplification (avg weight > 1.0) but got {adjusted} vs {conflict.conflict_strength}"
        )
    # NOTE(review): soft check — this test reports WARN but still passes even
    # when amplification did not occur; confirm this leniency is intended.
    return True
|
|
|
|
def test_reinforcement_learning():
    """Test that evolution updates boost/penalize adapters."""
    rule = "=" * 80
    print(f"\n{rule}")
    print("PHASE 4 TEST: Reinforcement Learning")
    print(f"{rule}\n")

    from reasoning_forge.conflict_engine import Conflict, ConflictEvolution
    from reasoning_forge.memory_weighting import MemoryWeighting, AdapterWeight

    weighting = MemoryWeighting(LivingMemoryKernel(max_memories=100))

    # Start both adapters from an identical neutral baseline so any weight
    # movement is attributable solely to the evolution update below.
    for name in ("newton", "philosophy"):
        weighting.adapter_weights[name] = AdapterWeight(
            adapter=name, base_coherence=0.5, conflict_success_rate=0.5,
            interaction_count=5, recency_score=0.8, weight=1.0
        )

    # A maximally-opposed contradiction between the two seeded adapters.
    conflict = Conflict(
        agent_a="newton", agent_b="philosophy", claim_a="X is true", claim_b="Y is true",
        conflict_type="contradiction", conflict_strength=0.50, confidence_a=0.8, confidence_b=0.8,
        semantic_overlap=0.8, opposition_score=1.0
    )

    # Trajectory where strength falls and addressing/softening climb round
    # over round — resolved as a hard victory at 80% resolution.
    success_evolution = ConflictEvolution(
        original_conflict=conflict,
        round_trajectories={
            0: {"strength": 0.50, "addressing_score": 0.0, "softening_score": 0.0},
            1: {"strength": 0.30, "addressing_score": 0.9, "softening_score": 0.8},
            2: {"strength": 0.10, "addressing_score": 1.0, "softening_score": 1.0},
        },
        resolution_rate=0.8,
        resolution_type="hard_victory",
        resolved_in_round=2,
    )

    weights = weighting.adapter_weights
    print(f"Before update:")
    print(f" - newton weight: {weights['newton'].weight:.3f}")
    print(f" - philosophy weight: {weights['philosophy'].weight:.3f}")

    actions = weighting.update_from_evolution(success_evolution)

    print(f"\nAfter hard_victory (80% resolution):")
    print(f" - newton weight: {weights['newton'].weight:.3f}")
    print(f" - philosophy weight: {weights['philosophy'].weight:.3f}")
    print(f" - Actions taken: {actions}")

    # Both participants should be rewarded for a successfully resolved conflict.
    if all(weights[name].weight > 1.0 for name in ("newton", "philosophy")):
        print("\n[OK] Adapters correctly boosted for successful resolution")
        return True
    print("\n[WARN] Expected weight increase for success")
    return False
|
|
|
|
def main():
    """Run all Phase 4 tests.

    Returns 0 when every test passes, 1 otherwise (process exit code).
    """
    rule = "=" * 80
    print("\n")
    print(rule)
    print("CODETTE PHASE 4: SELF-CORRECTING FEEDBACK LOOPS - TEST SUITE")
    print(rule)

    # Cheap unit-style checks first; the full debate loop runs last.
    suite = (
        ("Memory-Aware Conflict Strength", test_memory_aware_conflict_adjustment),
        ("Reinforcement Learning", test_reinforcement_learning),
        ("Full Feedback Loop", test_phase4_feedback_loop),
    )

    results = {}
    for label, runner in suite:
        try:
            results[label] = runner()
        except Exception as e:
            # An unexpected crash counts as a failure but must not stop
            # the remaining tests from running.
            print(f"\n[FAIL] Unexpected error in {label}: {e}")
            import traceback
            traceback.print_exc()
            results[label] = False

    print(f"\n{rule}")
    print("TEST SUMMARY")
    print(f"{rule}\n")

    passed = sum(1 for ok in results.values() if ok)
    total = len(results)

    for label, ok in results.items():
        status = "[OK] PASS" if ok else "[FAIL] FAIL"
        print(f" {status}: {label}")

    print(f"\n Total: {passed}/{total} tests passed\n")

    if passed == total:
        print("[OK] All Phase 4 tests passed! Self-correcting feedback loop ready.")
        return 0
    print(f"[WARN] {total - passed} test(s) had issues. Check above.")
    return 1
|
|
|
|
if __name__ == "__main__":
    # sys is already imported at the top of the file; the previous local
    # re-import was redundant. Exit with main()'s status code (0 = all passed).
    sys.exit(main())
|
|