#!/usr/bin/env python3
# File size: 4,150 Bytes | 74f2af5
"""
Phase 3 End-to-End Test: Multi-Round Conflict Evolution Tracking
Quickly validates that conflicts are tracked across multiple debate rounds.
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from reasoning_forge.forge_engine import ForgeEngine
from reasoning_forge.living_memory import LivingMemoryKernel
def test_phase3_multi_round():
    """Run a 3-round debate and print the conflict-evolution metadata.

    Creates a ``LivingMemoryKernel`` and a ``ForgeEngine`` wired to it, calls
    ``forge_with_debate`` with ``debate_rounds=3``, and prints:

    * the round-0 conflict count (``conflicts_round_0_count``),
    * the ``phase_3_metrics`` summary,
    * up to three entries of ``evolution_data`` with their trajectories,
    * a per-entry summary of ``debate_log``.

    Missing metadata keys fall back to ``0`` / empty containers, so an absent
    section is reported as empty rather than treated as an error.

    Returns:
        bool: ``True`` if the debate and the report completed without
        raising; ``False`` if an exception was raised inside that section
        (its traceback is printed instead of being propagated).

    Note:
        Constructing the memory kernel and the engine happens outside the
        ``try`` block, so failures there propagate to the caller.
    """
    print("\n" + "="*80)
    print("PHASE 3 TEST: Multi-Round Conflict Evolution Tracking")
    print("="*80 + "\n")

    # Create memory kernel
    memory = LivingMemoryKernel(max_memories=100)

    # Initialize forge with living memory
    forge = ForgeEngine(living_memory=memory, enable_memory_weighting=True)

    # Run a multi-round debate
    test_query = "Should algorithms prioritize speed or interpretability?"
    print(f"Running 3-round debate on: {test_query}\n")

    try:
        result = forge.forge_with_debate(test_query, debate_rounds=3)
        metadata = result.get("metadata", {})

        # Phase 1 metrics
        conflicts_r0 = metadata.get("conflicts_round_0_count", 0)
        print(f"[OK] Round 0 conflicts detected: {conflicts_r0}\n")

        # Phase 3 metrics
        phase3_metrics = metadata.get("phase_3_metrics", {})
        print("Phase 3 Evolution Tracking:")
        print(f" - Total conflicts tracked: {phase3_metrics.get('total_tracked', 0)}")
        print(f" - Resolved: {phase3_metrics.get('resolved', 0)}")
        print(f" - Hard victory: {phase3_metrics.get('hard_victory', 0)}")
        print(f" - Soft consensus: {phase3_metrics.get('soft_consensus', 0)}")
        print(f" - Stalled: {phase3_metrics.get('stalled', 0)}")
        print(f" - Worsened: {phase3_metrics.get('worsened', 0)}")
        print(f" - Avg resolution rate: {phase3_metrics.get('avg_resolution_rate', 0):.1%}\n")

        # Show evolution trajectories for the first few conflicts
        evolutions = metadata.get("evolution_data", [])
        if evolutions:
            print("Sample conflict evolution trajectories:")
            for i, evolution in enumerate(evolutions[:3], 1):
                print(f"\n {i}. {evolution['agents']}:")
                print(f" - Type: {evolution['resolution_type']}")
                print(f" - Resolution rate: {evolution['resolution_rate']:.1%}")
                trajectory = evolution['trajectory']
                for j, round_data in enumerate(trajectory):
                    strength = round_data.get('strength', 0)
                    addressing = round_data.get('addressing_score', 0)
                    print(f" - Round {j}: strength={strength:.3f}, addressing={addressing:.1%}")

        # Check debate log for evolution data
        debate_log = metadata.get("debate_log", [])
        if debate_log:
            print(f"\nDebate log: {len(debate_log)} entries (Rounds 0-{len(debate_log)-1})")
        else:
            # An empty log has no rounds; printing the range here would
            # produce the nonsensical "Rounds 0--1".
            print("\nDebate log: 0 entries")
        for i, entry in enumerate(debate_log):
            # Only entries tagged as debate rounds are summarised here.
            if entry.get("type") == "debate":
                evolution_count = len(entry.get("conflict_evolution", []))
                print(f" - Round {i}: {evolution_count} conflicts evolved")

        print("\n[OK] Phase 3 multi-round tracking successful!")
        return True
    except Exception as e:
        # Top-level boundary for this check: report the failure and let the
        # caller decide the exit status instead of crashing the script.
        print(f"[FAIL] Error: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run the Phase 3 test and translate its outcome into an exit code.

    Returns:
        int: ``0`` when ``test_phase3_multi_round()`` returns a truthy value,
        ``1`` when it returns a falsy value or raises.
    """
    banner = "=" * 80

    print("\n")
    print(banner)
    print("CODETTE PHASE 3: MULTI-ROUND CONFLICT EVOLUTION - TEST")
    print(banner)

    try:
        passed = test_phase3_multi_round()

        print("\n" + banner)
        print("TEST SUMMARY")
        print(banner + "\n")

        # Guard clause: handle the failure path first, success falls through.
        if not passed:
            print("[FAIL] Phase 3 test failed. Check errors above.")
            return 1

        print("[OK] Phase 3 test passed! Multi-round tracking is working.")
        return 0
    except Exception as exc:
        # Anything that escaped the test itself still maps to a failing exit
        # code, with the traceback printed for diagnosis.
        print(f"\n[FAIL] Unexpected error: {exc}")
        import traceback
        traceback.print_exc()
        return 1
if __name__ == "__main__":
    # Script entry point: use main()'s return value as the process exit status.
    raise SystemExit(main())
|