#!/usr/bin/env python3
"""
Phase 1 Validation Script
Quick test to verify conflict detection is working.
"""
import sys
import json
from pathlib import Path
# The project root sits two directories above this script. It is placed at
# the front of sys.path so the `reasoning_forge` and `evaluation` imports
# used by the checks below resolve against this checkout.
project_root = (
    Path(__file__)
    .resolve()
    .parent  # directory containing this script
    .parent  # project root
)
sys.path.insert(0, str(project_root))
def test_imports():
    """Check that every Phase 1 module can be imported.

    Each import is announced before it is attempted and confirmed right
    after, so the last "Importing ..." line printed identifies the module
    that failed to load.

    Returns:
        bool: True when all four imports succeed; False when any of them
        raises, after printing the error message and its traceback.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("PHASE 1 VALIDATION: IMPORT TEST")
    print(f"{rule}\n")

    # NOTE(review): the "β" prefix in the messages below looks like a
    # mis-decoded check/cross mark -- confirm the file's encoding.
    try:
        # The imported names are intentionally unused: importing is the test.
        print("Importing TokenConfidenceEngine...")
        from reasoning_forge.token_confidence import TokenConfidenceEngine
        print(" β TokenConfidenceEngine imported")

        print("Importing ConflictEngine...")
        from reasoning_forge.conflict_engine import ConflictEngine
        print(" β ConflictEngine imported")

        print("Importing ForgeEngine...")
        from reasoning_forge.forge_engine import ForgeEngine
        print(" β ForgeEngine imported")

        print("Importing ConflictTestRunner...")
        from evaluation.conflict_tests import ConflictTestRunner
        print(" β ConflictTestRunner imported")
    except Exception as exc:
        import traceback

        print(f"\nβ IMPORT FAILED: {exc}")
        traceback.print_exc()
        return False
    else:
        return True
def test_token_confidence_engine():
    """Smoke-test TokenConfidenceEngine on one response and two peer replies.

    Builds an engine, scores a short passage attributed to "agent1" against
    two peer responses, and prints the number of claims, the number of token
    scores, and the mean of the token scores.

    Returns:
        bool: True when scoring and reporting finish without raising; False
        otherwise, after printing the error message and its traceback.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("PHASE 1 VALIDATION: TOKEN CONFIDENCE ENGINE")
    print(f"{rule}\n")

    # NOTE(review): the "β" prefix in the messages below looks like a
    # mis-decoded check/cross mark -- confirm the file's encoding.
    try:
        from reasoning_forge.token_confidence import TokenConfidenceEngine

        scorer = TokenConfidenceEngine()
        print("β TokenConfidenceEngine initialized")

        # Sample passage mixing confident and hedged wording.
        sample_text = " ".join([
            "I'm confident that this approach will work.",
            "However, it's possible that we'll encounter issues.",
            "The data clearly shows a trend towards improvement.",
        ])
        peers = dict(
            peer1="This approach might be problematic in some cases.",
            peer2="I argue that this is fundamentally sound.",
        )

        scores = scorer.score_tokens(sample_text, "agent1", peers)
        print("β Token confidence scoring completed")
        print(f" - Claims extracted: {len(scores.claims)}")

        token_scores = scores.token_scores
        print(f" - Token scores: {len(token_scores)} tokens")

        # Divide by at least 1 so an empty score list reports 0.000.
        mean_confidence = sum(token_scores) / (len(token_scores) or 1)
        print(f" - Mean confidence: {mean_confidence:.3f}")
    except Exception as exc:
        import traceback

        print(f"\nβ TOKEN CONFIDENCE TEST FAILED: {exc}")
        traceback.print_exc()
        return False
    else:
        return True
def test_conflict_engine():
    """Smoke-test ConflictEngine on two synthetic, opposing agent analyses.

    Wires a ConflictEngine to a fresh TokenConfidenceEngine, runs
    detect_conflicts() on two hand-written analyses, and prints how many
    conflicts came back. When at least one is returned, the first entry is
    printed as the "top" conflict.

    Returns:
        bool: True when detection and reporting finish without raising,
        whatever the number of conflicts; False otherwise, after printing
        the error message and its traceback.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("PHASE 1 VALIDATION: CONFLICT ENGINE")
    print(f"{rule}\n")

    # NOTE(review): the "β" prefix in the messages below looks like a
    # mis-decoded check/cross mark -- confirm the file's encoding.
    try:
        from reasoning_forge.token_confidence import TokenConfidenceEngine
        from reasoning_forge.conflict_engine import ConflictEngine

        detector = ConflictEngine(token_confidence_engine=TokenConfidenceEngine())
        print("β ConflictEngine initialized")

        # Two analyses taking opposite sides: deterministic vs probabilistic.
        analyses = {
            "agent_a": " ".join([
                "The algorithm must be deterministic for maximum control.",
                "This ensures predictability and reliability in all cases.",
            ]),
            "agent_b": " ".join([
                "A probabilistic approach is superior because it captures "
                "the inherent uncertainty in real-world systems.",
                "Determinism is rigid.",
            ]),
        }

        found = detector.detect_conflicts(analyses)
        print("β Conflict detection completed")
        print(f" - Conflicts detected: {len(found)}")

        # Nothing more to report when no conflicts were returned.
        if not found:
            return True

        first = found[0]
        print("\n Top conflict:")
        print(f" - Type: {first.conflict_type}")
        print(f" - Strength: {first.conflict_strength:.3f}")
        # Claims are cut to 60 characters; the ellipsis is always appended.
        print(f" - Agent A claim: {first.claim_a[:60]}...")
        print(f" - Agent B claim: {first.claim_b[:60]}...")
        print(f" - Overlap: {first.semantic_overlap:.3f}")
        return True
    except Exception as exc:
        import traceback

        print(f"\nβ CONFLICT ENGINE TEST FAILED: {exc}")
        traceback.print_exc()
        return False
def test_forge_integration():
    """Check that ForgeEngine exposes the Phase 1 engines and can run a debate.

    Instantiates ForgeEngine, requires the `token_confidence` and
    `conflict_engine` attributes to exist, then runs forge_with_debate() for
    a single round and prints the Phase 1 metrics found in the result's
    "metadata" mapping.

    Returns:
        bool: True when every step finishes without raising; False
        otherwise (including a missing attribute), after printing the error
        message and its traceback.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("PHASE 1 VALIDATION: FORGE ENGINE INTEGRATION")
    print(f"{rule}\n")

    # NOTE(review): the "β" prefix in the messages below looks like a
    # mis-decoded check/cross mark -- confirm the file's encoding.
    try:
        from reasoning_forge.forge_engine import ForgeEngine

        print("Initializing ForgeEngine with conflict detection...")
        forge = ForgeEngine()
        print("β ForgeEngine initialized")

        # (attribute, error if absent, confirmation if present), checked in order.
        required = (
            (
                "token_confidence",
                "ForgeEngine missing token_confidence engine",
                "β TokenConfidenceEngine attached to ForgeEngine",
            ),
            (
                "conflict_engine",
                "ForgeEngine missing conflict_engine",
                "β ConflictEngine attached to ForgeEngine",
            ),
        )
        for attr, missing_msg, attached_msg in required:
            if not hasattr(forge, attr):
                raise AttributeError(missing_msg)
            print(attached_msg)

        # One debate round only, to keep the check short.
        print("\nTesting forge_with_debate() on a simple concept...")
        print(" (This may take a moment without GPU acceleration)")
        outcome = forge.forge_with_debate(
            "Should an algorithm prioritize speed or clarity?",
            debate_rounds=1,
        )
        metadata = outcome.get("metadata", {})
        print("β forge_with_debate() completed successfully")

        # The round-0 count falls back to 0; the other metrics are optional.
        print(f" - Conflicts detected (R0): {metadata.get('conflicts_round_0_count', 0)}")
        if "debate_log" in metadata:
            rounds_logged = len(metadata["debate_log"])
            print(f" - Debate rounds logged: {rounds_logged}")
        if "ensemble_coherence" in metadata:
            coherence = metadata["ensemble_coherence"]
            print(f" - Ensemble coherence: {coherence:.3f}")
    except Exception as exc:
        import traceback

        print(f"\nβ FORGE INTEGRATION TEST FAILED: {exc}")
        traceback.print_exc()
        return False
    else:
        return True
def main():
    """Run every validation check in order and print a pass/fail summary.

    A check that raises is recorded as failed and the run continues. A
    KeyboardInterrupt stops the run immediately, before any summary.

    Returns:
        int: 0 when every check returned a truthy value; 1 when at least one
        failed or the run was interrupted.
    """
    rule = "=" * 80
    print("\n")
    print(rule)
    print("CODETTE PHASE 1: CONFLICT DETECTION - VALIDATION SUITE")
    print(rule)

    suite = (
        ("Imports", test_imports),
        ("Token Confidence Engine", test_token_confidence_engine),
        ("Conflict Engine", test_conflict_engine),
        ("Forge Integration", test_forge_integration),
    )

    # NOTE(review): the "β" prefix in the messages below looks like a
    # mis-decoded check/cross/warning mark -- confirm the file's encoding.
    outcomes = {}
    for label, check in suite:
        try:
            outcomes[label] = check()
        except KeyboardInterrupt:
            print("\n\nβ Tests interrupted by user")
            return 1
        except Exception as exc:
            print(f"\nβ Unexpected error in {label}: {exc}")
            outcomes[label] = False

    # Summary
    print(f"\n{rule}")
    print("VALIDATION SUMMARY")
    print(f"{rule}\n")

    total = len(outcomes)
    passed = len([ok for ok in outcomes.values() if ok])

    for label, ok in outcomes.items():
        if ok:
            status = "β PASS"
        else:
            status = "β FAIL"
        print(f" {status}: {label}")

    print(f"\n Total: {passed}/{total} tests passed\n")

    failed = total - passed
    if failed:
        print(f"β {failed} validation(s) failed. Check errors above.")
        return 1

    print("β All Phase 1 validations passed! Ready for testing.")
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|