Codette-Reasoning / tests /test_agent_llm_integration.py

Jonathan Harrison

Full Codette codebase sync — transparency release

74f2af5 2 days ago

3.35 kB

	#!/usr/bin/env python3
	"""Quick test to verify agents are using real LLM inference via adapters."""

	import sys
	from pathlib import Path

	# Setup paths
	# The repo root is added too (lowest priority of the three) so the
	# package-style import `reasoning_forge.forge_engine` below resolves when this
	# file is run directly. The two original entries are kept unchanged; they are
	# presumably needed for bare imports inside those directories (TODO confirm).
	_REPO_ROOT = Path(__file__).resolve().parent.parent
	sys.path.insert(0, str(_REPO_ROOT))
	sys.path.insert(0, str(_REPO_ROOT / 'reasoning_forge'))
	sys.path.insert(0, str(_REPO_ROOT / 'inference'))

	# Labels of the checks that failed; drives the final summary and exit code.
	failed_checks = []


	def _record_failure(label, exc):
	    """Print the error and its traceback, and remember that *label* failed.

	    Must be called from inside an ``except`` block so that
	    ``traceback.print_exc()`` can see the exception being handled.
	    """
	    import traceback

	    print(f" ✗ Error: {exc}")
	    traceback.print_exc()
	    failed_checks.append(label)


	print("=" * 80)
	print("AGENT LLM INTEGRATION TEST")
	print("=" * 80)

	# Test 1: Check if ForgeEngine can load with orchestrator
	print("\n[1/4] Loading ForgeEngine with orchestrator...")
	try:
	    from reasoning_forge.forge_engine import ForgeEngine

	    forge = ForgeEngine(living_memory=None, enable_memory_weighting=False)
	    print(" ✓ ForgeEngine loaded")

	    # Check if any agent has an orchestrator
	    orchestrators = [
	        agent.orchestrator
	        for agent in forge.analysis_agents
	        if agent.orchestrator is not None
	    ]
	    has_orchestrator = bool(orchestrators)
	    print(f" ✓ Agents have orchestrator: {has_orchestrator}")

	    if has_orchestrator:
	        # Prefer Newton's orchestrator as before, but fall back to the first
	        # one found: "some agent has one" does not imply Newton has one.
	        orch = forge.newton.orchestrator
	        if orch is None:
	            orch = orchestrators[0]
	        print(f" ✓ Available adapters: {orch.available_adapters}")
	except Exception as e:
	    # Every later check needs `forge`, so a failure here ends the run.
	    _record_failure("[1/4] ForgeEngine load", e)
	    sys.exit(1)

	# Test 2: Try single agent analysis with LLM
	print("\n[2/4] Testing Newton agent with LLM...")
	try:
	    concept = "gravity"
	    print(f" Analyzing: '{concept}'")

	    response = forge.newton.analyze(concept)

	    # Check if response is real (not template substitution)
	    mentions_concept = "gravity" in response.lower()
	    is_real = len(response) > 100 and mentions_concept
	    is_template = "{concept}" in response

	    print(f" Response length: {len(response)} chars")
	    print(f" Is template-based: {is_template}")
	    print(f" Contains concept: {mentions_concept}")
	    print(f" First 200 chars: {response[:200]}...")

	    # Turn the heuristic into a verdict instead of discarding it.
	    if is_real and not is_template:
	        print(" ✓ Response looks like real LLM output")
	    else:
	        print(" ✗ Response looks template-based, too short, or off-topic")
	        failed_checks.append("[2/4] Newton agent")
	except Exception as e:
	    _record_failure("[2/4] Newton agent", e)

	# Test 3: Try multi-agent analysis
	print("\n[3/4] Testing multi-agent ensemble...")
	try:
	    concept = "evolution"
	    print(f" Analyzing: '{concept}'")

	    analyses = {}
	    for agent in forge.analysis_agents[:3]:  # Just test first 3
	        # Print the agent name first so a slow or hanging call is visible.
	        print(f" {agent.name}...", end=" ", flush=True)
	        response = agent.analyze(concept)
	        analyses[agent.name] = response
	        print(f"({len(response)} chars)")

	    print(f" ✓ Collected {len(analyses)} analyses")
	except Exception as e:
	    _record_failure("[3/4] Multi-agent ensemble", e)

	# Test 4: Try debate mode (forge_with_debate)
	print("\n[4/4] Testing debate mode with real agents...")
	try:
	    query = "What is the relationship between consciousness and computation?"
	    print(f" Query: '{query}'")
	    print(" Running debate (this takes a minute or two)...")

	    result = forge.forge_with_debate(query)

	    # The synthesis is read from the third message of the result, if present.
	    synthesis = ""
	    if "messages" in result and len(result["messages"]) >= 3:
	        synthesis = result["messages"][2].get("content", "")

	    print(" ✓ Debate completed")
	    print(f" Synthesis length: {len(synthesis)} chars")

	    if synthesis:
	        print(f" First 300 chars: {synthesis[:300]}...")
	    else:
	        print(" ✗ No synthesis generated")
	        failed_checks.append("[4/4] Debate mode")
	except Exception as e:
	    _record_failure("[4/4] Debate mode", e)

	# Only claim success when every check actually passed.
	print("\n" + "=" * 80)
	if failed_checks:
	    print(f"SUMMARY: {len(failed_checks)} check(s) failed: {', '.join(failed_checks)}")
	else:
	    print("SUMMARY: Agents are now wired to use real LLM inference via adapters!")
	print("=" * 80)

	# Non-zero exit status lets callers (CI, shell scripts) detect failures.
	if failed_checks:
	    sys.exit(1)