| """ |
| Cocoon Stability Field — Collapse Detection Engine |
| =================================================== |
| |
| FFT-based stability validation that detects synthesis loop collapse |
| BEFORE corrupted output is generated. |
| |
| Based on Codette_Deep_Simulation_v1.py cocoon_stability_field() equation: |
| stability = ∫|F(k)|² dk < ε_threshold |
| |
Purpose: Halt debate if system enters instability zone (gamma < 0.35,
runaway vocabulary patterns, self-referential cascades).
| |
Recovered from: J:/codette-training-lab/new data/Codette_Deep_Simulation_v1.py
| """ |
|
|
| import numpy as np |
| from typing import Dict, List, Tuple, Optional |
| import logging |
|
|
# Module-level logger; handlers/levels are configured by the host application.
logger = logging.getLogger(__name__)
|
|
|
|
class CocoonStabilityField:
    """
    FFT-based stability validator for debate coherence.

    Monitors the frequency-domain energy distribution of agent responses.
    If spectral energy becomes too concentrated (self-similarity, repeating
    patterns) or vocabulary collapses into repetition, the response is
    flagged as a collapse risk so the debate can be halted before a
    corrupted synthesis is produced.
    """

    # Fraction of total spectral energy allowed in the top-10 bins before a
    # response is flagged as self-similar/repetitive.
    ENERGY_CONCENTRATION_THRESHOLD = 0.85
    # Cosine similarity with the agent's previous spectrum above which the
    # agent is considered to be repeating its own response shape.
    SELF_SIMILARITY_THRESHOLD = 0.75
    # Minimum stability score for a response to be considered stable.
    COHERENCE_FLOOR = 0.3
    # Responses whose word-uniqueness ratio falls below
    # (1 - RUNAWAY_VOCABULARY_RATIO) are flagged for runaway repetition.
    RUNAWAY_VOCABULARY_RATIO = 0.6

    def __init__(self, verbose: bool = False):
        """
        Args:
            verbose: If True, log per-agent stability details at INFO level.
        """
        self.verbose = verbose
        # Last seen spectrum per agent, used for self-similarity checks.
        self.frequency_signatures: Dict[str, np.ndarray] = {}
        # One entry per validated round (see validate_round).
        self.stability_history: List[Dict] = []

    def text_to_spectrum(self, text: str, fft_size: int = 256) -> np.ndarray:
        """
        Convert text to a normalized power spectrum for FFT analysis.

        Args:
            text: Response text to analyze.
            fft_size: FFT size (should be a power of 2).

        Returns:
            Normalized power spectrum in [0, 1], length fft_size // 2
            (the second half of a real signal's spectrum is redundant).
        """
        # Map characters to numeric codes, truncated to fft_size.
        # (Bug fix: the original sliced text[:1000] and crashed with a
        # broadcast error for any text longer than fft_size characters.)
        char_codes = np.array(
            [ord(c) % 256 for c in text[:fft_size]], dtype=np.float32
        )

        # Zero-pad up to the FFT window length.
        padded = np.zeros(fft_size, dtype=np.float32)
        padded[: len(char_codes)] = char_codes

        fft_result = np.fft.fft(padded)
        power_spectrum = np.abs(fft_result) ** 2

        # Normalize to [0, 1]; guard against an all-zero spectrum
        # (e.g. empty text) to avoid division by zero.
        max_power = float(np.max(power_spectrum))
        if max_power == 0.0:
            max_power = 1.0
        normalized_spectrum = power_spectrum / max_power

        # Keep only the non-redundant half of the spectrum. (Generalized
        # from a hard-coded 128 so non-default fft_size values behave
        # consistently; identical result for the default fft_size=256.)
        return normalized_spectrum[: fft_size // 2]

    def check_energy_concentration(self, spectrum: np.ndarray) -> Tuple[float, bool]:
        """
        Check if spectral energy is too concentrated (self-similarity syndrome).

        Concentrated energy = agent repeating itself/copying other agents.

        Args:
            spectrum: Power spectrum from FFT.

        Returns:
            (concentration_ratio, is_concerning)
        """
        # Share of total energy carried by the 10 strongest bins.
        top_k = 10
        top_sum = float(np.sum(np.sort(spectrum)[-top_k:]))
        total_sum = float(np.sum(spectrum))
        if total_sum == 0.0:
            # All-zero spectrum: report zero concentration instead of
            # dividing by zero.
            total_sum = 1.0

        concentration = top_sum / total_sum
        is_concerning = concentration > self.ENERGY_CONCENTRATION_THRESHOLD

        return concentration, is_concerning

    def check_self_similarity(self, agent_name: str,
                              spectrum: np.ndarray) -> Tuple[float, bool]:
        """
        Check if agent is repeating itself (same response shape).

        Compares the new spectrum against the agent's previous one via
        cosine similarity, then stores the new spectrum for the next call.

        Args:
            agent_name: Name of agent for history lookup.
            spectrum: New response spectrum.

        Returns:
            (similarity_score, is_concerning)
        """
        if agent_name not in self.frequency_signatures:
            # First response from this agent: nothing to compare against.
            self.frequency_signatures[agent_name] = spectrum
            return 0.0, False

        prev_spectrum = self.frequency_signatures[agent_name]
        # Cosine similarity; the epsilon avoids division by zero for
        # all-zero spectra.
        similarity = float(np.dot(prev_spectrum, spectrum) / (
            np.linalg.norm(prev_spectrum) * np.linalg.norm(spectrum) + 1e-8
        ))

        self.frequency_signatures[agent_name] = spectrum

        # bool() so callers (and JSON serialization) get a plain Python
        # bool rather than np.bool_, consistent with the other checks.
        is_concerning = bool(similarity > self.SELF_SIMILARITY_THRESHOLD)
        return similarity, is_concerning

    def check_vocabulary_diversity(self, text: str) -> Tuple[float, bool]:
        """
        Check if response vocabulary is repeating (indicators of
        "Another perspective on...").

        Args:
            text: Response text.

        Returns:
            (uniqueness_ratio, is_concerning)
        """
        # Very short responses cannot meaningfully repeat vocabulary.
        if len(text) < 20:
            return 1.0, False

        words = text.lower().split()
        if not words:
            return 1.0, False

        uniqueness = len(set(words)) / len(words)

        # Concerning when fewer than (1 - RUNAWAY_VOCABULARY_RATIO) of
        # the words are unique (uniqueness < 0.4 with the default ratio).
        is_concerning = uniqueness < (1.0 - self.RUNAWAY_VOCABULARY_RATIO)

        return uniqueness, is_concerning

    def validate_analysis(self, agent_name: str, text: str) -> Dict:
        """
        Full stability validation for a single agent response.

        Runs every individual check, collects flags, and derives an
        aggregate stability score (each flag costs 0.25).

        Args:
            agent_name: Name of agent.
            text: Response text.

        Returns:
            {
                'agent': str,
                'is_stable': bool,
                'stability_score': float (0-1),
                'flags': List[str],
                'spectrum': np.ndarray,
                'concerns': Dict
            }
        """
        spectrum = self.text_to_spectrum(text)

        flags = []
        concerns = {
            'energy_concentration': None,
            'self_similarity': None,
            'vocabulary_diversity': None
        }

        # Check 1: spectral energy concentration.
        conc, conc_concerning = self.check_energy_concentration(spectrum)
        concerns['energy_concentration'] = {
            'ratio': float(conc),
            'concerning': conc_concerning
        }
        if conc_concerning:
            flags.append('HIGH_ENERGY_CONCENTRATION')

        # Check 2: similarity to this agent's previous response.
        similarity, sim_concerning = self.check_self_similarity(agent_name, spectrum)
        concerns['self_similarity'] = {
            'ratio': float(similarity),
            'concerning': sim_concerning
        }
        if sim_concerning:
            flags.append('REPEATING_RESPONSE_PATTERN')

        # Check 3: vocabulary diversity.
        uniqueness, vocab_concerning = self.check_vocabulary_diversity(text)
        concerns['vocabulary_diversity'] = {
            'uniqueness': float(uniqueness),
            'concerning': vocab_concerning
        }
        if vocab_concerning:
            flags.append('LOW_VOCABULARY_DIVERSITY')

        # Length sanity checks: degenerate or runaway generations.
        if len(text) < 50:
            flags.append('SUSPICIOUSLY_SHORT')
        if len(text) > 10000:
            flags.append('SUSPICIOUSLY_LONG')

        # Each flag costs 0.25 of stability; clamp at zero.
        stability_score = max(0.0, 1.0 - len(flags) * 0.25)
        is_stable = stability_score > self.COHERENCE_FLOOR

        if self.verbose and flags:
            logger.info(f" {agent_name}: stability={stability_score:.2f}, flags={flags}")

        return {
            'agent': agent_name,
            'is_stable': is_stable,
            'stability_score': stability_score,
            'flags': flags,
            'spectrum': spectrum,
            'concerns': concerns
        }

    def validate_round(self, analyses: Dict[str, str],
                       round_num: int) -> Tuple[bool, List[Dict], float]:
        """
        Validate all agents' responses in a debate round.

        Args:
            analyses: Dict mapping agent_name -> response_text.
            round_num: Round number (for logging).

        Returns:
            (all_stable, validation_reports, avg_stability)
        """
        reports = []
        stability_scores = []

        for agent_name, text in analyses.items():
            report = self.validate_analysis(agent_name, text)
            reports.append(report)
            stability_scores.append(report['stability_score'])

        # float() so the score is JSON-serializable (np.mean returns
        # np.float64); 0.5 is a neutral default for an empty round.
        avg_stability = float(np.mean(stability_scores)) if stability_scores else 0.5

        all_stable = all(r['is_stable'] for r in reports)

        unstable_agents = [r['agent'] for r in reports if not r['is_stable']]
        if unstable_agents:
            logger.warning(
                f"Round {round_num}: Unstable agents detected: {unstable_agents} "
                f"(avg_stability={avg_stability:.2f})"
            )

        # Record the round outcome for get_summary().
        self.stability_history.append({
            'round': round_num,
            'all_stable': all_stable,
            'avg_stability': avg_stability,
            'unstable_agents': unstable_agents,
            'reports': reports
        })

        return all_stable, reports, avg_stability

    def should_halt_debate(self, analyses: Dict[str, str],
                           round_num: int, gamma: Optional[float] = None) -> Tuple[bool, str]:
        """
        Determine if debate should halt before synthesis.

        Halt if:
        1. Multiple agents unstable
        2. Gamma coherence < 0.35 (system collapse zone)
        3. Too many "REPEATING_RESPONSE_PATTERN" flags

        Args:
            analyses: Current round analyses.
            round_num: Current round number.
            gamma: Current gamma coherence (optional).

        Returns:
            (should_halt, reason)
        """
        all_stable, reports, avg_stability = self.validate_round(analyses, round_num)

        # Rule 1: two or more unstable agents this round.
        if not all_stable:
            unstable_count = sum(1 for r in reports if not r['is_stable'])
            if unstable_count >= 2:
                reason = (
                    f"Multiple agents unstable ({unstable_count}/{len(reports)}) "
                    f"at round {round_num}. Avg stability: {avg_stability:.2f}"
                )
                logger.warning(f"STABILITY CHECK: Halting debate. {reason}")
                return True, reason

        # Rule 2: system-level coherence in the collapse zone.
        if gamma is not None and gamma < 0.35:
            reason = f"System in collapse zone (gamma={gamma:.2f} < 0.35)"
            logger.warning(f"STABILITY CHECK: Halting debate. {reason}")
            return True, reason

        # Rule 3: multiple agents locked into repeating response patterns.
        repeating_count = sum(
            1 for r in reports
            if 'REPEATING_RESPONSE_PATTERN' in r['flags']
        )
        if repeating_count >= 2:
            reason = (
                f"Multiple agents repeating response patterns ({repeating_count}) "
                f"at round {round_num}. Synthesis loop risk."
            )
            logger.warning(f"STABILITY CHECK: Halting debate. {reason}")
            return True, reason

        return False, ""

    def get_summary(self) -> Dict:
        """Get stability history summary."""
        if not self.stability_history:
            return {"message": "No stability checks performed"}

        return {
            "total_rounds_checked": len(self.stability_history),
            # float() keeps the summary JSON-serializable.
            "average_stability": float(np.mean(
                [h['avg_stability'] for h in self.stability_history]
            )),
            # NOTE(review): this counts rounds with at least one unstable
            # agent, not actual halts; key name kept for compatibility.
            "halts_triggered": sum(
                1 for h in self.stability_history if not h['all_stable']
            ),
            # Slicing already caps at the last three entries.
            "recent": self.stability_history[-3:],
        }
|
|