| """ |
| Cocoon Stability Field — Collapse Detection Engine |
| =================================================== |
| |
| FFT-based stability validation that detects synthesis loop collapse |
| BEFORE corrupted output is generated. |
| |
| Based on Codette_Deep_Simulation_v1.py cocoon_stability_field() equation: |
| stability = ∫|F(k)|² dk < ε_threshold |
| |
Purpose: Halt debate if system enters instability zone (gamma < 0.35,
runaway vocabulary patterns, self-referential cascades).
| |
Recovered from: J:/codette-training-lab/new data/Codette_Deep_Simulation_v1.py
| """ |
|
|
| import numpy as np |
| from typing import Dict, List, Tuple, Optional |
| import logging |
|
|
# Module-level logger; handlers/levels are configured by the host application.
logger = logging.getLogger(__name__)
|
|
|
|
class CocoonStabilityField:
    """
    FFT-based stability validator for debate coherence.

    Monitors the frequency-domain energy distribution of agent responses.
    If spectral energy becomes too concentrated (self-similarity, repeating
    patterns) or vocabulary collapses into repetition, the response is
    flagged as a collapse risk so the debate can be halted before a
    corrupted synthesis is produced.
    """

    # Fraction of total spectral energy allowed in the top-10 bins before a
    # response is flagged as self-similar/repetitive.
    ENERGY_CONCENTRATION_THRESHOLD = 0.85
    # Cosine similarity with the agent's previous spectrum above which the
    # agent is considered to be repeating its own response shape.
    SELF_SIMILARITY_THRESHOLD = 0.75
    # Minimum stability score for a response to be considered stable.
    COHERENCE_FLOOR = 0.3
    # Responses whose word-uniqueness ratio falls below
    # (1 - RUNAWAY_VOCABULARY_RATIO) are flagged for runaway repetition.
    RUNAWAY_VOCABULARY_RATIO = 0.6

    def __init__(self, verbose: bool = False):
        """
        Args:
            verbose: If True, log per-agent stability details at INFO level.
        """
        self.verbose = verbose
        # Last seen spectrum per agent, used for self-similarity checks.
        self.frequency_signatures: Dict[str, np.ndarray] = {}
        # One entry per validated round (see validate_round).
        self.stability_history: List[Dict] = []

    def text_to_spectrum(self, text: str, fft_size: int = 256) -> np.ndarray:
        """
        Convert text to a normalized power spectrum for FFT analysis.

        Args:
            text: Response text to analyze.
            fft_size: FFT size (should be a power of 2).

        Returns:
            Normalized power spectrum in [0, 1], length fft_size // 2
            (the second half of a real signal's spectrum is redundant).
        """
        # Map characters to numeric codes, truncated to fft_size.
        # (Bug fix: the original sliced text[:1000] and crashed with a
        # broadcast error for any text longer than fft_size characters.)
        char_codes = np.array(
            [ord(c) % 256 for c in text[:fft_size]], dtype=np.float32
        )

        # Zero-pad up to the FFT window length.
        padded = np.zeros(fft_size, dtype=np.float32)
        padded[: len(char_codes)] = char_codes

        fft_result = np.fft.fft(padded)
        power_spectrum = np.abs(fft_result) ** 2

        # Normalize to [0, 1]; guard against an all-zero spectrum
        # (e.g. empty text) to avoid division by zero.
        max_power = float(np.max(power_spectrum))
        if max_power == 0.0:
            max_power = 1.0
        normalized_spectrum = power_spectrum / max_power

        # Keep only the non-redundant half of the spectrum. (Generalized
        # from a hard-coded 128 so non-default fft_size values behave
        # consistently; identical result for the default fft_size=256.)
        return normalized_spectrum[: fft_size // 2]

    def check_energy_concentration(self, spectrum: np.ndarray) -> Tuple[float, bool]:
        """
        Check if spectral energy is too concentrated (self-similarity syndrome).

        Concentrated energy = agent repeating itself/copying other agents.

        Args:
            spectrum: Power spectrum from FFT.

        Returns:
            (concentration_ratio, is_concerning)
        """
        # Share of total energy carried by the 10 strongest bins.
        top_k = 10
        top_sum = float(np.sum(np.sort(spectrum)[-top_k:]))
        total_sum = float(np.sum(spectrum))
        if total_sum == 0.0:
            # All-zero spectrum: report zero concentration instead of
            # dividing by zero.
            total_sum = 1.0

        concentration = top_sum / total_sum
        is_concerning = concentration > self.ENERGY_CONCENTRATION_THRESHOLD

        return concentration, is_concerning

    def check_self_similarity(self, agent_name: str,
                              spectrum: np.ndarray) -> Tuple[float, bool]:
        """
        Check if agent is repeating itself (same response shape).

        Compares the new spectrum against the agent's previous one via
        cosine similarity, then stores the new spectrum for the next call.

        Args:
            agent_name: Name of agent for history lookup.
            spectrum: New response spectrum.

        Returns:
            (similarity_score, is_concerning)
        """
        if agent_name not in self.frequency_signatures:
            # First response from this agent: nothing to compare against.
            self.frequency_signatures[agent_name] = spectrum
            return 0.0, False

        prev_spectrum = self.frequency_signatures[agent_name]
        # Cosine similarity; the epsilon avoids division by zero for
        # all-zero spectra.
        similarity = float(np.dot(prev_spectrum, spectrum) / (
            np.linalg.norm(prev_spectrum) * np.linalg.norm(spectrum) + 1e-8
        ))

        self.frequency_signatures[agent_name] = spectrum

        # bool() so callers (and JSON serialization) get a plain Python
        # bool rather than np.bool_, consistent with the other checks.
        is_concerning = bool(similarity > self.SELF_SIMILARITY_THRESHOLD)
        return similarity, is_concerning

    def check_vocabulary_diversity(self, text: str) -> Tuple[float, bool]:
        """
        Check if response vocabulary is repeating (indicators of
        "Another perspective on...").

        Args:
            text: Response text.

        Returns:
            (uniqueness_ratio, is_concerning)
        """
        # Very short responses cannot meaningfully repeat vocabulary.
        if len(text) < 20:
            return 1.0, False

        words = text.lower().split()
        if not words:
            return 1.0, False

        uniqueness = len(set(words)) / len(words)

        # Concerning when fewer than (1 - RUNAWAY_VOCABULARY_RATIO) of
        # the words are unique (uniqueness < 0.4 with the default ratio).
        is_concerning = uniqueness < (1.0 - self.RUNAWAY_VOCABULARY_RATIO)

        return uniqueness, is_concerning

    def validate_analysis(self, agent_name: str, text: str) -> Dict:
        """
        Full stability validation for a single agent response.

        Runs every individual check, collects flags, and derives an
        aggregate stability score (each flag costs 0.25).

        Args:
            agent_name: Name of agent.
            text: Response text.

        Returns:
            {
                'agent': str,
                'is_stable': bool,
                'stability_score': float (0-1),
                'flags': List[str],
                'spectrum': np.ndarray,
                'concerns': Dict
            }
        """
        spectrum = self.text_to_spectrum(text)

        flags = []
        concerns = {
            'energy_concentration': None,
            'self_similarity': None,
            'vocabulary_diversity': None
        }

        # Check 1: spectral energy concentration.
        conc, conc_concerning = self.check_energy_concentration(spectrum)
        concerns['energy_concentration'] = {
            'ratio': float(conc),
            'concerning': conc_concerning
        }
        if conc_concerning:
            flags.append('HIGH_ENERGY_CONCENTRATION')

        # Check 2: similarity to this agent's previous response.
        similarity, sim_concerning = self.check_self_similarity(agent_name, spectrum)
        concerns['self_similarity'] = {
            'ratio': float(similarity),
            'concerning': sim_concerning
        }
        if sim_concerning:
            flags.append('REPEATING_RESPONSE_PATTERN')

        # Check 3: vocabulary diversity.
        uniqueness, vocab_concerning = self.check_vocabulary_diversity(text)
        concerns['vocabulary_diversity'] = {
            'uniqueness': float(uniqueness),
            'concerning': vocab_concerning
        }
        if vocab_concerning:
            flags.append('LOW_VOCABULARY_DIVERSITY')

        # Length sanity checks: degenerate or runaway generations.
        if len(text) < 50:
            flags.append('SUSPICIOUSLY_SHORT')
        if len(text) > 10000:
            flags.append('SUSPICIOUSLY_LONG')

        # Each flag costs 0.25 of stability; clamp at zero.
        stability_score = max(0.0, 1.0 - len(flags) * 0.25)
        is_stable = stability_score > self.COHERENCE_FLOOR

        if self.verbose and flags:
            logger.info(f" {agent_name}: stability={stability_score:.2f}, flags={flags}")

        return {
            'agent': agent_name,
            'is_stable': is_stable,
            'stability_score': stability_score,
            'flags': flags,
            'spectrum': spectrum,
            'concerns': concerns
        }

    def validate_round(self, analyses: Dict[str, str],
                       round_num: int) -> Tuple[bool, List[Dict], float]:
        """
        Validate all agents' responses in a debate round.

        Args:
            analyses: Dict mapping agent_name -> response_text.
            round_num: Round number (for logging).

        Returns:
            (all_stable, validation_reports, avg_stability)
        """
        reports = []
        stability_scores = []

        for agent_name, text in analyses.items():
            report = self.validate_analysis(agent_name, text)
            reports.append(report)
            stability_scores.append(report['stability_score'])

        # float() so the score is JSON-serializable (np.mean returns
        # np.float64); 0.5 is a neutral default for an empty round.
        avg_stability = float(np.mean(stability_scores)) if stability_scores else 0.5

        all_stable = all(r['is_stable'] for r in reports)

        unstable_agents = [r['agent'] for r in reports if not r['is_stable']]
        if unstable_agents:
            logger.warning(
                f"Round {round_num}: Unstable agents detected: {unstable_agents} "
                f"(avg_stability={avg_stability:.2f})"
            )

        # Record the round outcome for get_summary().
        self.stability_history.append({
            'round': round_num,
            'all_stable': all_stable,
            'avg_stability': avg_stability,
            'unstable_agents': unstable_agents,
            'reports': reports
        })

        return all_stable, reports, avg_stability

    def should_halt_debate(self, analyses: Dict[str, str],
                           round_num: int, gamma: Optional[float] = None) -> Tuple[bool, str]:
        """
        Determine if debate should halt before synthesis.

        Halt if:
        1. Multiple agents unstable
        2. Gamma coherence < 0.35 (system collapse zone)
        3. Too many "REPEATING_RESPONSE_PATTERN" flags

        Args:
            analyses: Current round analyses.
            round_num: Current round number.
            gamma: Current gamma coherence (optional).

        Returns:
            (should_halt, reason)
        """
        all_stable, reports, avg_stability = self.validate_round(analyses, round_num)

        # Rule 1: two or more unstable agents this round.
        if not all_stable:
            unstable_count = sum(1 for r in reports if not r['is_stable'])
            if unstable_count >= 2:
                reason = (
                    f"Multiple agents unstable ({unstable_count}/{len(reports)}) "
                    f"at round {round_num}. Avg stability: {avg_stability:.2f}"
                )
                logger.warning(f"STABILITY CHECK: Halting debate. {reason}")
                return True, reason

        # Rule 2: system-level coherence in the collapse zone.
        if gamma is not None and gamma < 0.35:
            reason = f"System in collapse zone (gamma={gamma:.2f} < 0.35)"
            logger.warning(f"STABILITY CHECK: Halting debate. {reason}")
            return True, reason

        # Rule 3: multiple agents locked into repeating response patterns.
        repeating_count = sum(
            1 for r in reports
            if 'REPEATING_RESPONSE_PATTERN' in r['flags']
        )
        if repeating_count >= 2:
            reason = (
                f"Multiple agents repeating response patterns ({repeating_count}) "
                f"at round {round_num}. Synthesis loop risk."
            )
            logger.warning(f"STABILITY CHECK: Halting debate. {reason}")
            return True, reason

        return False, ""

    def get_summary(self) -> Dict:
        """Get stability history summary."""
        if not self.stability_history:
            return {"message": "No stability checks performed"}

        return {
            "total_rounds_checked": len(self.stability_history),
            # float() keeps the summary JSON-serializable.
            "average_stability": float(np.mean(
                [h['avg_stability'] for h in self.stability_history]
            )),
            # NOTE(review): this counts rounds with at least one unstable
            # agent, not actual halts; key name kept for compatibility.
            "halts_triggered": sum(
                1 for h in self.stability_history if not h['all_stable']
            ),
            # Slicing already caps at the last three entries.
            "recent": self.stability_history[-3:],
        }
|
|