Spaces:

riazmo
/

Design-System-Extractor-2

Running

App Files Files Community

Design-System-Extractor-2 / core /validation.py

riazmo

Upload 3 files

a2498f7 verified about 1 month ago

raw

history blame contribute delete

5.51 kB

	"""
	Agent Output Validation
	========================

	JSON schemas for validating LLM agent outputs.
	Ensures data integrity between pipeline stages.
	"""

	from typing import Any, Optional

	try:
	from jsonschema import validate, ValidationError

	HAS_JSONSCHEMA = True
	except ImportError:
	HAS_JSONSCHEMA = False

	from core.logging import get_logger

	# Module-level logger used by the validation functions in this file.
	logger = get_logger("validation")


	# =============================================================================
	# SCHEMAS
	# =============================================================================

	# JSON schema for brand-identification output.
	# Only "brand_primary" and "palette_strategy" must be present; every other
	# listed property is optional but type-checked when supplied.
	BRAND_IDENTIFICATION_SCHEMA = {
	    "type": "object",
	    "properties": {
	        # The three brand_* fields accept either a string or null.
	        **{
	            slot: {"type": ["string", "null"]}
	            for slot in ("brand_primary", "brand_secondary", "brand_accent")
	        },
	        "palette_strategy": {"type": "string"},
	        "cohesion_score": {"type": ["number", "integer"]},
	        "cohesion_notes": {"type": "string"},
	        # Free-form nested objects; their contents are not constrained here.
	        **{
	            section: {"type": "object"}
	            for section in ("semantic_names", "self_evaluation")
	        },
	    },
	    "required": ["brand_primary", "palette_strategy"],
	}

	# JSON schema for benchmark-advice output.
	# Only "recommended_benchmark" and "reasoning" must be present.
	BENCHMARK_ADVICE_SCHEMA = {
	    "type": "object",
	    "properties": {
	        # Plain string fields.
	        **{
	            text_field: {"type": "string"}
	            for text_field in (
	                "recommended_benchmark",
	                "recommended_benchmark_name",
	                "reasoning",
	            )
	        },
	        # List fields; item shape is not constrained here.
	        **{
	            list_field: {"type": "array"}
	            for list_field in (
	                "alignment_changes",
	                "pros_of_alignment",
	                "cons_of_alignment",
	                "alternative_benchmarks",
	            )
	        },
	        "self_evaluation": {"type": "object"},
	    },
	    "required": ["recommended_benchmark", "reasoning"],
	}

	# JSON schema for best-practices output.
	# Only "overall_score" and "priority_fixes" must be present.
	BEST_PRACTICES_SCHEMA = {
	    "type": "object",
	    "properties": {
	        "overall_score": {"type": ["number", "integer"]},
	        # List fields; item shape is not constrained here.
	        **{
	            list_field: {"type": "array"}
	            for list_field in (
	                "checks",
	                "priority_fixes",
	                "passing_practices",
	                "failing_practices",
	            )
	        },
	        "self_evaluation": {"type": "object"},
	    },
	    "required": ["overall_score", "priority_fixes"],
	}

	# JSON schema for head-synthesis output.
	# Only "executive_summary" and "top_3_actions" must be present.
	HEAD_SYNTHESIS_SCHEMA = {
	    "type": "object",
	    "properties": {
	        "executive_summary": {"type": "string"},
	        # Nested objects; their contents are not constrained here.
	        **{
	            section: {"type": "object"}
	            for section in ("scores", "benchmark_fit", "brand_analysis")
	        },
	        # List fields; item shape is not constrained here.
	        **{
	            list_field: {"type": "array"}
	            for list_field in ("top_3_actions", "color_recommendations")
	        },
	        **{
	            section: {"type": "object"}
	            for section in (
	                "type_scale_recommendation",
	                "spacing_recommendation",
	                "self_evaluation",
	            )
	        },
	    },
	    "required": ["executive_summary", "top_3_actions"],
	}

	# Registry of agent name -> schema. Each schema is registered under two
	# lowercase keys, listed in pairs below.
	AGENT_SCHEMAS = dict(
	    aurora=BRAND_IDENTIFICATION_SCHEMA,
	    brand_identifier=BRAND_IDENTIFICATION_SCHEMA,
	    atlas=BENCHMARK_ADVICE_SCHEMA,
	    benchmark_advisor=BENCHMARK_ADVICE_SCHEMA,
	    sentinel=BEST_PRACTICES_SCHEMA,
	    best_practices=BEST_PRACTICES_SCHEMA,
	    nexus=HEAD_SYNTHESIS_SCHEMA,
	    head_synthesizer=HEAD_SYNTHESIS_SCHEMA,
	)


	# =============================================================================
	# VALIDATION FUNCTIONS
	# =============================================================================

	def validate_agent_output(data: Any, agent_name: str) -> tuple[bool, Optional[str]]:
	    """
	    Validate an agent's output against its expected schema.

	    Args:
	        data: The output data. Accepted forms are a dict, an object with a
	            callable ``to_dict()``, or a dataclass instance.
	        agent_name: Name of the agent (e.g., 'aurora', 'nexus'). Matching is
	            case-insensitive and ignores surrounding whitespace.

	    Returns:
	        (is_valid, error_message) tuple. ``error_message`` is None when the
	        output is valid. An agent with no registered schema is reported as
	        valid so that unknown agents do not block the caller.
	    """
	    from dataclasses import asdict, is_dataclass

	    agent_key = agent_name.lower().strip()
	    schema = AGENT_SCHEMAS.get(agent_key)

	    if not schema:
	        logger.warning(f"No schema found for agent: {agent_name}")
	        return True, None  # No schema = pass (don't block)

	    # Normalise the payload to a plain dict before validating.
	    to_dict = getattr(data, "to_dict", None)
	    if callable(to_dict):
	        data_dict = to_dict()
	    elif is_dataclass(data) and not isinstance(data, type):
	        # is_dataclass() is also true for dataclass *classes*; asdict() only
	        # accepts instances, so classes fall through to the error branch.
	        data_dict = asdict(data)
	    elif isinstance(data, dict):
	        data_dict = data
	    else:
	        return False, f"Cannot validate: unexpected type {type(data)}"

	    # A to_dict() that returns something other than a dict cannot satisfy
	    # any of the object schemas; reject it here so both validation paths
	    # below behave the same way.
	    if not isinstance(data_dict, dict):
	        error_msg = (
	            f"Validation failed for {agent_name}: "
	            f"output is not an object (got {type(data_dict).__name__})"
	        )
	        logger.warning(error_msg)
	        return False, error_msg

	    if not HAS_JSONSCHEMA:
	        # Fallback: manual check when the jsonschema package is unavailable
	        return _manual_validate(data_dict, schema, agent_name)

	    try:
	        validate(instance=data_dict, schema=schema)
	    except ValidationError as e:
	        error_msg = f"Validation failed for {agent_name}: {e.message}"
	        logger.warning(error_msg)
	        return False, error_msg

	    logger.debug(f"Validation passed for {agent_name}")
	    return True, None


	def _matches_json_type(value: Any, type_name: str) -> bool:
	    """Return True if *value* satisfies the JSON Schema primitive *type_name*."""
	    if type_name == "null":
	        return value is None
	    if type_name == "boolean":
	        return isinstance(value, bool)
	    if type_name == "string":
	        return isinstance(value, str)
	    if type_name == "object":
	        return isinstance(value, dict)
	    if type_name == "array":
	        return isinstance(value, list)
	    if type_name in ("number", "integer"):
	        # bool subclasses int in Python but is not a JSON number.
	        if isinstance(value, bool):
	            return False
	        if type_name == "number":
	            return isinstance(value, (int, float))
	        return isinstance(value, int) or (
	            isinstance(value, float) and value.is_integer()
	        )
	    # Unrecognised type name: stay lenient rather than block the caller.
	    return True


	def _manual_validate(data: dict, schema: dict, agent_name: str) -> tuple[bool, Optional[str]]:
	    """
	    Fallback validation without the jsonschema library.

	    Checks, in order: that ``data`` is a dict, that every field named in the
	    schema's "required" list is present, and that each present field declared
	    under "properties" matches its declared primitive "type". Nested schema
	    keywords are not evaluated.

	    Args:
	        data: The agent output, already converted to a dict.
	        schema: The schema dict to check against.
	        agent_name: Agent name, used only in messages.

	    Returns:
	        (is_valid, error_message) tuple; error_message is None when valid.
	    """
	    if not isinstance(data, dict):
	        error_msg = f"{agent_name} output must be an object, got {type(data).__name__}"
	        logger.warning(error_msg)
	        return False, error_msg

	    required = schema.get("required", [])
	    missing = [field for field in required if field not in data]

	    if missing:
	        error_msg = f"{agent_name} output missing required fields: {missing}"
	        logger.warning(error_msg)
	        return False, error_msg

	    mismatched = []
	    for field, spec in schema.get("properties", {}).items():
	        if field not in data or not isinstance(spec, dict):
	            continue
	        expected = spec.get("type")
	        if expected is None:
	            continue
	        # "type" may be a single name or a list of alternatives.
	        allowed = [expected] if isinstance(expected, str) else list(expected)
	        if not any(_matches_json_type(data[field], name) for name in allowed):
	            mismatched.append(
	                f"{field} (expected {expected}, got {type(data[field]).__name__})"
	            )

	    if mismatched:
	        error_msg = f"{agent_name} output has invalid field types: {', '.join(mismatched)}"
	        logger.warning(error_msg)
	        return False, error_msg

	    return True, None


	def validate_all_agents(outputs: dict) -> dict[str, tuple[bool, Optional[str]]]:
	    """
	    Run validate_agent_output over every entry of a name-to-output mapping.

	    Args:
	        outputs: Dict mapping agent_name -> output data

	    Returns:
	        Dict with the same keys, each mapped to the
	        (is_valid, error_message) tuple for that agent.
	    """
	    return {
	        name: validate_agent_output(payload, name)
	        for name, payload in outputs.items()
	    }