"""Latent biological and technical state — hidden from the agent."""
|
|
|
| from __future__ import annotations
|
|
|
| from typing import Any, Dict, List, Optional
|
|
|
| from pydantic import BaseModel, Field
|
|
|
|
|
class CellPopulation(BaseModel):
    """One ground-truth cell sub-population of the simulated tissue.

    Attributes:
        name: Label of the sub-population (required).
        proportion: Required float validated to lie in ``[0.0, 1.0]``;
            presumably the population's share of the tissue -- confirm
            against the simulator.
        marker_genes: Strings naming the population's marker genes; a
            fresh empty list when omitted.
        state: Free-form state label, ``"quiescent"`` when omitted.
        condition_response: String-keyed floats, empty when omitted.
            Presumably condition name -> response magnitude; verify
            against the code that consumes it.
    """

    name: str
    # ``...`` marks the field as required while still attaching bounds.
    proportion: float = Field(..., ge=0.0, le=1.0)
    marker_genes: List[str] = Field(default_factory=list)
    state: str = Field(default="quiescent")
    condition_response: Dict[str, float] = Field(default_factory=dict)
|
|
|
|
|
class GeneProgram(BaseModel):
    """A latent gene-regulatory programme.

    Attributes:
        name: Label of the programme (required).
        genes: Strings naming the member genes; a fresh empty list when
            omitted.
        activity_level: Float validated to lie in ``[0.0, 1.0]``;
            ``0.5`` when omitted.
        condition_dependent: Boolean flag, ``False`` when omitted.
            Presumably signals that ``conditions_active`` is meaningful
            -- confirm against the simulator.
        conditions_active: Strings naming conditions; a fresh empty list
            when omitted.
    """

    name: str
    genes: List[str] = Field(default_factory=list)
    activity_level: float = Field(default=0.5, ge=0.0, le=1.0)
    condition_dependent: bool = Field(default=False)
    conditions_active: List[str] = Field(default_factory=list)
|
|
|
|
|
class LatentBiologicalState(BaseModel):
    """Hidden ground-truth biology the agent cannot directly observe.

    Attributes:
        cell_populations: Ground-truth ``CellPopulation`` entries.
        true_de_genes: comparison_key -> {gene: log2FC}.
        true_pathways: pathway -> activity level.
        gene_programs: Latent ``GeneProgram`` entries.
        true_trajectory: Optional free-form mapping; ``None`` when no
            trajectory is defined. Its schema is not fixed by this model.
        true_regulatory_network: TF -> target genes.
        perturbation_effects: perturbation -> {gene: effect_size}.
        confounders: String-keyed floats; presumably confounder name ->
            strength (confirm against the simulator).
        true_markers: Strings naming the true markers.
        causal_mechanisms: Strings describing causal mechanisms.
        n_true_cells: Integer cell count, ``10_000`` when omitted.
    """

    cell_populations: List[CellPopulation] = Field(default_factory=list)
    # The descriptions below use ASCII arrows: the previous strings carried a
    # mis-encoded character in place of the arrow, which leaked into the
    # generated schema.
    true_de_genes: Dict[str, Dict[str, float]] = Field(
        default_factory=dict,
        description="comparison_key -> {gene: log2FC}",
    )
    true_pathways: Dict[str, float] = Field(
        default_factory=dict,
        description="pathway -> activity level",
    )
    gene_programs: List[GeneProgram] = Field(default_factory=list)
    true_trajectory: Optional[Dict[str, Any]] = None
    true_regulatory_network: Dict[str, List[str]] = Field(
        default_factory=dict,
        description="TF -> target genes",
    )
    perturbation_effects: Dict[str, Dict[str, float]] = Field(
        default_factory=dict,
        description="perturbation -> {gene: effect_size}",
    )
    confounders: Dict[str, float] = Field(default_factory=dict)
    true_markers: List[str] = Field(default_factory=list)
    causal_mechanisms: List[str] = Field(default_factory=list)
    n_true_cells: int = 10_000
|
|
|
|
|
class TechnicalState(BaseModel):
    """Hidden technical parameters that shape experimental noise.

    Every fraction, rate and efficiency is validated to lie in
    ``[0.0, 1.0]``, matching the bounds already applied to
    ``sample_quality`` and ``library_complexity``; out-of-range values are
    rejected by pydantic validation instead of silently flowing into the
    noise model.

    Attributes:
        batch_effects: String-keyed floats, empty when omitted. Presumably
            batch name -> effect strength; confirm against the simulator.
        ambient_rna_fraction: Fraction in ``[0.0, 1.0]``, default ``0.05``.
        doublet_rate: Rate in ``[0.0, 1.0]``, default ``0.04``.
        dropout_rate: Rate in ``[0.0, 1.0]``, default ``0.1``.
        sample_quality: Score in ``[0.0, 1.0]``, default ``0.9``.
        library_complexity: Score in ``[0.0, 1.0]``, default ``0.8``.
        sequencing_depth_factor: Unbounded float multiplier, default
            ``1.0``.
        capture_efficiency: Efficiency in ``[0.0, 1.0]``, default ``0.6``.
    """

    batch_effects: Dict[str, float] = Field(default_factory=dict)
    ambient_rna_fraction: float = Field(0.05, ge=0.0, le=1.0)
    doublet_rate: float = Field(0.04, ge=0.0, le=1.0)
    dropout_rate: float = Field(0.1, ge=0.0, le=1.0)
    sample_quality: float = Field(0.9, ge=0.0, le=1.0)
    library_complexity: float = Field(0.8, ge=0.0, le=1.0)
    # Left unconstrained: SOURCE gives no indication of a valid range.
    sequencing_depth_factor: float = 1.0
    capture_efficiency: float = Field(0.6, ge=0.0, le=1.0)
|
|
|
|
|
class ExperimentProgress(BaseModel):
    """Record of which experiment stages have been completed.

    Every stage flag starts out ``False``. The ``n_*`` counters start out
    ``None``; presumably they are filled in by whichever step produces the
    corresponding result -- confirm against the code that updates this
    model.
    """

    # --- stage-completion flags -------------------------------------------
    samples_collected: bool = False
    cohort_selected: bool = False
    cells_cultured: bool = False
    library_prepared: bool = False
    perturbation_applied: bool = False
    cells_sequenced: bool = False
    qc_performed: bool = False
    data_filtered: bool = False
    data_normalized: bool = False
    batches_integrated: bool = False
    cells_clustered: bool = False
    de_performed: bool = False
    trajectories_inferred: bool = False
    pathways_analyzed: bool = False
    networks_inferred: bool = False
    markers_discovered: bool = False
    markers_validated: bool = False
    followup_designed: bool = False
    subagent_review_requested: bool = False
    conclusion_reached: bool = False

    # --- optional integer counters (unset by default) ---------------------
    n_cells_sequenced: Optional[int] = Field(default=None)
    n_cells_after_filter: Optional[int] = Field(default=None)
    n_clusters_found: Optional[int] = Field(default=None)
    n_de_genes_found: Optional[int] = Field(default=None)
    n_markers_found: Optional[int] = Field(default=None)
|
|
|
|
|
class ResourceState(BaseModel):
    """Internal resource ledger (superset of agent-visible ResourceUsage).

    Holds budget and time allowances with their running consumption, plus
    sample and consumable counters. The derived properties never report a
    negative remainder.
    """

    budget_total: float = Field(default=100_000.0)
    budget_used: float = Field(default=0.0)
    time_limit_days: float = Field(default=180.0)
    time_used_days: float = Field(default=0.0)
    samples_available: int = Field(default=0)
    samples_consumed: int = Field(default=0)
    compute_hours_used: float = Field(default=0.0)
    sequencing_lanes_used: int = Field(default=0)
    reagent_kits_used: int = Field(default=0)

    @property
    def budget_remaining(self) -> float:
        """Return the unspent budget, floored at ``0.0`` on overspend."""
        unspent = self.budget_total - self.budget_used
        return unspent if unspent > 0.0 else 0.0

    @property
    def time_remaining_days(self) -> float:
        """Return the days left before the limit, floored at ``0.0``."""
        days_left = self.time_limit_days - self.time_used_days
        return days_left if days_left > 0.0 else 0.0

    @property
    def budget_exhausted(self) -> bool:
        """Return ``True`` when no budget remains."""
        remaining = self.budget_remaining
        return remaining <= 0.0

    @property
    def time_exhausted(self) -> bool:
        """Return ``True`` when no time remains."""
        remaining = self.time_remaining_days
        return remaining <= 0.0
|
|
|
|
|
class FullLatentState(BaseModel):
    """Complete hidden state of the simulated biological world.

    Aggregates the biological, technical, progress and resource sub-models
    (each default-constructed when omitted) together with bookkeeping
    fields. The three ``last_*`` fields are declared with ``exclude=True``
    and are therefore left out when the model is exported.
    """

    # --- nested sub-models --------------------------------------------------
    biology: LatentBiologicalState = Field(default_factory=LatentBiologicalState)
    technical: TechnicalState = Field(default_factory=TechnicalState)
    progress: ExperimentProgress = Field(default_factory=ExperimentProgress)
    resources: ResourceState = Field(default_factory=ResourceState)

    # --- bookkeeping --------------------------------------------------------
    hidden_failure_conditions: List[str] = Field(default_factory=list)
    mechanism_confidence: Dict[str, float] = Field(default_factory=dict)
    discovered_de_genes: List[str] = Field(default_factory=list)
    discovered_clusters: List[str] = Field(default_factory=list)
    task_modality: str = Field(default="scRNA-seq")
    step_count: int = Field(default=0)
    rng_seed: int = Field(default=42)

    # --- export-excluded scratch values --------------------------------------
    # NOTE(review): these look like caches of the most recent step's
    # intermediate results -- confirm against the code that writes them.
    last_retain_frac: Optional[float] = Field(default=None, exclude=True)
    last_n_clusters: Optional[int] = Field(default=None, exclude=True)
    last_perturbation_efficiency: Optional[float] = Field(default=None, exclude=True)
|
|
|