import subprocess
import sys

from fastapi import FastAPI, HTTPException

from src.env import GPUClusterEnv
from src.models import Action, TaskConfig
from src.tasks import TASKS

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI(title="GPU Cluster OpenEnv")
# Single module-level environment instance used by every endpoint in this file.
env = GPUClusterEnv()

@app.get("/")
def health_check():
    """Report that the service is up.

    Returns:
        A fixed dict with ``status`` and ``message`` keys.
    """
    payload = {
        "status": "ok",
        "message": "GPUClusterEnv is running",
    }
    return payload

@app.post("/reset")
def reset_env(task_id: str = "easy"):
    """Reset the shared environment to the task registered under ``task_id``.

    Args:
        task_id: Key into ``TASKS``; defaults to ``"easy"``.

    Returns:
        A dict whose ``observation`` entry is the ``dict()`` form of the
        value returned by ``env.reset``.

    Raises:
        HTTPException: 404 when ``task_id`` is not a key of ``TASKS``.
    """
    if task_id in TASKS:
        initial_observation = env.reset(TASKS[task_id])
        return {"observation": initial_observation.dict()}
    raise HTTPException(status_code=404, detail="Task not found")

@app.post("/step")
def step_env(action: Action):
    """Apply ``action`` to the shared environment and return the step result.

    Args:
        action: The action parsed from the request body.

    Returns:
        The ``dict()`` form of whatever ``env.step(action)`` returns.

    Raises:
        HTTPException: 400, with the original error text as ``detail``, when
            stepping the environment or serializing its result raises.
    """
    try:
        result = env.step(action)
        return result.dict()
    except Exception as e:
        # Any failure is reported to the client as a 400. ``from e`` records
        # the original error as the explicit cause so it stays visible in
        # tracebacks and logs.
        raise HTTPException(status_code=400, detail=str(e)) from e

@app.get("/state")
def get_state():
    """Return the environment's current state.

    Returns:
        A dict whose ``observation`` entry is ``env.state.dict()``.

    Raises:
        HTTPException: 400 when ``env.state`` is ``None``.
    """
    if env.state is not None:
        return {"observation": env.state.dict()}
    raise HTTPException(status_code=400, detail="Environment not initialized")

@app.get("/tasks")
def list_tasks():
    """List the available task ids together with the ``Action`` schema.

    Returns:
        A dict with ``tasks`` (the keys of ``TASKS`` as a list) and
        ``action_schema`` (the result of ``Action.schema()``).
    """
    task_ids = list(TASKS)
    schema = Action.schema()
    return {"tasks": task_ids, "action_schema": schema}

@app.get("/grader")
def grader():
    """Return the accumulated reward normalized to a score in [0.0, 1.0].

    The score is ``env.total_reward`` divided by a baseline maximum and then
    clamped to the 0.0 - 1.0 range.

    Returns:
        A dict with a single ``score`` entry. The score is 0.0 when
        ``env.state`` is ``None`` or when the baseline maximum is not
        positive.
    """
    if env.state is None:
        return {"score": 0.0}
    # Arbitrary maximum for this example: 10 reward per configured step.
    max_expected_reward = env.config.max_steps * 10
    if max_expected_reward <= 0:
        # With no steps configured there is nothing to normalize against;
        # return the floor score instead of dividing by zero.
        return {"score": 0.0}
    score = max(0.0, min(1.0, env.total_reward / max_expected_reward))
    return {"score": score}

@app.post("/baseline")
def run_baseline():
    """Run the baseline script in a child process and return what it printed.

    The call blocks until the script exits.

    Returns:
        A dict with ``output`` (captured stdout), ``stderr`` (captured
        stderr) and ``returncode`` (the child's exit status), so callers can
        tell a failed run from one that printed nothing.
    """
    # Launch with the interpreter running this server, not whichever
    # "python" happens to be first on PATH; fall back to the bare name only
    # if the interpreter path is unavailable.
    interpreter = sys.executable or "python"
    result = subprocess.run(
        [interpreter, "src/baseline.py"],
        capture_output=True,
        text=True,
    )
    return {
        "output": result.stdout,
        "stderr": result.stderr,
        "returncode": result.returncode,
    }