""" FastAPI server exposing the Project Polymath Workspace Environment. Endpoints: GET / — Command Center UI (HTML) GET /docs — Interactive OpenAPI (Swagger UI) GET /health — Liveness probe (JSON) POST /reset — Start a new negotiation POST /step — Apply an agent action GET /state — Read current state without advancing """ from __future__ import annotations import os import sys # Ensure Python can find your 'envs' and 'models' folders ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) if ROOT_DIR not in sys.path: sys.path.insert(0, ROOT_DIR) from typing import Any, Optional from fastapi import FastAPI from fastapi.responses import HTMLResponse from pydantic import BaseModel, ConfigDict, Field import uvicorn # Project Polymath Imports from envs.environment import WorkSpaceEnvironment from models.schemas import WorkSpaceAction # ── OpenAPI Documentation Setup ────────────────────────────────────────────── _OPENAPI_TAGS = [ { "name": "Environment", "description": "Episode lifecycle for Polymath: call **reset** before **step**.", }, { "name": "Interface", "description": "Browser UI for manual debugging (HTML, not JSON).", } ] app = FastAPI( title="Project Polymath: OpenEnv", version="1.0.0", openapi_tags=_OPENAPI_TAGS, description=( "Multi-agent negotiation environment. " "Agent must balance constraints from Finance, Security, and UX." ), ) # Force the environment into the mode you want the judges to see by default os.environ["BASELINE_ENV_MODE"] = "easy" _env = WorkSpaceEnvironment() # ── request / response schemas ─────────────────────────────────────────────── class ResetRequest(BaseModel): """Start a new episode.""" model_config = ConfigDict( json_schema_extra={ "examples": [{"topic": "Draft the new Mobile App PRD"}] } ) topic: str = Field( default="Draft the new Mobile App PRD", description="The core task the PM must complete." ) class StepRequest(BaseModel): """Apply one agent action.""" action: WorkSpaceAction = Field( description="The agent's action payload." ) class EnvResponse(BaseModel): """Observation and reward after reset, step, or state.""" observation: dict[str, Any] | None = Field(description="Environment feedback and turn count.") reward: float = Field(description="Reward for the last transition.") done: bool = Field(description="True after the episode concludes.") info: dict[str, Any] = Field(default_factory=dict) def _format_obs(obs: Any) -> dict[str, Any] | None: if obs is None: return None # obs is a WorkspaceObservation return { "feedback": getattr(obs, "feedback", "Episode Terminated."), "current_turn": getattr(obs, "current_turn", 0) } # ── FRONTEND HTML/CSS/JS PAYLOAD ───────────────────────────────────────────── _DEBUG_UI_HTML = """