Spaces:

nicopi
/

server

Sleeping

App Files Files Community

nicopi committed on Feb 27

Commit

5bd11be

verified ·

1 Parent(s): 931fffd

Upload 2 files

Browse files

Files changed (2) hide show

models.py +54 -0
server.py +221 -0

models.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from pydantic import BaseModel, Field
+from typing import Any, Optional, Dict
+from enum import Enum
+import uuid
+import time
+class JobStatus(str, Enum):
+    """Lifecycle state of a queued job.
+    Inherits from ``str`` so each member is also a plain string equal to its value.
+    """
+    # Initial state; submit_job in server.py forces every new job to this value.
+    PENDING = "pending"
+    # Set by claim_job in server.py when a mirror takes the job.
+    CLAIMED = "claimed"
+    # Set by complete_job in server.py when the mirror reports no error.
+    COMPLETED = "completed"
+    # Set by complete_job in server.py when the mirror reports an error.
+    FAILED = "failed"
+    # Set by the reaper in server.py once a pending/claimed job outlives its ttl.
+    TIMEOUT = "timeout"
+class APIJob(BaseModel):
+    """One proxied API call: the request to perform plus, once available, its result.
+    The same model is used as the request body when submitting a job and as the
+    response body when reading one back.
+    """
+    # Identity and timestamps. Times are seconds since the epoch (time.time()).
+    job_id: str = Field(default_factory=lambda: str(uuid.uuid4()))  # random UUID4 unless supplied
+    created_at: float = Field(default_factory=time.time)
+    claimed_at: Optional[float] = None  # None until a mirror claims the job
+    completed_at: Optional[float] = None  # None until a mirror posts a result
+    # Request fields
+    method: str  # GET, POST, PUT, DELETE, etc.
+    endpoint: str  # e.g. "/api/v1/inference"
+    headers: Dict[str, str] = {}
+    body: Optional[Any] = None
+    query_params: Dict[str, str] = {}
+    # Routing: which mirror should handle this (optional, None = any mirror that has it)
+    target_mirror: Optional[str] = None
+    # Response fields
+    status: JobStatus = JobStatus.PENDING
+    response_status_code: Optional[int] = None
+    response_headers: Dict[str, str] = {}
+    response_body: Optional[Any] = None
+    error: Optional[str] = None  # error text reported by the mirror, if any
+    # TTL: jobs older than this (seconds) are considered timed out
+    ttl: float = 30.0
+class ClaimRequest(BaseModel):
+    """Body a mirror sends when asking the queue for a job to work on."""
+    # Identifier of the calling mirror; compared against a job's target_mirror.
+    mirror_id: str
+    available_endpoints: list[str]  # list of endpoint prefixes this mirror can serve
+class CompleteRequest(BaseModel):
+    """Body a mirror sends to report the outcome of a job it claimed."""
+    mirror_id: str  # identifier of the reporting mirror
+    job_id: str  # job the result belongs to
+    # Result of the proxied call; these are copied onto the matching APIJob.
+    response_status_code: int
+    response_headers: Dict[str, str] = {}
+    response_body: Optional[Any] = None
+    # A non-empty value makes the server record the job as failed.
+    error: Optional[str] = None

server.py ADDED Viewed

	@@ -0,0 +1,221 @@

+"""
+Queue Server - hosted on Hugging Face Spaces
+============================================
+Acts as the neutral relay between the public GUI and local mirrors.
+Run with:
+    pip install fastapi uvicorn
+    uvicorn server:app --host 0.0.0.0 --port 7860
+Environment variables:
+    QUEUE_API_KEY   - shared secret for authenticating mirrors and GUI clients
+    JOB_TTL         - seconds after creation before a pending or claimed job is marked timed out (default 30)
+    POLL_INTERVAL   - seconds between mirror poll cycles; informational only, not read anywhere in this file (default 2)
+"""
+import asyncio
+import os
+import time
+import logging
+from contextlib import asynccontextmanager
+from typing import Optional
+from fastapi import FastAPI, HTTPException, Header, BackgroundTasks
+from fastapi.middleware.cors import CORSMiddleware
+# ---------------------------------------------------------------------------
+# Import shared models (copy shared/models.py next to this file on HF Space)
+# ---------------------------------------------------------------------------
+from models import APIJob, ClaimRequest, CompleteRequest, JobStatus
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("queue-server")
+# ---------------------------------------------------------------------------
+# In-memory store. Swap for Redis on Upstash for production persistence.
+# ---------------------------------------------------------------------------
+_jobs: dict[str, APIJob] = {}
+_lock = asyncio.Lock()
+API_KEY = os.environ.get("QUEUE_API_KEY", "changeme")
+JOB_TTL = float(os.environ.get("JOB_TTL", 30))
+# ---------------------------------------------------------------------------
+# Background task: reap timed-out jobs
+# ---------------------------------------------------------------------------
+async def _reaper():
+    while True:
+        await asyncio.sleep(5)
+        now = time.time()
+        async with _lock:
+            for job in list(_jobs.values()):
+                if job.status in (JobStatus.PENDING, JobStatus.CLAIMED):
+                    age = now - job.created_at
+                    if age > job.ttl:
+                        job.status = JobStatus.TIMEOUT
+                        log.info(f"Job {job.job_id} timed out after {age:.1f}s")
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    task = asyncio.create_task(_reaper())
+    yield
+    task.cancel()
+app = FastAPI(title="API Proxy Queue", lifespan=lifespan)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Tighten this in production
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ---------------------------------------------------------------------------
+# Auth helper
+# ---------------------------------------------------------------------------
+def _check_auth(x_api_key: Optional[str]):
+    if x_api_key != API_KEY:
+        raise HTTPException(status_code=401, detail="Invalid API key")
+# ---------------------------------------------------------------------------
+# Routes: Client-facing (GUI / caller side)
+# ---------------------------------------------------------------------------
+@app.post("/jobs", response_model=APIJob, summary="Submit a new API job")
+async def submit_job(
+    job_in: APIJob,
+    x_api_key: Optional[str] = Header(default=None),
+):
+    _check_auth(x_api_key)
+    job_in.ttl = JOB_TTL
+    job_in.status = JobStatus.PENDING
+    async with _lock:
+        _jobs[job_in.job_id] = job_in
+    log.info(f"Job {job_in.job_id} submitted: {job_in.method} {job_in.endpoint}")
+    return job_in
+@app.get("/jobs/{job_id}", response_model=APIJob, summary="Poll for a job's result")
+async def get_job(
+    job_id: str,
+    x_api_key: Optional[str] = Header(default=None),
+):
+    _check_auth(x_api_key)
+    async with _lock:
+        job = _jobs.get(job_id)
+    if not job:
+        raise HTTPException(status_code=404, detail="Job not found")
+    return job
+@app.get("/jobs/{job_id}/wait", response_model=APIJob, summary="Long-poll until job completes or times out")
+async def wait_for_job(
+    job_id: str,
+    timeout: float = 25.0,
+    x_api_key: Optional[str] = Header(default=None),
+):
+    """
+    Blocks up to `timeout` seconds waiting for the job to complete.
+    Much more efficient than client-side polling.
+    """
+    _check_auth(x_api_key)
+    deadline = time.time() + min(timeout, JOB_TTL)
+    while time.time() < deadline:
+        async with _lock:
+            job = _jobs.get(job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        if job.status in (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.TIMEOUT):
+            return job
+        await asyncio.sleep(0.3)
+    # Return current state even if still pending
+    async with _lock:
+        return _jobs[job_id]
+# ---------------------------------------------------------------------------
+# Routes: Mirror-facing
+# ---------------------------------------------------------------------------
+@app.post("/mirror/claim", response_model=Optional[APIJob], summary="Mirror claims a pending job it can serve")
+async def claim_job(
+    claim: ClaimRequest,
+    x_api_key: Optional[str] = Header(default=None),
+):
+    """
+    The mirror sends its ID and the list of endpoint prefixes it can serve.
+    The server atomically assigns the first matching pending job.
+    Returns null if nothing is available.
+    """
+    _check_auth(x_api_key)
+    now = time.time()
+    async with _lock:
+        for job in _jobs.values():
+            if job.status != JobStatus.PENDING:
+                continue
+            # Check TTL
+            if now - job.created_at > job.ttl:
+                continue
+            # Check target mirror constraint
+            if job.target_mirror and job.target_mirror != claim.mirror_id:
+                continue
+            # Check endpoint match
+            if not any(job.endpoint.startswith(ep) for ep in claim.available_endpoints):
+                continue
+            # Atomic claim
+            job.status = JobStatus.CLAIMED
+            job.claimed_at = now
+            log.info(f"Job {job.job_id} claimed by mirror '{claim.mirror_id}'")
+            return job
+    return None
+@app.post("/mirror/complete", summary="Mirror posts the result of a completed job")
+async def complete_job(
+    result: CompleteRequest,
+    x_api_key: Optional[str] = Header(default=None),
+):
+    _check_auth(x_api_key)
+    async with _lock:
+        job = _jobs.get(result.job_id)
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+        if job.status != JobStatus.CLAIMED:
+            raise HTTPException(status_code=409, detail=f"Job is in state '{job.status}', cannot complete")
+        job.status = JobStatus.FAILED if result.error else JobStatus.COMPLETED
+        job.completed_at = time.time()
+        job.response_status_code = result.response_status_code
+        job.response_headers = result.response_headers
+        job.response_body = result.response_body
+        job.error = result.error
+    log.info(f"Job {result.job_id} completed by mirror '{result.mirror_id}' → {result.response_status_code}")
+    return {"ok": True}
+# ---------------------------------------------------------------------------
+# Debug / health
+# ---------------------------------------------------------------------------
+@app.get("/health")
+async def health():
+    async with _lock:
+        counts = {s.value: 0 for s in JobStatus}
+        for job in _jobs.values():
+            counts[job.status.value] += 1
+    return {"status": "ok", "jobs": counts}
+@app.get("/jobs", summary="List all jobs (debug)")
+async def list_jobs(
+    x_api_key: Optional[str] = Header(default=None),
+):
+    _check_auth(x_api_key)
+    async with _lock:
+        return list(_jobs.values())