| | from pydantic import BaseModel |
| | from typing import List, Optional, Dict, Any |
| |
|
| |
|
class ChatMessage(BaseModel):
    # A single message of a chat conversation.
    # Both fields are required strings: neither declares a default.
    role: str  # free-form string; no fixed set of roles is enforced here
    content: str
| |
|
| |
|
class ChatRequest(BaseModel):
    # Request body for a chat completion.
    #
    # Only `messages` is required; every other field carries a default.
    #
    # NOTE(review): the Optional[...] fields accept an explicit None in
    # addition to their declared default, so code that consumes a request
    # must be prepared for None — confirm that this is intended.
    messages: List[ChatMessage]
    model: str = "sam-x-nano"
    max_tokens: Optional[int] = 512
    temperature: Optional[float] = 0.8
    top_k: Optional[int] = 40
    top_p: Optional[float] = 0.9
    repetition_penalty: Optional[float] = 1.1
    stream: Optional[bool] = False
| |
|
| |
|
class ChatResponse(BaseModel):
    # Response body for a chat completion.
    #
    # Every field except `object` is required.
    id: str
    # Shadows the `object` builtin inside this class body only.
    # NOTE(review): presumably the name is dictated by the wire format —
    # confirm before renaming.
    object: str = "chat.completion"
    created: int  # NOTE(review): units/epoch are not shown here — confirm
    model: str
    # Loosely typed: the shape of each choice is not validated by this model.
    choices: List[Dict[str, Any]]
    # Mapping of string keys to integers; the key names are not constrained.
    usage: Dict[str, int]
| |
|
| |
|
class WorkerStatus(BaseModel):
    # Status record for a worker; all four fields are required.
    model_name: str
    is_active: bool
    load: float  # NOTE(review): scale/range is not shown here — confirm
    last_heartbeat: int  # NOTE(review): presumably a timestamp; confirm units