Spaces:
Build error
feat(sprint4-session-b): endpoints jobs, ingestion et modèles IA
Browse filesNouveaux modèles SQLAlchemy :
- models/job.py : JobModel (id, corpus_id, page_id, status
pending/running/done/failed, started_at, finished_at, error_message)
- models/model_config_db.py : ModelConfigDB (corpus_id PK, provider_type,
selected_model_id, selected_model_display_name, updated_at)
- models/__init__.py : enregistrement JobModel + ModelConfigDB dans Base
Nouveaux routers (R10 — /api/v1/) :
- api/v1/jobs.py :
POST /corpora/{id}/run → 1 job par page du corpus (202, immédiat)
POST /pages/{id}/run → 1 job pour la page (202, immédiat)
GET /jobs/{id} → état du job (404 si inconnu)
POST /jobs/{id}/retry → relance FAILED→pending (409 si non-FAILED)
- api/v1/ingest.py :
POST /corpora/{id}/ingest/files → multipart, pages créées
POST /corpora/{id}/ingest/iiif-manifest → manifest IIIF 3.0 + 2.x
POST /corpora/{id}/ingest/iiif-images → liste d'URLs directes
- api/v1/models_api.py :
POST /settings/api-key → validation sans stockage (R06)
GET /models → list_all_models() mockable
POST /models/refresh → idem + refreshed_at
PUT /corpora/{id}/model → ModelConfigDB upsert
GET /corpora/{id}/model → config active (404 si absent)
Infrastructure :
- python-multipart ajouté à pyproject.toml (requis pour UploadFile)
- _fetch_json_manifest et _validate_api_key isolés pour être patchables
dans les tests sans dépendances réseau
Tests (61 nouveaux) :
- test_api_jobs.py : corpus.run, pages.run, get_job, retry (409/200)
- test_api_ingest.py : files (disk write vérifié), IIIF 3.0/2.x, images,
erreurs 404/422/502 ; mock _fetch_json_manifest
- test_api_models.py : api-key (R06 vérifié), models list/refresh,
PUT/GET model ; mock list_all_models + _validate_api_key
Total : 457 tests passent, 3 skippés (intégration réseau).
https://claude.ai/code/session_018woyEHc8HG2th7V4ewJ4Kg
- backend/app/api/v1/ingest.py +329 -0
- backend/app/api/v1/jobs.py +158 -0
- backend/app/api/v1/models_api.py +173 -0
- backend/app/main.py +4 -1
- backend/app/models/__init__.py +9 -1
- backend/app/models/job.py +40 -0
- backend/app/models/model_config_db.py +29 -0
- backend/pyproject.toml +1 -0
- backend/tests/test_api_ingest.py +419 -0
- backend/tests/test_api_jobs.py +289 -0
- backend/tests/test_api_models.py +365 -0
|
@@ -0,0 +1,329 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Endpoints d'ingestion de corpus (R10 — préfixe /api/v1/).
|
| 3 |
+
|
| 4 |
+
POST /api/v1/corpora/{id}/ingest/files
|
| 5 |
+
POST /api/v1/corpora/{id}/ingest/iiif-manifest
|
| 6 |
+
POST /api/v1/corpora/{id}/ingest/iiif-images
|
| 7 |
+
|
| 8 |
+
Règle (R01) : aucune logique spécifique à un corpus particulier.
|
| 9 |
+
Règle : ingestion = création des PageModel en BDD uniquement.
|
| 10 |
+
L'analyse IA est déclenchée séparément via /run.
|
| 11 |
+
"""
|
| 12 |
+
# 1. stdlib
|
| 13 |
+
import logging
|
| 14 |
+
import uuid
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
|
| 17 |
+
# 2. third-party
|
| 18 |
+
import httpx
|
| 19 |
+
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
|
| 20 |
+
from pydantic import BaseModel
|
| 21 |
+
from sqlalchemy import func, select
|
| 22 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 23 |
+
|
| 24 |
+
# 3. local
|
| 25 |
+
from app import config as _config_module
|
| 26 |
+
from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
|
| 27 |
+
from app.models.database import get_db
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger(__name__)
|
| 30 |
+
|
| 31 |
+
router = APIRouter(tags=["ingestion"])
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ── Schémas ───────────────────────────────────────────────────────────────────
|
| 35 |
+
|
| 36 |
+
class IIIFManifestRequest(BaseModel):
    """Payload for manifest ingestion."""

    # URL of a IIIF Presentation manifest (3.0 or 2.x are both accepted).
    manifest_url: str
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class IIIFImagesRequest(BaseModel):
    """Payload for direct image-URL ingestion.

    ``urls`` and ``folio_labels`` are parallel lists and must have the
    same length (enforced by the endpoint, not by the schema).
    """

    urls: list[str]
    folio_labels: list[str]
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class IngestResponse(BaseModel):
    """Common response shape for the three ingestion endpoints."""

    corpus_id: str
    manuscript_id: str
    pages_created: int
    page_ids: list[str]  # ids of the PageModel rows created by this call
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
| 53 |
+
|
| 54 |
+
async def _get_corpus_or_404(corpus_id: str, db: AsyncSession) -> CorpusModel:
    """Fetch a corpus by primary key, raising a 404 HTTPException when absent."""
    found = await db.get(CorpusModel, corpus_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="Corpus introuvable")
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
async def _get_or_create_manuscript(
    db: AsyncSession, corpus_id: str, title: str | None = None
) -> ManuscriptModel:
    """Return the corpus's first manuscript, or create a default one.

    The new manuscript's title falls back to the corpus title (or the
    corpus id when the corpus row itself is missing). The session is
    flushed — not committed — so the generated id is usable immediately.
    """
    result = await db.execute(
        select(ManuscriptModel).where(ManuscriptModel.corpus_id == corpus_id).limit(1)
    )
    ms = result.scalar_one_or_none()
    if ms is not None:
        return ms

    # No manuscript yet: create one with a best-effort title.
    corpus = await db.get(CorpusModel, corpus_id)
    ms = ManuscriptModel(
        id=str(uuid.uuid4()),
        corpus_id=corpus_id,
        title=title or (corpus.title if corpus else corpus_id),
        total_pages=0,
    )
    db.add(ms)
    await db.flush()  # flush only: the caller decides when to commit
    return ms
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
async def _next_sequence(db: AsyncSession, manuscript_id: str) -> int:
    """Return the next free page sequence number (max + 1, or 1 when empty)."""
    stmt = select(func.max(PageModel.sequence)).where(
        PageModel.manuscript_id == manuscript_id
    )
    current_max = (await db.execute(stmt)).scalar_one_or_none()
    if not current_max:
        return 1
    return current_max + 1
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
async def _create_page(
    db: AsyncSession,
    manuscript_id: str,
    corpus_id: str,
    folio_label: str,
    sequence: int,
    image_master_path: str | None = None,
) -> PageModel:
    """Build and stage a PageModel (status "INGESTED"); no commit is done here.

    NOTE(review): every caller in this module passes the corpus *slug* as
    ``corpus_id``, and the page id is ``{slug}-{folio_label}`` — ingesting
    the same folio label twice would collide on the primary key. Confirm
    whether duplicate-label ingestion should be rejected upstream.
    """
    page = PageModel(
        id=f"{corpus_id}-{folio_label}",
        manuscript_id=manuscript_id,
        folio_label=folio_label,
        sequence=sequence,
        image_master_path=image_master_path,
        processing_status="INGESTED",
    )
    db.add(page)
    return page
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
async def _fetch_json_manifest(url: str) -> dict:
    """Download a IIIF manifest as JSON.

    Kept as a module-level function so tests can patch it and avoid any
    network dependency.
    """
    async with httpx.AsyncClient() as http:
        response = await http.get(url, follow_redirects=True, timeout=30.0)
        response.raise_for_status()
        return response.json()
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def _extract_canvas_label(canvas: dict, index: int) -> str:
|
| 124 |
+
"""Extrait le folio_label d'un canvas IIIF (3.0 ou 2.x)."""
|
| 125 |
+
label = canvas.get("label")
|
| 126 |
+
if isinstance(label, dict):
|
| 127 |
+
for lang in ("none", "en", "fr", "la"):
|
| 128 |
+
values = label.get(lang)
|
| 129 |
+
if values:
|
| 130 |
+
return (values[0] if isinstance(values, list) else str(values)).strip()
|
| 131 |
+
elif isinstance(label, str) and label.strip():
|
| 132 |
+
return label.strip()
|
| 133 |
+
return f"f{index + 1:03d}r"
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _extract_canvas_image_url(canvas: dict) -> str | None:
|
| 137 |
+
"""Extrait l'URL de l'image principale d'un canvas IIIF (3.0 ou 2.x)."""
|
| 138 |
+
# IIIF 3.0
|
| 139 |
+
items = canvas.get("items") or []
|
| 140 |
+
if items:
|
| 141 |
+
ann_items = (items[0].get("items") or []) if items else []
|
| 142 |
+
if ann_items:
|
| 143 |
+
body = ann_items[0].get("body") or {}
|
| 144 |
+
if isinstance(body, dict):
|
| 145 |
+
return body.get("id") or body.get("@id")
|
| 146 |
+
# IIIF 2.x
|
| 147 |
+
images = canvas.get("images") or []
|
| 148 |
+
if images:
|
| 149 |
+
resource = images[0].get("resource") or {}
|
| 150 |
+
return resource.get("@id")
|
| 151 |
+
# Fallback : ID du canvas
|
| 152 |
+
return canvas.get("id") or canvas.get("@id")
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
# ── Endpoints ─────────────────────────────────────────────────────────────────
|
| 156 |
+
|
| 157 |
+
@router.post("/corpora/{corpus_id}/ingest/files", response_model=IngestResponse, status_code=201)
async def ingest_files(
    corpus_id: str,
    files: list[UploadFile] = File(...),
    db: AsyncSession = Depends(get_db),
) -> IngestResponse:
    """Ingest a list of image files (multipart/form-data).

    Each uploaded file creates one PageModel; its bytes are written to
    ``data/corpora/{slug}/masters/{folio_label}/{filename}``.
    Raises 404 when the corpus does not exist.
    """
    corpus = await _get_corpus_or_404(corpus_id, db)
    ms = await _get_or_create_manuscript(db, corpus_id)
    seq = await _next_sequence(db, ms.id)

    created: list[PageModel] = []
    for i, upload in enumerate(files):
        # Path(...).name strips any client-supplied directory components.
        filename = Path(upload.filename or f"file_{i}").name
        folio_label = Path(filename).stem  # file name without extension

        master_dir = (
            _config_module.settings.data_dir
            / "corpora"
            / corpus.slug
            / "masters"
            / folio_label
        )
        master_dir.mkdir(parents=True, exist_ok=True)
        master_path = master_dir / filename
        # NOTE(review): the whole upload is buffered in memory before the
        # write — fine for page scans, confirm for very large files.
        content = await upload.read()
        master_path.write_bytes(content)

        page = await _create_page(
            db, ms.id, corpus.slug, folio_label, seq + i,
            image_master_path=str(master_path),
        )
        created.append(page)

    ms.total_pages = (ms.total_pages or 0) + len(created)
    await db.commit()

    logger.info(
        "Fichiers ingérés",
        extra={"corpus_id": corpus_id, "count": len(created)},
    )
    return IngestResponse(
        corpus_id=corpus_id,
        manuscript_id=ms.id,
        pages_created=len(created),
        page_ids=[p.id for p in created],
    )
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
@router.post("/corpora/{corpus_id}/ingest/iiif-manifest", response_model=IngestResponse, status_code=201)
async def ingest_iiif_manifest(
    corpus_id: str,
    body: IIIFManifestRequest,
    db: AsyncSession = Depends(get_db),
) -> IngestResponse:
    """Download a IIIF manifest, extract its canvases and create PageModels.

    Raises 404 (unknown corpus), 502 (manifest download failure) or
    422 (manifest contains no canvas).
    """
    corpus = await _get_corpus_or_404(corpus_id, db)

    try:
        manifest = await _fetch_json_manifest(body.manifest_url)
    except httpx.HTTPStatusError as exc:
        # Upstream answered with an HTTP error status.
        raise HTTPException(
            status_code=502,
            detail=f"Erreur HTTP lors du téléchargement du manifest : {exc.response.status_code}",
        )
    except (httpx.RequestError, httpx.TimeoutException) as exc:
        # Connection-level failure (DNS, refused, timeout...).
        raise HTTPException(
            status_code=502,
            detail=f"Erreur réseau lors du téléchargement du manifest : {exc}",
        )

    # Detect the IIIF flavour: 3.0 lists canvases under "items",
    # 2.x nests them under sequences[0].canvases.
    canvases: list[dict] = manifest.get("items") or []
    if not canvases:
        sequences = manifest.get("sequences") or []
        canvases = sequences[0].get("canvases", []) if sequences else []

    if not canvases:
        raise HTTPException(
            status_code=422,
            detail="Le manifest IIIF ne contient aucun canvas (items vide)",
        )

    # Manuscript title from the manifest label: 3.0 language map or
    # 2.x plain string, falling back to the corpus title.
    ms_title_raw = manifest.get("label") or {}
    if isinstance(ms_title_raw, dict):
        for lang in ("none", "fr", "en"):
            v = ms_title_raw.get(lang)
            if v:
                ms_title = v[0] if isinstance(v, list) else str(v)
                break
        else:  # for/else: no language entry matched
            ms_title = corpus.title
    elif isinstance(ms_title_raw, str):
        ms_title = ms_title_raw
    else:
        ms_title = corpus.title

    ms = await _get_or_create_manuscript(db, corpus_id, title=ms_title)
    seq = await _next_sequence(db, ms.id)

    created: list[PageModel] = []
    for i, canvas in enumerate(canvases):
        folio_label = _extract_canvas_label(canvas, i)
        image_url = _extract_canvas_image_url(canvas)
        # For IIIF ingestion the "master path" holds the remote image URL.
        page = await _create_page(
            db, ms.id, corpus.slug, folio_label, seq + i,
            image_master_path=image_url,
        )
        created.append(page)

    ms.total_pages = (ms.total_pages or 0) + len(created)
    await db.commit()

    logger.info(
        "Manifest IIIF ingéré",
        extra={"corpus_id": corpus_id, "url": body.manifest_url, "pages": len(created)},
    )
    return IngestResponse(
        corpus_id=corpus_id,
        manuscript_id=ms.id,
        pages_created=len(created),
        page_ids=[p.id for p in created],
    )
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
@router.post("/corpora/{corpus_id}/ingest/iiif-images", response_model=IngestResponse, status_code=201)
async def ingest_iiif_images(
    corpus_id: str,
    body: IIIFImagesRequest,
    db: AsyncSession = Depends(get_db),
) -> IngestResponse:
    """Ingest a list of direct IIIF image URLs.

    ``urls`` and ``folio_labels`` must be parallel lists of the same length;
    raises 422 otherwise (or when the lists are empty), 404 for an unknown corpus.
    """
    url_count, label_count = len(body.urls), len(body.folio_labels)
    if url_count != label_count:
        raise HTTPException(
            status_code=422,
            detail=f"urls ({len(body.urls)}) et folio_labels ({len(body.folio_labels)}) doivent avoir la même longueur",
        )
    if url_count == 0:
        raise HTTPException(status_code=422, detail="La liste d'URLs est vide")

    corpus = await _get_corpus_or_404(corpus_id, db)
    manuscript = await _get_or_create_manuscript(db, corpus_id)
    first_seq = await _next_sequence(db, manuscript.id)

    new_pages: list[PageModel] = []
    for offset, (image_url, label) in enumerate(zip(body.urls, body.folio_labels)):
        new_pages.append(
            await _create_page(
                db,
                manuscript.id,
                corpus.slug,
                label,
                first_seq + offset,
                image_master_path=image_url,
            )
        )

    manuscript.total_pages = (manuscript.total_pages or 0) + len(new_pages)
    await db.commit()

    logger.info(
        "Images IIIF ingérées",
        extra={"corpus_id": corpus_id, "count": len(new_pages)},
    )
    return IngestResponse(
        corpus_id=corpus_id,
        manuscript_id=manuscript.id,
        pages_created=len(new_pages),
        page_ids=[p.id for p in new_pages],
    )
|
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Endpoints de gestion des jobs de traitement (R10 — préfixe /api/v1/).
|
| 3 |
+
|
| 4 |
+
POST /api/v1/corpora/{id}/run → crée un job par page du corpus
|
| 5 |
+
POST /api/v1/pages/{id}/run → crée un job pour une page
|
| 6 |
+
GET /api/v1/jobs/{job_id} → état du job
|
| 7 |
+
POST /api/v1/jobs/{job_id}/retry → relance un job FAILED
|
| 8 |
+
|
| 9 |
+
Règle : les jobs sont créés en BDD et retournent immédiatement.
|
| 10 |
+
Le pipeline réel (analyzer) sera branché en Session C.
|
| 11 |
+
"""
|
| 12 |
+
# 1. stdlib
|
| 13 |
+
import uuid
|
| 14 |
+
from datetime import datetime, timezone
|
| 15 |
+
|
| 16 |
+
# 2. third-party
|
| 17 |
+
from fastapi import APIRouter, Depends, HTTPException
|
| 18 |
+
from pydantic import BaseModel, ConfigDict
|
| 19 |
+
from sqlalchemy import select
|
| 20 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 21 |
+
|
| 22 |
+
# 3. local
|
| 23 |
+
from app.models.corpus import CorpusModel, PageModel
|
| 24 |
+
from app.models.database import get_db
|
| 25 |
+
from app.models.job import JobModel
|
| 26 |
+
|
| 27 |
+
router = APIRouter(tags=["jobs"])
|
| 28 |
+
|
| 29 |
+
_JOB_STATUS_PENDING = "pending"
|
| 30 |
+
_JOB_STATUS_FAILED = "failed"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ── Schémas de réponse ────────────────────────────────────────────────────────
|
| 34 |
+
|
| 35 |
+
class JobResponse(BaseModel):
    """Serialized view of a job row (populated from ORM attributes)."""

    model_config = ConfigDict(from_attributes=True)

    id: str
    corpus_id: str
    page_id: str | None          # nullable in the schema; current callers always set it
    status: str                  # pending / running / done / failed
    started_at: datetime | None
    finished_at: datetime | None
    error_message: str | None    # populated on failure, cleared by retry
    created_at: datetime
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class CorpusRunResponse(BaseModel):
    """Summary returned by POST /corpora/{id}/run (one job per page)."""

    corpus_id: str
    jobs_created: int
    job_ids: list[str]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
| 55 |
+
|
| 56 |
+
def _new_job(corpus_id: str, page_id: str | None) -> JobModel:
    """Build a fresh pending JobModel (not yet added to the session)."""
    return JobModel(
        id=str(uuid.uuid4()),
        corpus_id=corpus_id,
        page_id=page_id,
        status=_JOB_STATUS_PENDING,
        started_at=None,
        finished_at=None,
        error_message=None,
        created_at=datetime.now(timezone.utc),
    )
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
# ── Endpoints ─────────────────────────────────────────────────────────────────
|
| 71 |
+
|
| 72 |
+
@router.post("/corpora/{corpus_id}/run", response_model=CorpusRunResponse, status_code=202)
async def run_corpus(
    corpus_id: str, db: AsyncSession = Depends(get_db)
) -> CorpusRunResponse:
    """Launch the pipeline on every page of a corpus.

    Creates one JobModel per page (status=pending) and returns immediately;
    the real pipeline will be wired in during Session C.
    Raises 404 when the corpus does not exist.
    """
    corpus = await db.get(CorpusModel, corpus_id)
    if corpus is None:
        raise HTTPException(status_code=404, detail="Corpus introuvable")

    # Pages are linked to manuscripts, not directly to the corpus,
    # so collect the corpus's manuscript ids first.
    from app.models.corpus import ManuscriptModel
    ms_result = await db.execute(
        select(ManuscriptModel).where(ManuscriptModel.corpus_id == corpus_id)
    )
    ms_ids = [ms.id for ms in ms_result.scalars().all()]

    pages_result = await db.execute(
        select(PageModel).where(PageModel.manuscript_id.in_(ms_ids))
    )
    pages = list(pages_result.scalars().all())

    jobs = [_new_job(corpus_id, page.id) for page in pages]
    for job in jobs:
        db.add(job)
    await db.commit()

    return CorpusRunResponse(
        corpus_id=corpus_id,
        jobs_created=len(jobs),
        job_ids=[j.id for j in jobs],
    )
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
@router.post("/pages/{page_id}/run", response_model=JobResponse, status_code=202)
async def run_page(
    page_id: str, db: AsyncSession = Depends(get_db)
) -> JobModel:
    """Launch the pipeline on a single page and return the created job."""
    from app.models.corpus import ManuscriptModel

    page = await db.get(PageModel, page_id)
    if page is None:
        raise HTTPException(status_code=404, detail="Page introuvable")

    owner = await db.get(ManuscriptModel, page.manuscript_id)
    if owner is None:
        raise HTTPException(status_code=404, detail="Manuscrit introuvable")

    # The job is linked back to the corpus through the owning manuscript.
    new_job = _new_job(owner.corpus_id, page_id)
    db.add(new_job)
    await db.commit()
    await db.refresh(new_job)
    return new_job
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
@router.get("/jobs/{job_id}", response_model=JobResponse)
async def get_job(job_id: str, db: AsyncSession = Depends(get_db)) -> JobModel:
    """Return the current state of a job, or 404 when unknown."""
    found = await db.get(JobModel, job_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="Job introuvable")
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
@router.post("/jobs/{job_id}/retry", response_model=JobResponse)
async def retry_job(job_id: str, db: AsyncSession = Depends(get_db)) -> JobModel:
    """Re-queue a FAILED job by resetting its status to pending.

    Responds 404 for an unknown job and 409 when the job is not FAILED.
    """
    job = await db.get(JobModel, job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job introuvable")
    if job.status != _JOB_STATUS_FAILED:
        raise HTTPException(
            status_code=409,
            detail=f"Le job ne peut être relancé que depuis l'état 'failed' (statut actuel : '{job.status}')",
        )

    # Wipe every execution trace so the worker treats it as brand new.
    job.status = _JOB_STATUS_PENDING
    for field in ("error_message", "started_at", "finished_at"):
        setattr(job, field, None)

    await db.commit()
    await db.refresh(job)
    return job
|
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Endpoints de gestion des modèles IA (R10 — préfixe /api/v1/).
|
| 3 |
+
|
| 4 |
+
POST /api/v1/settings/api-key → valide la clé sans la stocker (R06)
|
| 5 |
+
GET /api/v1/models → liste les modèles disponibles
|
| 6 |
+
POST /api/v1/models/refresh → force la mise à jour de la liste
|
| 7 |
+
PUT /api/v1/corpora/{id}/model → associe un modèle à un corpus
|
| 8 |
+
GET /api/v1/corpora/{id}/model → modèle actif d'un corpus
|
| 9 |
+
|
| 10 |
+
Règle R06 : la clé API ne transite jamais vers la BDD — elle reste
|
| 11 |
+
exclusivement dans les variables d'environnement.
|
| 12 |
+
"""
|
| 13 |
+
# 1. stdlib
|
| 14 |
+
import logging
|
| 15 |
+
from datetime import datetime, timezone
|
| 16 |
+
|
| 17 |
+
# 2. third-party
|
| 18 |
+
from fastapi import APIRouter, Depends, HTTPException
|
| 19 |
+
from pydantic import BaseModel, ConfigDict
|
| 20 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 21 |
+
|
| 22 |
+
# 3. local
|
| 23 |
+
from app.models.corpus import CorpusModel
|
| 24 |
+
from app.models.database import get_db
|
| 25 |
+
from app.models.model_config_db import ModelConfigDB
|
| 26 |
+
from app.services.ai.model_registry import list_all_models
|
| 27 |
+
|
| 28 |
+
logger = logging.getLogger(__name__)
|
| 29 |
+
|
| 30 |
+
router = APIRouter(tags=["models"])
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ── Schémas ───────────────────────────────────────────────────────────────────
|
| 34 |
+
|
| 35 |
+
class ApiKeyRequest(BaseModel):
    """Key-validation payload; the key is only tested, never stored (R06)."""

    api_key: str
    provider_type: str = "google_ai_studio"
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class ApiKeyResponse(BaseModel):
    """Outcome of an API-key validation attempt."""

    valid: bool
    provider: str
    model_count: int          # number of matching models listed with this key
    error: str | None = None  # provider error text when valid is False
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class ModelSelectRequest(BaseModel):
    """Model selection payload for PUT /corpora/{id}/model."""

    model_id: str
    provider_type: str
    display_name: str = ""  # empty means "fall back to model_id"
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class ModelConfigResponse(BaseModel):
    """Serialized per-corpus model configuration (read from ORM attributes)."""

    model_config = ConfigDict(from_attributes=True)

    corpus_id: str
    provider_type: str
    selected_model_id: str
    selected_model_display_name: str
    updated_at: datetime
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class ModelsRefreshResponse(BaseModel):
    """Response for POST /models/refresh."""

    models: list[dict]
    count: int
    refreshed_at: datetime  # UTC timestamp of this refresh
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# ── Validation de clé API (isolé pour les tests) ──────────────────────────────
|
| 70 |
+
|
| 71 |
+
def _validate_api_key(api_key: str, provider_type: str) -> tuple[bool, int, str | None]:
    """Try to list models with the supplied key.

    Returns ``(valid, model_count, error_message)``.
    Module-level so tests can patch it without hitting the network.
    NOTE(review): ``provider_type`` is currently unused — only Google AI
    Studio is probed; confirm before adding other providers.
    """
    try:
        from google import genai  # local import to avoid a hard top-level dependency
        client = genai.Client(api_key=api_key)
        raw_models = list(client.models.list())
        # Count only Gemini-named models.
        vision_count = sum(
            1 for m in raw_models if "gemini" in (getattr(m, "name", "") or "").lower()
        )
        return True, vision_count, None
    except Exception as exc:
        # Deliberately broad: any failure (bad key, network, missing SDK)
        # maps to "invalid key" with the error text passed through.
        return False, 0, str(exc)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# ── Endpoints ─────────────────────────────────────────────────────────────────
|
| 90 |
+
|
| 91 |
+
@router.post("/settings/api-key", response_model=ApiKeyResponse)
async def validate_api_key(body: ApiKeyRequest) -> ApiKeyResponse:
    """Check that an API key works via a list-models call.

    The key is NOT persisted (R06); it lives only in environment variables.
    """
    is_valid, model_count, failure = _validate_api_key(body.api_key, body.provider_type)
    return ApiKeyResponse(
        valid=is_valid,
        provider=body.provider_type,
        model_count=model_count,
        error=failure,
    )
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
@router.get("/models", response_model=list[dict])
async def get_models() -> list[dict]:
    """List every model available across the configured providers."""
    return [entry.model_dump() for entry in list_all_models()]
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
@router.post("/models/refresh", response_model=ModelsRefreshResponse)
async def refresh_models() -> ModelsRefreshResponse:
    """Force a refresh of the model list and stamp the refresh time."""
    snapshot = [entry.model_dump() for entry in list_all_models()]
    return ModelsRefreshResponse(
        models=snapshot,
        count=len(snapshot),
        refreshed_at=datetime.now(timezone.utc),
    )
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
@router.put("/corpora/{corpus_id}/model", response_model=ModelConfigResponse)
async def set_corpus_model(
    corpus_id: str,
    body: ModelSelectRequest,
    db: AsyncSession = Depends(get_db),
) -> ModelConfigDB:
    """Attach an AI model to a corpus (upsert of the per-corpus config row).

    Raises 404 when the corpus does not exist.
    """
    corpus = await db.get(CorpusModel, corpus_id)
    if corpus is None:
        raise HTTPException(status_code=404, detail="Corpus introuvable")

    # Fall back to the raw model id when no display name was supplied.
    display_name = body.display_name or body.model_id

    # Upsert: ModelConfigDB is keyed by corpus_id (one config per corpus).
    config = await db.get(ModelConfigDB, corpus_id)
    if config is None:
        config = ModelConfigDB(
            corpus_id=corpus_id,
            provider_type=body.provider_type,
            selected_model_id=body.model_id,
            selected_model_display_name=display_name,
            updated_at=datetime.now(timezone.utc),
        )
        db.add(config)
    else:
        config.provider_type = body.provider_type
        config.selected_model_id = body.model_id
        config.selected_model_display_name = display_name
        config.updated_at = datetime.now(timezone.utc)

    await db.commit()
    await db.refresh(config)
    return config
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
@router.get("/corpora/{corpus_id}/model", response_model=ModelConfigResponse)
async def get_corpus_model(
    corpus_id: str, db: AsyncSession = Depends(get_db)
) -> ModelConfigDB:
    """Return the active AI-model configuration for a corpus.

    404 when the corpus is unknown, or when no model is configured for it.
    """
    if await db.get(CorpusModel, corpus_id) is None:
        raise HTTPException(status_code=404, detail="Corpus introuvable")

    config = await db.get(ModelConfigDB, corpus_id)
    if config is None:
        raise HTTPException(
            status_code=404,
            detail="Aucun modèle configuré pour ce corpus",
        )
    return config
|
|
@@ -15,7 +15,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
| 15 |
|
| 16 |
# 3. local — on importe les modèles pour que Base.metadata les connaisse
|
| 17 |
import app.models # noqa: F401 (enregistrement des modèles SQLAlchemy)
|
| 18 |
-
from app.api.v1 import corpora, export, pages, profiles
|
| 19 |
from app.models.database import Base, engine
|
| 20 |
|
| 21 |
logger = logging.getLogger(__name__)
|
|
@@ -55,3 +55,6 @@ app.include_router(corpora.router, prefix=_V1_PREFIX)
|
|
| 55 |
app.include_router(pages.router, prefix=_V1_PREFIX)
|
| 56 |
app.include_router(export.router, prefix=_V1_PREFIX)
|
| 57 |
app.include_router(profiles.router, prefix=_V1_PREFIX)
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# 3. local — on importe les modèles pour que Base.metadata les connaisse
|
| 17 |
import app.models # noqa: F401 (enregistrement des modèles SQLAlchemy)
|
| 18 |
+
from app.api.v1 import corpora, export, ingest, jobs, models_api, pages, profiles
|
| 19 |
from app.models.database import Base, engine
|
| 20 |
|
| 21 |
logger = logging.getLogger(__name__)
|
|
|
|
| 55 |
app.include_router(pages.router, prefix=_V1_PREFIX)
|
| 56 |
app.include_router(export.router, prefix=_V1_PREFIX)
|
| 57 |
app.include_router(profiles.router, prefix=_V1_PREFIX)
|
| 58 |
+
app.include_router(jobs.router, prefix=_V1_PREFIX)
|
| 59 |
+
app.include_router(ingest.router, prefix=_V1_PREFIX)
|
| 60 |
+
app.include_router(models_api.router, prefix=_V1_PREFIX)
|
|
@@ -3,5 +3,13 @@ Modèles SQLAlchemy — importés ici pour que Base.metadata les connaisse
|
|
| 3 |
au moment de la création des tables (Base.metadata.create_all).
|
| 4 |
"""
|
| 5 |
from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
__all__ = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
au moment de la création des tables (Base.metadata.create_all).
|
| 4 |
"""
|
| 5 |
from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
|
| 6 |
+
from app.models.job import JobModel
|
| 7 |
+
from app.models.model_config_db import ModelConfigDB
|
| 8 |
|
| 9 |
+
__all__ = [
|
| 10 |
+
"CorpusModel",
|
| 11 |
+
"ManuscriptModel",
|
| 12 |
+
"PageModel",
|
| 13 |
+
"JobModel",
|
| 14 |
+
"ModelConfigDB",
|
| 15 |
+
]
|
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Modèle SQLAlchemy 2.0 — table des jobs de traitement.
|
| 3 |
+
|
| 4 |
+
Un job suit l'exécution du pipeline sur une page.
|
| 5 |
+
corpus.run → crée un JobModel par page du corpus (page_id renseigné)
|
| 6 |
+
pages.run → crée un JobModel pour la page cible
|
| 7 |
+
|
| 8 |
+
Cycle de vie :
|
| 9 |
+
pending → running → done
|
| 10 |
+
↘ failed
|
| 11 |
+
"""
|
| 12 |
+
# 1. stdlib
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
|
| 15 |
+
# 2. third-party
|
| 16 |
+
from sqlalchemy import DateTime, ForeignKey, String, Text
|
| 17 |
+
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
| 18 |
+
|
| 19 |
+
# 3. local
|
| 20 |
+
from app.models.database import Base
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class JobModel(Base):
    """Tracks one pipeline job (1 job = 1 page)."""

    __tablename__ = "jobs"

    # Opaque string primary key (callers generate UUIDs; no DB-side default).
    id: Mapped[str] = mapped_column(String, primary_key=True)
    # Owning corpus; indexed so corpus-wide job listings stay fast.
    corpus_id: Mapped[str] = mapped_column(
        String, ForeignKey("corpora.id"), nullable=False, index=True
    )
    # Target page. Nullable in the schema — NOTE(review): the run endpoints
    # appear to always set it; confirm whether a NULL page_id is ever created.
    page_id: Mapped[str | None] = mapped_column(
        String, ForeignKey("pages.id"), nullable=True, index=True
    )
    # pending / running / done / failed
    status: Mapped[str] = mapped_column(String, nullable=False, default="pending")
    # Lifecycle timestamps: unset until the job actually starts / finishes.
    started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
    finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
    # Human-readable failure reason; only populated for status == "failed".
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    # No column default: callers must supply created_at explicitly.
    created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False)
|
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Modèle SQLAlchemy 2.0 — configuration du modèle IA par corpus.
|
| 3 |
+
|
| 4 |
+
Une seule ligne par corpus (corpus_id = PK).
|
| 5 |
+
La clé API n'est JAMAIS stockée ici (R06) — elle reste dans l'environnement.
|
| 6 |
+
"""
|
| 7 |
+
# 1. stdlib
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
|
| 10 |
+
# 2. third-party
|
| 11 |
+
from sqlalchemy import DateTime, ForeignKey, String
|
| 12 |
+
from sqlalchemy.orm import Mapped, mapped_column
|
| 13 |
+
|
| 14 |
+
# 3. local
|
| 15 |
+
from app.models.database import Base
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class ModelConfigDB(Base):
    """AI model selected for a corpus (one row per corpus)."""

    __tablename__ = "model_configs"

    # One config per corpus: the corpus id doubles as the primary key,
    # which makes the PUT endpoint a natural upsert.
    corpus_id: Mapped[str] = mapped_column(
        String, ForeignKey("corpora.id"), primary_key=True
    )
    provider_type: Mapped[str] = mapped_column(String, nullable=False)
    selected_model_id: Mapped[str] = mapped_column(String, nullable=False)
    selected_model_display_name: Mapped[str] = mapped_column(String, nullable=False)
    # No column default: the endpoints set updated_at explicitly on every write.
    updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False)
|
|
@@ -18,6 +18,7 @@ dependencies = [
|
|
| 18 |
"httpx>=0.27",
|
| 19 |
"lxml>=5.2",
|
| 20 |
"Pillow>=10.3",
|
|
|
|
| 21 |
]
|
| 22 |
|
| 23 |
[project.optional-dependencies]
|
|
|
|
| 18 |
"httpx>=0.27",
|
| 19 |
"lxml>=5.2",
|
| 20 |
"Pillow>=10.3",
|
| 21 |
+
"python-multipart>=0.0.9",
|
| 22 |
]
|
| 23 |
|
| 24 |
[project.optional-dependencies]
|
|
@@ -0,0 +1,419 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests des endpoints d'ingestion /api/v1/corpora/{id}/ingest/* (Sprint 4 — Session B).
|
| 3 |
+
|
| 4 |
+
Stratégie :
|
| 5 |
+
- BDD SQLite en mémoire
|
| 6 |
+
- Appels réseau mockés via monkeypatch (_fetch_json_manifest)
|
| 7 |
+
- Écriture disque mockée via monkeypatch (Path.mkdir, Path.write_bytes)
|
| 8 |
+
|
| 9 |
+
Vérifie :
|
| 10 |
+
- POST /ingest/files → pages créées, IDs retournés
|
| 11 |
+
- POST /ingest/iiif-manifest → manifest parsé, pages créées
|
| 12 |
+
- POST /ingest/iiif-images → pages créées depuis liste d'URLs
|
| 13 |
+
- 404 si corpus inexistant
|
| 14 |
+
- 422 si données invalides
|
| 15 |
+
"""
|
| 16 |
+
# 1. stdlib
|
| 17 |
+
import uuid
|
| 18 |
+
from datetime import datetime, timezone
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
from unittest.mock import AsyncMock, patch
|
| 21 |
+
|
| 22 |
+
# 2. third-party
|
| 23 |
+
import pytest
|
| 24 |
+
|
| 25 |
+
# 3. local
|
| 26 |
+
import app.api.v1.ingest as ingest_module
|
| 27 |
+
from app.models.corpus import CorpusModel
|
| 28 |
+
from tests.conftest_api import async_client, db_session # noqa: F401
|
| 29 |
+
|
| 30 |
+
_NOW = datetime.now(timezone.utc)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ---------------------------------------------------------------------------
|
| 34 |
+
# Helpers
|
| 35 |
+
# ---------------------------------------------------------------------------
|
| 36 |
+
|
| 37 |
+
async def _make_corpus(db, slug="test-ingest"):
    """Insert and return a minimal corpus row for the ingestion tests."""
    fields = {
        "id": str(uuid.uuid4()),
        "slug": slug,
        "title": "Corpus Test",
        "profile_id": "medieval-illuminated",
        "created_at": _NOW,
        "updated_at": _NOW,
    }
    corpus = CorpusModel(**fields)
    db.add(corpus)
    await db.commit()
    await db.refresh(corpus)
    return corpus
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _iiif3_manifest(n_canvases: int = 3) -> dict:
|
| 49 |
+
"""Génère un manifest IIIF 3.0 minimal avec n canvases."""
|
| 50 |
+
return {
|
| 51 |
+
"@context": "http://iiif.io/api/presentation/3/context.json",
|
| 52 |
+
"id": "https://example.com/manifest",
|
| 53 |
+
"type": "Manifest",
|
| 54 |
+
"label": {"fr": ["Beatus de Saint-Sever"]},
|
| 55 |
+
"items": [
|
| 56 |
+
{
|
| 57 |
+
"id": f"https://example.com/canvas/{i}",
|
| 58 |
+
"type": "Canvas",
|
| 59 |
+
"label": {"none": [f"f{i:03d}r"]},
|
| 60 |
+
"width": 1500, "height": 2000,
|
| 61 |
+
"items": [
|
| 62 |
+
{
|
| 63 |
+
"id": f"https://example.com/canvas/{i}/page",
|
| 64 |
+
"type": "AnnotationPage",
|
| 65 |
+
"items": [
|
| 66 |
+
{
|
| 67 |
+
"id": f"https://example.com/canvas/{i}/annotation",
|
| 68 |
+
"type": "Annotation",
|
| 69 |
+
"motivation": "painting",
|
| 70 |
+
"body": {
|
| 71 |
+
"id": f"https://example.com/images/{i}.jpg",
|
| 72 |
+
"type": "Image",
|
| 73 |
+
"format": "image/jpeg",
|
| 74 |
+
},
|
| 75 |
+
"target": f"https://example.com/canvas/{i}",
|
| 76 |
+
}
|
| 77 |
+
],
|
| 78 |
+
}
|
| 79 |
+
],
|
| 80 |
+
}
|
| 81 |
+
for i in range(1, n_canvases + 1)
|
| 82 |
+
],
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _iiif2_manifest(n_canvases: int = 2) -> dict:
|
| 87 |
+
"""Génère un manifest IIIF 2.x minimal."""
|
| 88 |
+
return {
|
| 89 |
+
"@context": "http://iiif.io/api/presentation/2/context.json",
|
| 90 |
+
"@type": "sc:Manifest",
|
| 91 |
+
"label": "Test Manuscript 2.x",
|
| 92 |
+
"sequences": [
|
| 93 |
+
{
|
| 94 |
+
"canvases": [
|
| 95 |
+
{
|
| 96 |
+
"@id": f"https://example.com/canvas/{i}",
|
| 97 |
+
"@type": "sc:Canvas",
|
| 98 |
+
"label": f"f{i:03d}r",
|
| 99 |
+
"images": [
|
| 100 |
+
{
|
| 101 |
+
"resource": {
|
| 102 |
+
"@id": f"https://example.com/images/{i}.jpg"
|
| 103 |
+
}
|
| 104 |
+
}
|
| 105 |
+
],
|
| 106 |
+
}
|
| 107 |
+
for i in range(1, n_canvases + 1)
|
| 108 |
+
]
|
| 109 |
+
}
|
| 110 |
+
],
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# ---------------------------------------------------------------------------
|
| 115 |
+
# POST /api/v1/corpora/{id}/ingest/files
|
| 116 |
+
# ---------------------------------------------------------------------------
|
| 117 |
+
|
| 118 |
+
@pytest.mark.asyncio
async def test_ingest_files_corpus_not_found(async_client):
    """Uploading to an unknown corpus id must yield a 404."""
    upload = [("files", ("img.jpg", b"data", "image/jpeg"))]
    response = await async_client.post(
        "/api/v1/corpora/nonexistent/ingest/files", files=upload
    )
    assert response.status_code == 404
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
@pytest.mark.asyncio
async def test_ingest_files_ok(async_client, db_session, tmp_path, monkeypatch):
    """Multipart upload creates one page per file and reports their ids.

    The data directory is redirected to *tmp_path* with a single
    ``monkeypatch.setattr``, which undoes itself at teardown. The previous
    version patched twice: a walrus/``__import__`` monkeypatch replaced the
    whole ``settings`` object with a throwaway fake, and the manual
    save/restore then captured the *already patched* value — so the
    ``finally`` block restored nothing useful. One patch is enough.
    """
    corpus = await _make_corpus(db_session)

    import app.config as _cfg
    # monkeypatch restores the original data_dir automatically after the test.
    monkeypatch.setattr(_cfg.settings, "data_dir", tmp_path)

    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/files",
        files=[
            ("files", ("f001r.jpg", b"fake_jpeg_data_1", "image/jpeg")),
            ("files", ("f002r.jpg", b"fake_jpeg_data_2", "image/jpeg")),
        ],
    )
    assert response.status_code == 201
    data = response.json()
    assert data["pages_created"] == 2
    assert len(data["page_ids"]) == 2
    assert data["corpus_id"] == corpus.id
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
@pytest.mark.asyncio
async def test_ingest_files_creates_manuscript(async_client, db_session, tmp_path):
    """A file ingest creates a manuscript and reports a non-empty id."""
    corpus = await _make_corpus(db_session)

    import app.config as _cfg
    original = _cfg.settings.data_dir
    _cfg.settings.data_dir = tmp_path
    try:
        data = (
            await async_client.post(
                f"/api/v1/corpora/{corpus.id}/ingest/files",
                files=[("files", ("f001r.jpg", b"data", "image/jpeg"))],
            )
        ).json()
        assert "manuscript_id" in data
        assert data["manuscript_id"]  # must not be empty
    finally:
        _cfg.settings.data_dir = original
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
@pytest.mark.asyncio
async def test_ingest_files_folio_from_filename(async_client, db_session, tmp_path):
    """The folio_label is derived from the filename (extension stripped)."""
    corpus = await _make_corpus(db_session)

    import app.config as _cfg
    original = _cfg.settings.data_dir
    _cfg.settings.data_dir = tmp_path
    try:
        data = (
            await async_client.post(
                f"/api/v1/corpora/{corpus.id}/ingest/files",
                files=[("files", ("f013v.jpg", b"data", "image/jpeg"))],
            )
        ).json()
        # The page id embeds the folio_label.
        matching = [pid for pid in data["page_ids"] if "f013v" in pid]
        assert matching
    finally:
        _cfg.settings.data_dir = original
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
@pytest.mark.asyncio
async def test_ingest_files_writes_to_disk(async_client, db_session, tmp_path):
    """Uploaded files land under data/corpora/{slug}/masters/."""
    corpus = await _make_corpus(db_session, slug="test-write")

    import app.config as _cfg
    original = _cfg.settings.data_dir
    _cfg.settings.data_dir = tmp_path
    try:
        await async_client.post(
            f"/api/v1/corpora/{corpus.id}/ingest/files",
            files=[("files", ("f001r.jpg", b"JPEG_CONTENT", "image/jpeg"))],
        )
        target = tmp_path / "corpora" / "test-write" / "masters" / "f001r" / "f001r.jpg"
        assert target.exists()
        assert target.read_bytes() == b"JPEG_CONTENT"
    finally:
        _cfg.settings.data_dir = original
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
# ---------------------------------------------------------------------------
|
| 215 |
+
# POST /api/v1/corpora/{id}/ingest/iiif-manifest
|
| 216 |
+
# ---------------------------------------------------------------------------
|
| 217 |
+
|
| 218 |
+
@pytest.mark.asyncio
async def test_ingest_manifest_corpus_not_found(async_client):
    """Manifest ingestion against an unknown corpus returns 404."""
    body = {"manifest_url": "https://example.com/manifest"}
    response = await async_client.post(
        "/api/v1/corpora/nonexistent/ingest/iiif-manifest", json=body
    )
    assert response.status_code == 404
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
@pytest.mark.asyncio
async def test_ingest_manifest_iiif3_ok(async_client, db_session, monkeypatch):
    """A IIIF 3.0 manifest with three canvases produces three pages."""
    corpus = await _make_corpus(db_session)
    monkeypatch.setattr(
        ingest_module,
        "_fetch_json_manifest",
        AsyncMock(return_value=_iiif3_manifest(n_canvases=3)),
    )

    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
        json={"manifest_url": "https://example.com/manifest"},
    )
    assert response.status_code == 201
    data = response.json()
    assert data["pages_created"] == 3
    assert len(data["page_ids"]) == 3
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
@pytest.mark.asyncio
async def test_ingest_manifest_iiif2_ok(async_client, db_session, monkeypatch):
    """A IIIF 2.x manifest is parsed and its canvases become pages."""
    corpus = await _make_corpus(db_session)
    monkeypatch.setattr(
        ingest_module,
        "_fetch_json_manifest",
        AsyncMock(return_value=_iiif2_manifest(n_canvases=2)),
    )

    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
        json={"manifest_url": "https://example.com/manifest"},
    )
    assert response.status_code == 201
    assert response.json()["pages_created"] == 2
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
@pytest.mark.asyncio
async def test_ingest_manifest_extracts_folio_labels(async_client, db_session, monkeypatch):
    """Folio labels are taken from the canvas labels of the manifest."""
    corpus = await _make_corpus(db_session)
    monkeypatch.setattr(
        ingest_module,
        "_fetch_json_manifest",
        AsyncMock(return_value=_iiif3_manifest(n_canvases=2)),
    )

    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
        json={"manifest_url": "https://example.com/manifest"},
    )
    data = response.json()

    # Canvas labels: "f001r", "f002r" — both must surface in the page ids.
    for label in ("f001r", "f002r"):
        assert any(label in pid for pid in data["page_ids"])
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
@pytest.mark.asyncio
async def test_ingest_manifest_empty_canvases_422(async_client, db_session, monkeypatch):
    """A manifest without any canvas is rejected with 422."""
    corpus = await _make_corpus(db_session)
    monkeypatch.setattr(
        ingest_module,
        "_fetch_json_manifest",
        AsyncMock(return_value={"type": "Manifest", "items": []}),
    )

    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
        json={"manifest_url": "https://example.com/manifest"},
    )
    assert response.status_code == 422
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
@pytest.mark.asyncio
async def test_ingest_manifest_network_error_502(async_client, db_session, monkeypatch):
    """A network failure while fetching the manifest maps to HTTP 502."""
    import httpx

    corpus = await _make_corpus(db_session)
    monkeypatch.setattr(
        ingest_module,
        "_fetch_json_manifest",
        AsyncMock(side_effect=httpx.RequestError("Connection refused")),
    )

    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
        json={"manifest_url": "https://example.com/manifest"},
    )
    assert response.status_code == 502
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
@pytest.mark.asyncio
async def test_ingest_manifest_returns_corpus_id(async_client, db_session, monkeypatch):
    """The ingest response echoes the id of the target corpus."""
    corpus = await _make_corpus(db_session)
    manifest = _iiif3_manifest(1)

    async def fake_fetch(url: str) -> dict:
        return manifest

    monkeypatch.setattr(ingest_module, "_fetch_json_manifest", fake_fetch)

    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
        json={"manifest_url": "https://example.com/manifest"},
    )
    assert response.json()["corpus_id"] == corpus.id
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
# ---------------------------------------------------------------------------
|
| 334 |
+
# POST /api/v1/corpora/{id}/ingest/iiif-images
|
| 335 |
+
# ---------------------------------------------------------------------------
|
| 336 |
+
|
| 337 |
+
@pytest.mark.asyncio
async def test_ingest_images_corpus_not_found(async_client):
    """Image-list ingestion against an unknown corpus returns 404."""
    body = {"urls": ["https://x.com/1.jpg"], "folio_labels": ["f001r"]}
    response = await async_client.post(
        "/api/v1/corpora/nonexistent/ingest/iiif-images", json=body
    )
    assert response.status_code == 404
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
@pytest.mark.asyncio
async def test_ingest_images_ok(async_client, db_session):
    """Two direct image URLs create two pages."""
    corpus = await _make_corpus(db_session)
    payload = {
        "urls": ["https://example.com/img1.jpg", "https://example.com/img2.jpg"],
        "folio_labels": ["f001r", "f002r"],
    }

    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/iiif-images", json=payload
    )
    assert response.status_code == 201
    data = response.json()
    assert data["pages_created"] == 2
    assert len(data["page_ids"]) == 2
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
@pytest.mark.asyncio
async def test_ingest_images_folio_labels_in_ids(async_client, db_session):
    """The supplied folio labels appear inside the generated page ids."""
    corpus = await _make_corpus(db_session)
    payload = {
        "urls": ["https://example.com/a.jpg"],
        "folio_labels": ["f013v"],
    }
    data = (
        await async_client.post(
            f"/api/v1/corpora/{corpus.id}/ingest/iiif-images", json=payload
        )
    ).json()
    assert any("f013v" in pid for pid in data["page_ids"])
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
@pytest.mark.asyncio
async def test_ingest_images_mismatched_lengths_422(async_client, db_session):
    """urls and folio_labels of different lengths are rejected with 422."""
    corpus = await _make_corpus(db_session)
    payload = {
        "urls": ["https://a.com/1.jpg", "https://a.com/2.jpg"],
        "folio_labels": ["f001r"],
    }
    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/iiif-images", json=payload
    )
    assert response.status_code == 422
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
@pytest.mark.asyncio
async def test_ingest_images_empty_urls_422(async_client, db_session):
    """An empty URL list is rejected with 422."""
    corpus = await _make_corpus(db_session)
    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/iiif-images",
        json={"urls": [], "folio_labels": []},
    )
    assert response.status_code == 422
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
@pytest.mark.asyncio
async def test_ingest_images_pages_in_sequence_order(async_client, db_session):
    """Pages are created for every URL, with consecutive sequence numbers."""
    corpus = await _make_corpus(db_session)
    count = 4
    payload = {
        "urls": [f"https://example.com/{i}.jpg" for i in range(1, count + 1)],
        "folio_labels": [f"f{i:03d}r" for i in range(1, count + 1)],
    }

    data = (
        await async_client.post(
            f"/api/v1/corpora/{corpus.id}/ingest/iiif-images", json=payload
        )
    ).json()
    assert data["pages_created"] == count
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
@pytest.mark.asyncio
async def test_ingest_images_corpus_id_in_response(async_client, db_session):
    """The image-list ingest response echoes the corpus id."""
    corpus = await _make_corpus(db_session)
    body = {"urls": ["https://x.com/1.jpg"], "folio_labels": ["f001r"]}
    response = await async_client.post(
        f"/api/v1/corpora/{corpus.id}/ingest/iiif-images", json=body
    )
    assert response.json()["corpus_id"] == corpus.id
|
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests des endpoints /api/v1/jobs et /api/v1/corpora/{id}/run (Sprint 4 — Session B).
|
| 3 |
+
|
| 4 |
+
Vérifie :
|
| 5 |
+
- POST /api/v1/corpora/{id}/run → 202 + jobs_created + job_ids
|
| 6 |
+
- POST /api/v1/pages/{id}/run → 202 + job unique
|
| 7 |
+
- GET /api/v1/jobs/{job_id} → 200 ou 404
|
| 8 |
+
- POST /api/v1/jobs/{job_id}/retry → 200 (FAILED) ou 409 (autre statut)
|
| 9 |
+
- Isolation : corpus/page inexistants → 404
|
| 10 |
+
"""
|
| 11 |
+
# 1. stdlib
|
| 12 |
+
import uuid
|
| 13 |
+
from datetime import datetime, timezone
|
| 14 |
+
|
| 15 |
+
# 2. third-party
|
| 16 |
+
import pytest
|
| 17 |
+
|
| 18 |
+
# 3. local
|
| 19 |
+
from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
|
| 20 |
+
from app.models.job import JobModel
|
| 21 |
+
from tests.conftest_api import async_client, db_session # noqa: F401
|
| 22 |
+
|
| 23 |
+
_NOW = datetime.now(timezone.utc)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# ---------------------------------------------------------------------------
|
| 27 |
+
# Helpers — création de données de test
|
| 28 |
+
# ---------------------------------------------------------------------------
|
| 29 |
+
|
| 30 |
+
async def _make_corpus(db, slug="test-c"):
    """Insert and return a minimal corpus row for the job tests."""
    corpus = CorpusModel(
        id=str(uuid.uuid4()),
        slug=slug,
        title="Test",
        profile_id="medieval-illuminated",
        created_at=_NOW,
        updated_at=_NOW,
    )
    db.add(corpus)
    await db.commit()
    await db.refresh(corpus)
    return corpus
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
async def _make_manuscript(db, corpus_id):
    """Insert and return an empty manuscript attached to *corpus_id*."""
    manuscript = ManuscriptModel(
        id=str(uuid.uuid4()),
        corpus_id=corpus_id,
        title="MS",
        total_pages=0,
    )
    db.add(manuscript)
    await db.commit()
    await db.refresh(manuscript)
    return manuscript
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
async def _make_page(db, ms_id, folio="f001r", seq=1):
    """Insert and return an INGESTED page belonging to manuscript *ms_id*."""
    fields = {
        "id": str(uuid.uuid4()),
        "manuscript_id": ms_id,
        "folio_label": folio,
        "sequence": seq,
        "processing_status": "INGESTED",
    }
    page = PageModel(**fields)
    db.add(page)
    await db.commit()
    await db.refresh(page)
    return page
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
async def _make_failed_job(db, corpus_id, page_id=None):
    """Persist a job already in FAILED state, used by the retry tests."""
    failed = JobModel(
        id=str(uuid.uuid4()),
        corpus_id=corpus_id,
        page_id=page_id,
        status="failed",
        error_message="Simulated failure",
        created_at=_NOW,
    )
    db.add(failed)
    await db.commit()
    await db.refresh(failed)
    return failed
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# ---------------------------------------------------------------------------
|
| 79 |
+
# POST /api/v1/corpora/{id}/run
|
| 80 |
+
# ---------------------------------------------------------------------------
|
| 81 |
+
|
| 82 |
+
@pytest.mark.asyncio
async def test_run_corpus_not_found(async_client):
    """Running an unknown corpus returns 404."""
    result = await async_client.post("/api/v1/corpora/nonexistent/run")
    assert result.status_code == 404
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
@pytest.mark.asyncio
async def test_run_corpus_no_pages(async_client, db_session):
    """A corpus without pages still answers 202, with zero jobs created."""
    corpus = await _make_corpus(db_session)

    response = await async_client.post(f"/api/v1/corpora/{corpus.id}/run")
    assert response.status_code == 202

    data = response.json()
    assert data["jobs_created"] == 0
    assert data["job_ids"] == []
    assert data["corpus_id"] == corpus.id
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
@pytest.mark.asyncio
async def test_run_corpus_creates_jobs_per_page(async_client, db_session):
    """A corpus with 3 pages gets exactly 3 jobs."""
    corpus = await _make_corpus(db_session)
    ms = await _make_manuscript(db_session, corpus.id)
    for seq in (1, 2, 3):
        await _make_page(db_session, ms.id, folio=f"f{seq:03d}r", seq=seq)

    response = await async_client.post(f"/api/v1/corpora/{corpus.id}/run")
    assert response.status_code == 202
    data = response.json()
    assert data["jobs_created"] == 3
    assert len(data["job_ids"]) == 3
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
@pytest.mark.asyncio
|
| 116 |
+
async def test_run_corpus_job_ids_are_unique(async_client, db_session):
|
| 117 |
+
corpus = await _make_corpus(db_session)
|
| 118 |
+
ms = await _make_manuscript(db_session, corpus.id)
|
| 119 |
+
for i in range(2):
|
| 120 |
+
await _make_page(db_session, ms.id, folio=f"f{i+1:03d}r", seq=i + 1)
|
| 121 |
+
|
| 122 |
+
data = (await async_client.post(f"/api/v1/corpora/{corpus.id}/run")).json()
|
| 123 |
+
assert len(set(data["job_ids"])) == 2 # all unique
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
@pytest.mark.asyncio
|
| 127 |
+
async def test_run_corpus_jobs_are_pending(async_client, db_session):
|
| 128 |
+
"""Les jobs créés par corpus.run ont le statut 'pending'."""
|
| 129 |
+
corpus = await _make_corpus(db_session)
|
| 130 |
+
ms = await _make_manuscript(db_session, corpus.id)
|
| 131 |
+
await _make_page(db_session, ms.id)
|
| 132 |
+
|
| 133 |
+
run_data = (await async_client.post(f"/api/v1/corpora/{corpus.id}/run")).json()
|
| 134 |
+
job_id = run_data["job_ids"][0]
|
| 135 |
+
|
| 136 |
+
job_data = (await async_client.get(f"/api/v1/jobs/{job_id}")).json()
|
| 137 |
+
assert job_data["status"] == "pending"
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# ---------------------------------------------------------------------------
# POST /api/v1/pages/{id}/run
# ---------------------------------------------------------------------------

async def _seed_single_page(db_session):
    """Build the corpus → manuscript → page chain; return (corpus, page)."""
    corpus = await _make_corpus(db_session)
    ms = await _make_manuscript(db_session, corpus.id)
    page = await _make_page(db_session, ms.id)
    return corpus, page


@pytest.mark.asyncio
async def test_run_page_not_found(async_client):
    """An unknown page id yields a 404."""
    resp = await async_client.post("/api/v1/pages/nonexistent/run")
    assert resp.status_code == 404


@pytest.mark.asyncio
async def test_run_page_creates_job(async_client, db_session):
    """Running a single page is accepted immediately (202)."""
    _, page = await _seed_single_page(db_session)
    resp = await async_client.post(f"/api/v1/pages/{page.id}/run")
    assert resp.status_code == 202


@pytest.mark.asyncio
async def test_run_page_job_fields(async_client, db_session):
    """A freshly created page job carries the expected initial field values."""
    corpus, page = await _seed_single_page(db_session)

    payload = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()

    assert payload["page_id"] == page.id
    assert payload["corpus_id"] == corpus.id
    assert payload["status"] == "pending"
    assert payload["started_at"] is None
    assert payload["finished_at"] is None
    assert payload["error_message"] is None


@pytest.mark.asyncio
async def test_run_page_job_id_is_uuid(async_client, db_session):
    """Job ids are canonical 36-character UUID strings."""
    _, page = await _seed_single_page(db_session)
    payload = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
    assert len(payload["id"]) == 36


@pytest.mark.asyncio
async def test_run_page_multiple_times_creates_multiple_jobs(async_client, db_session):
    """Two run calls on the same page create two distinct jobs."""
    _, page = await _seed_single_page(db_session)

    first = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
    second = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
    assert first["id"] != second["id"]
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
# ---------------------------------------------------------------------------
# GET /api/v1/jobs/{job_id}
# ---------------------------------------------------------------------------

@pytest.mark.asyncio
async def test_get_job_not_found(async_client):
    """An unknown job id yields a 404."""
    resp = await async_client.get("/api/v1/jobs/nonexistent")
    assert resp.status_code == 404


@pytest.mark.asyncio
async def test_get_job_ok(async_client, db_session):
    """A job created via pages.run can be fetched back by id."""
    corpus = await _make_corpus(db_session)
    ms = await _make_manuscript(db_session, corpus.id)
    page = await _make_page(db_session, ms.id)

    created = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()

    resp = await async_client.get(f"/api/v1/jobs/{created['id']}")
    assert resp.status_code == 200
    assert resp.json()["id"] == created["id"]


@pytest.mark.asyncio
async def test_get_job_fields(async_client, db_session):
    """The job payload exposes status, corpus_id, page_id and created_at."""
    corpus = await _make_corpus(db_session)
    ms = await _make_manuscript(db_session, corpus.id)
    page = await _make_page(db_session, ms.id)

    created = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
    payload = (await async_client.get(f"/api/v1/jobs/{created['id']}")).json()

    for field in ("status", "corpus_id", "page_id", "created_at"):
        assert field in payload
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
# ---------------------------------------------------------------------------
# POST /api/v1/jobs/{job_id}/retry
# ---------------------------------------------------------------------------

@pytest.mark.asyncio
async def test_retry_job_not_found(async_client):
    """Retrying an unknown job id yields a 404."""
    resp = await async_client.post("/api/v1/jobs/nonexistent/retry")
    assert resp.status_code == 404


@pytest.mark.asyncio
async def test_retry_pending_job_409(async_client, db_session):
    """A 'pending' job cannot be retried (409 Conflict)."""
    corpus = await _make_corpus(db_session)
    ms = await _make_manuscript(db_session, corpus.id)
    page = await _make_page(db_session, ms.id)

    created = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
    resp = await async_client.post(f"/api/v1/jobs/{created['id']}/retry")
    assert resp.status_code == 409


@pytest.mark.asyncio
async def test_retry_failed_job_ok(async_client, db_session):
    """A 'failed' job can be retried: its status flips back to 'pending'."""
    corpus = await _make_corpus(db_session)
    failed = await _make_failed_job(db_session, corpus.id)

    resp = await async_client.post(f"/api/v1/jobs/{failed.id}/retry")
    assert resp.status_code == 200
    assert resp.json()["status"] == "pending"


@pytest.mark.asyncio
async def test_retry_failed_job_clears_error(async_client, db_session):
    """Retrying wipes error_message and the start/finish timestamps."""
    corpus = await _make_corpus(db_session)
    failed = await _make_failed_job(db_session, corpus.id)

    payload = (await async_client.post(f"/api/v1/jobs/{failed.id}/retry")).json()
    assert payload["error_message"] is None
    assert payload["started_at"] is None
    assert payload["finished_at"] is None


@pytest.mark.asyncio
async def test_retry_failed_job_is_retrievable(async_client, db_session):
    """After a retry, GET /jobs/{id} reflects the new 'pending' status."""
    corpus = await _make_corpus(db_session)
    failed = await _make_failed_job(db_session, corpus.id)

    await async_client.post(f"/api/v1/jobs/{failed.id}/retry")
    payload = (await async_client.get(f"/api/v1/jobs/{failed.id}")).json()
    assert payload["status"] == "pending"
|
|
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tests des endpoints /api/v1/models et /api/v1/settings/api-key (Sprint 4 — Session B).
|
| 3 |
+
|
| 4 |
+
Stratégie :
|
| 5 |
+
- Appels Google AI mockés via monkeypatch sur _validate_api_key et list_all_models
|
| 6 |
+
- BDD SQLite en mémoire pour les endpoints qui touchent la BDD (PUT/GET model)
|
| 7 |
+
|
| 8 |
+
Vérifie :
|
| 9 |
+
- POST /api/v1/settings/api-key → valid/invalid
|
| 10 |
+
- GET /api/v1/models → liste mockée
|
| 11 |
+
- POST /api/v1/models/refresh → mise à jour + timestamp
|
| 12 |
+
- PUT /api/v1/corpora/{id}/model → création + mise à jour
|
| 13 |
+
- GET /api/v1/corpora/{id}/model → 200 ou 404
|
| 14 |
+
"""
|
| 15 |
+
# 1. stdlib
|
| 16 |
+
import uuid
|
| 17 |
+
from datetime import datetime, timezone
|
| 18 |
+
|
| 19 |
+
# 2. third-party
|
| 20 |
+
import pytest
|
| 21 |
+
|
| 22 |
+
# 3. local
|
| 23 |
+
import app.api.v1.models_api as models_api_module
|
| 24 |
+
from app.models.corpus import CorpusModel
|
| 25 |
+
from app.schemas.model_config import ModelInfo, ProviderType
|
| 26 |
+
from tests.conftest_api import async_client, db_session # noqa: F401
|
| 27 |
+
|
| 28 |
+
# Single timestamp reused for created_at/updated_at so rows built by the
# helpers below carry deterministic, comparable times within one test run.
_NOW = datetime.now(timezone.utc)

# Canned catalogue returned by the monkeypatched list_all_models(): two
# vision-capable Gemini entries, enough to exercise list/refresh counting
# and the per-entry field assertions without any network access.
_MOCK_MODELS = [
    ModelInfo(
        model_id="gemini-2.0-flash",
        display_name="Gemini 2.0 Flash",
        provider=ProviderType.GOOGLE_AI_STUDIO,
        supports_vision=True,
        input_token_limit=1_000_000,
        output_token_limit=8192,
    ),
    ModelInfo(
        model_id="gemini-1.5-pro",
        display_name="Gemini 1.5 Pro",
        provider=ProviderType.GOOGLE_AI_STUDIO,
        supports_vision=True,
        input_token_limit=2_000_000,
        output_token_limit=8192,
    ),
]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

async def _make_corpus(db, slug="models-test"):
    """Persist a minimal CorpusModel and return the refreshed row."""
    row = CorpusModel(
        id=str(uuid.uuid4()),
        slug=slug,
        title="Models Test",
        profile_id="medieval-illuminated",
        created_at=_NOW,
        updated_at=_NOW,
    )
    db.add(row)
    await db.commit()
    await db.refresh(row)
    return row
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# ---------------------------------------------------------------------------
# POST /api/v1/settings/api-key
# ---------------------------------------------------------------------------

def _patch_key_check(monkeypatch, valid, count, error):
    """Replace the network-backed key validator with a canned answer."""
    monkeypatch.setattr(
        models_api_module, "_validate_api_key",
        lambda key, provider: (valid, count, error),
    )


@pytest.mark.asyncio
async def test_api_key_valid(async_client, monkeypatch):
    """A valid key reports valid=True plus the provider's model count."""
    _patch_key_check(monkeypatch, True, 3, None)

    resp = await async_client.post(
        "/api/v1/settings/api-key",
        json={"api_key": "AIza-test-key", "provider_type": "google_ai_studio"},
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["valid"] is True
    assert payload["model_count"] == 3
    assert payload["provider"] == "google_ai_studio"
    assert payload["error"] is None


@pytest.mark.asyncio
async def test_api_key_invalid(async_client, monkeypatch):
    """An invalid key reports valid=False with an error string (HTTP still 200)."""
    _patch_key_check(monkeypatch, False, 0, "API key not valid")

    resp = await async_client.post(
        "/api/v1/settings/api-key",
        json={"api_key": "bad-key", "provider_type": "google_ai_studio"},
    )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["valid"] is False
    assert payload["model_count"] == 0
    assert payload["error"] is not None


@pytest.mark.asyncio
async def test_api_key_not_stored_in_db(async_client, db_session, monkeypatch):
    """R06: the submitted key must never be persisted in the database."""
    _patch_key_check(monkeypatch, True, 2, None)

    await async_client.post(
        "/api/v1/settings/api-key",
        json={"api_key": "secret-key-AIza123", "provider_type": "google_ai_studio"},
    )

    # Scan the whole model_configs table for any trace of the key.
    from sqlalchemy import text
    result = await db_session.execute(text("SELECT * FROM model_configs"))
    for row in result.fetchall():
        assert "secret-key-AIza123" not in str(row)


@pytest.mark.asyncio
async def test_api_key_missing_body_422(async_client):
    """An empty JSON body fails request validation (422)."""
    resp = await async_client.post("/api/v1/settings/api-key", json={})
    assert resp.status_code == 422


@pytest.mark.asyncio
async def test_api_key_default_provider_type(async_client, monkeypatch):
    """provider_type is optional and defaults to google_ai_studio."""
    _patch_key_check(monkeypatch, True, 1, None)

    resp = await async_client.post(
        "/api/v1/settings/api-key",
        json={"api_key": "AIza-test"},
    )
    assert resp.status_code == 200
    assert resp.json()["provider"] == "google_ai_studio"
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
# ---------------------------------------------------------------------------
# GET /api/v1/models
# ---------------------------------------------------------------------------

@pytest.mark.asyncio
async def test_get_models_returns_list(async_client, monkeypatch):
    """The endpoint returns a JSON array when the catalogue is mocked."""
    monkeypatch.setattr(
        models_api_module, "list_all_models", lambda: _MOCK_MODELS
    )
    resp = await async_client.get("/api/v1/models")
    assert resp.status_code == 200
    assert isinstance(resp.json(), list)
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
@pytest.mark.asyncio
async def test_get_models_count(async_client, monkeypatch):
    """GET /models returns exactly the two mocked catalogue entries.

    Fix: the original bound ``models = response = await ...`` — the stray
    ``models`` name pointed at the raw HTTP Response object, not a model
    list, and the status code was never checked.
    """
    # Patch the module-level accessor so no network call is made.
    monkeypatch.setattr(
        models_api_module, "list_all_models", lambda: _MOCK_MODELS
    )
    response = await async_client.get("/api/v1/models")
    assert response.status_code == 200
    assert len(response.json()) == 2
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
@pytest.mark.asyncio
async def test_get_models_fields(async_client, monkeypatch):
    """Each catalogue entry exposes the public ModelInfo fields."""
    monkeypatch.setattr(
        models_api_module, "list_all_models", lambda: _MOCK_MODELS
    )
    catalogue = (await async_client.get("/api/v1/models")).json()
    first = catalogue[0]
    for field in ("model_id", "display_name", "provider", "supports_vision"):
        assert field in first


@pytest.mark.asyncio
async def test_get_models_empty_when_no_provider(async_client, monkeypatch):
    """With no configured provider the endpoint answers 200 and an empty list."""
    monkeypatch.setattr(models_api_module, "list_all_models", lambda: [])
    resp = await async_client.get("/api/v1/models")
    assert resp.status_code == 200
    assert resp.json() == []


@pytest.mark.asyncio
async def test_get_models_contains_gemini(async_client, monkeypatch):
    """The mocked catalogue surfaces at least one Gemini model id."""
    monkeypatch.setattr(
        models_api_module, "list_all_models", lambda: _MOCK_MODELS
    )
    catalogue = (await async_client.get("/api/v1/models")).json()
    assert any("gemini" in entry["model_id"] for entry in catalogue)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
# ---------------------------------------------------------------------------
# POST /api/v1/models/refresh
# ---------------------------------------------------------------------------

@pytest.mark.asyncio
async def test_refresh_models_ok(async_client, monkeypatch):
    """Refresh succeeds when the provider catalogue is mocked."""
    monkeypatch.setattr(
        models_api_module, "list_all_models", lambda: _MOCK_MODELS
    )
    resp = await async_client.post("/api/v1/models/refresh")
    assert resp.status_code == 200


@pytest.mark.asyncio
async def test_refresh_models_has_timestamp(async_client, monkeypatch):
    """The refresh payload carries a non-empty refreshed_at value."""
    monkeypatch.setattr(
        models_api_module, "list_all_models", lambda: _MOCK_MODELS
    )
    payload = (await async_client.post("/api/v1/models/refresh")).json()
    assert "refreshed_at" in payload
    assert payload["refreshed_at"]  # must not be empty


@pytest.mark.asyncio
async def test_refresh_models_count(async_client, monkeypatch):
    """count and the models list length both match the mocked catalogue size."""
    monkeypatch.setattr(
        models_api_module, "list_all_models", lambda: _MOCK_MODELS
    )
    payload = (await async_client.post("/api/v1/models/refresh")).json()
    assert payload["count"] == 2
    assert len(payload["models"]) == 2


@pytest.mark.asyncio
async def test_refresh_models_structure(async_client, monkeypatch):
    """The refresh payload exposes models, count and refreshed_at keys."""
    monkeypatch.setattr(
        models_api_module, "list_all_models", lambda: _MOCK_MODELS
    )
    payload = (await async_client.post("/api/v1/models/refresh")).json()
    for key in ("models", "count", "refreshed_at"):
        assert key in payload
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# ---------------------------------------------------------------------------
# PUT /api/v1/corpora/{id}/model
# ---------------------------------------------------------------------------

@pytest.mark.asyncio
async def test_set_model_corpus_not_found(async_client):
    """PUT on an unknown corpus yields a 404."""
    resp = await async_client.put(
        "/api/v1/corpora/nonexistent/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    assert resp.status_code == 404


@pytest.mark.asyncio
async def test_set_model_ok(async_client, db_session):
    """A full payload (id, provider, display name) is accepted with 200."""
    corpus = await _make_corpus(db_session)
    resp = await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={
            "model_id": "gemini-2.0-flash",
            "provider_type": "google_ai_studio",
            "display_name": "Gemini 2.0 Flash",
        },
    )
    assert resp.status_code == 200


@pytest.mark.asyncio
async def test_set_model_response_fields(async_client, db_session):
    """The response echoes corpus id, model id, provider and a timestamp."""
    corpus = await _make_corpus(db_session)
    resp = await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    payload = resp.json()

    assert payload["corpus_id"] == corpus.id
    assert payload["selected_model_id"] == "gemini-2.0-flash"
    assert payload["provider_type"] == "google_ai_studio"
    assert "updated_at" in payload


@pytest.mark.asyncio
async def test_set_model_update_existing(async_client, db_session):
    """A second PUT on the same corpus updates the row (upsert, no duplicate)."""
    corpus = await _make_corpus(db_session)

    await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-1.5-pro", "provider_type": "google_ai_studio"},
    )
    second = await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    assert second.json()["selected_model_id"] == "gemini-2.0-flash"


@pytest.mark.asyncio
async def test_set_model_then_get(async_client, db_session):
    """GET returns whatever the preceding PUT configured."""
    corpus = await _make_corpus(db_session)
    await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    fetched = (await async_client.get(f"/api/v1/corpora/{corpus.id}/model")).json()
    assert fetched["selected_model_id"] == "gemini-2.0-flash"


@pytest.mark.asyncio
async def test_set_model_display_name_fallback(async_client, db_session):
    """When display_name is omitted, the model id doubles as the display name."""
    corpus = await _make_corpus(db_session)
    resp = await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    assert resp.json()["selected_model_display_name"] == "gemini-2.0-flash"
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
# ---------------------------------------------------------------------------
# GET /api/v1/corpora/{id}/model
# ---------------------------------------------------------------------------

@pytest.mark.asyncio
async def test_get_model_corpus_not_found(async_client):
    """GET on an unknown corpus yields a 404."""
    resp = await async_client.get("/api/v1/corpora/nonexistent/model")
    assert resp.status_code == 404


@pytest.mark.asyncio
async def test_get_model_not_configured(async_client, db_session):
    """A corpus that exists but has no model configured yields a 404."""
    corpus = await _make_corpus(db_session)
    resp = await async_client.get(f"/api/v1/corpora/{corpus.id}/model")
    assert resp.status_code == 404


@pytest.mark.asyncio
async def test_get_model_ok(async_client, db_session):
    """Once configured via PUT, GET answers 200."""
    corpus = await _make_corpus(db_session)
    await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    resp = await async_client.get(f"/api/v1/corpora/{corpus.id}/model")
    assert resp.status_code == 200


@pytest.mark.asyncio
async def test_get_model_fields(async_client, db_session):
    """GET exposes the stored id, display name, provider and timestamp."""
    corpus = await _make_corpus(db_session)
    await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-1.5-pro", "provider_type": "google_ai_studio", "display_name": "Gemini 1.5 Pro"},
    )
    payload = (await async_client.get(f"/api/v1/corpora/{corpus.id}/model")).json()
    assert payload["corpus_id"] == corpus.id
    assert payload["selected_model_id"] == "gemini-1.5-pro"
    assert payload["selected_model_display_name"] == "Gemini 1.5 Pro"
    assert payload["provider_type"] == "google_ai_studio"
    assert "updated_at" in payload
|