Claude committed on
Commit
ed52286
·
unverified ·
1 Parent(s): bd09498

feat(sprint4-session-b): endpoints jobs, ingestion et modèles IA

Browse files

Nouveaux modèles SQLAlchemy :
- models/job.py : JobModel (id, corpus_id, page_id, status
pending/running/done/failed, started_at, finished_at, error_message)
- models/model_config_db.py : ModelConfigDB (corpus_id PK, provider_type,
selected_model_id, selected_model_display_name, updated_at)
- models/__init__.py : enregistrement JobModel + ModelConfigDB dans Base

Nouveaux routers (R10 — /api/v1/) :
- api/v1/jobs.py :
POST /corpora/{id}/run → 1 job par page du corpus (202, immédiat)
POST /pages/{id}/run → 1 job pour la page (202, immédiat)
GET /jobs/{id} → état du job (404 si inconnu)
POST /jobs/{id}/retry → relance FAILED→pending (409 si non-FAILED)
- api/v1/ingest.py :
POST /corpora/{id}/ingest/files → multipart, pages créées
POST /corpora/{id}/ingest/iiif-manifest → manifest IIIF 3.0 + 2.x
POST /corpora/{id}/ingest/iiif-images → liste d'URLs directes
- api/v1/models_api.py :
POST /settings/api-key → validation sans stockage (R06)
GET /models → list_all_models() mockable
POST /models/refresh → idem + refreshed_at
PUT /corpora/{id}/model → ModelConfigDB upsert
GET /corpora/{id}/model → config active (404 si absent)

Infrastructure :
- python-multipart ajouté à pyproject.toml (requis pour UploadFile)
- _fetch_json_manifest et _validate_api_key isolés pour être patchables
dans les tests sans dépendances réseau

Tests (61 nouveaux) :
- test_api_jobs.py : corpus.run, pages.run, get_job, retry (409/200)
- test_api_ingest.py : files (disk write vérifié), IIIF 3.0/2.x, images,
erreurs 404/422/502 ; mock _fetch_json_manifest
- test_api_models.py : api-key (R06 vérifié), models list/refresh,
PUT/GET model ; mock list_all_models + _validate_api_key

Total : 457 tests passent, 3 skippés (intégration réseau).

https://claude.ai/code/session_018woyEHc8HG2th7V4ewJ4Kg

backend/app/api/v1/ingest.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Endpoints d'ingestion de corpus (R10 — préfixe /api/v1/).
3
+
4
+ POST /api/v1/corpora/{id}/ingest/files
5
+ POST /api/v1/corpora/{id}/ingest/iiif-manifest
6
+ POST /api/v1/corpora/{id}/ingest/iiif-images
7
+
8
+ Règle (R01) : aucune logique spécifique à un corpus particulier.
9
+ Règle : ingestion = création des PageModel en BDD uniquement.
10
+ L'analyse IA est déclenchée séparément via /run.
11
+ """
12
+ # 1. stdlib
13
+ import logging
14
+ import uuid
15
+ from pathlib import Path
16
+
17
+ # 2. third-party
18
+ import httpx
19
+ from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
20
+ from pydantic import BaseModel
21
+ from sqlalchemy import func, select
22
+ from sqlalchemy.ext.asyncio import AsyncSession
23
+
24
+ # 3. local
25
+ from app import config as _config_module
26
+ from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
27
+ from app.models.database import get_db
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ router = APIRouter(tags=["ingestion"])
32
+
33
+
34
+ # ── Schémas ───────────────────────────────────────────────────────────────────
35
+
36
class IIIFManifestRequest(BaseModel):
    """Request body: URL of the IIIF manifest to download and ingest."""

    manifest_url: str
38
+
39
+
40
class IIIFImagesRequest(BaseModel):
    """Request body: direct image URLs and their folio labels (must be the same length)."""

    urls: list[str]
    folio_labels: list[str]
43
+
44
+
45
class IngestResponse(BaseModel):
    """Common response shape for all three ingestion endpoints."""

    corpus_id: str
    manuscript_id: str
    # Number of PageModel rows created by this call, plus their ids in order.
    pages_created: int
    page_ids: list[str]
50
+
51
+
52
+ # ── Helpers ───────────────────────────────────────────────────────────────────
53
+
54
async def _get_corpus_or_404(corpus_id: str, db: AsyncSession) -> CorpusModel:
    """Load a corpus by primary key, aborting with 404 when it does not exist."""
    found = await db.get(CorpusModel, corpus_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="Corpus introuvable")
59
+
60
+
61
async def _get_or_create_manuscript(
    db: AsyncSession, corpus_id: str, title: str | None = None
) -> ManuscriptModel:
    """Return the corpus' first manuscript, creating a default one if none exists.

    The created manuscript gets a random UUID id, the given title (falling
    back to the corpus title, then to the corpus id), and total_pages=0.
    It is flushed (id usable) but NOT committed — the caller owns the commit.
    """
    result = await db.execute(
        select(ManuscriptModel).where(ManuscriptModel.corpus_id == corpus_id).limit(1)
    )
    ms = result.scalar_one_or_none()
    if ms is not None:
        return ms

    # No manuscript yet: create a default one for this corpus.
    corpus = await db.get(CorpusModel, corpus_id)
    ms = ManuscriptModel(
        id=str(uuid.uuid4()),
        corpus_id=corpus_id,
        # Fallback chain: explicit title → corpus title → corpus id.
        title=title or (corpus.title if corpus else corpus_id),
        total_pages=0,
    )
    db.add(ms)
    await db.flush()
    return ms
82
+
83
+
84
async def _next_sequence(db: AsyncSession, manuscript_id: str) -> int:
    """Return the next free page sequence number (max existing + 1, or 1)."""
    query = select(func.max(PageModel.sequence)).where(
        PageModel.manuscript_id == manuscript_id
    )
    current_max = (await db.execute(query)).scalar_one_or_none()
    if current_max is None:
        return 1
    return current_max + 1
93
+
94
+
95
async def _create_page(
    db: AsyncSession,
    manuscript_id: str,
    corpus_id: str,
    folio_label: str,
    sequence: int,
    image_master_path: str | None = None,
) -> PageModel:
    """Build a PageModel in status INGESTED and add it to the session.

    Not flushed or committed here — the calling endpoint commits the batch.
    Callers pass the corpus *slug* as ``corpus_id`` (used only for the id).
    """
    # NOTE(review): id = "{slug}-{folio_label}" — two pages sharing a folio
    # label within one corpus would collide on the primary key; confirm folio
    # labels are unique per corpus (duplicate uploaded filenames would clash).
    page = PageModel(
        id=f"{corpus_id}-{folio_label}",
        manuscript_id=manuscript_id,
        folio_label=folio_label,
        sequence=sequence,
        image_master_path=image_master_path,
        processing_status="INGESTED",
    )
    db.add(page)
    return page
113
+
114
+
115
async def _fetch_json_manifest(url: str) -> dict:
    """Download a IIIF manifest and decode it as JSON.

    Module-level on purpose: tests monkeypatch this function to avoid any
    real network access. Follows redirects, 30 s timeout.
    """
    async with httpx.AsyncClient() as http:
        response = await http.get(url, follow_redirects=True, timeout=30.0)
        response.raise_for_status()
        return response.json()
121
+
122
+
123
+ def _extract_canvas_label(canvas: dict, index: int) -> str:
124
+ """Extrait le folio_label d'un canvas IIIF (3.0 ou 2.x)."""
125
+ label = canvas.get("label")
126
+ if isinstance(label, dict):
127
+ for lang in ("none", "en", "fr", "la"):
128
+ values = label.get(lang)
129
+ if values:
130
+ return (values[0] if isinstance(values, list) else str(values)).strip()
131
+ elif isinstance(label, str) and label.strip():
132
+ return label.strip()
133
+ return f"f{index + 1:03d}r"
134
+
135
+
136
+ def _extract_canvas_image_url(canvas: dict) -> str | None:
137
+ """Extrait l'URL de l'image principale d'un canvas IIIF (3.0 ou 2.x)."""
138
+ # IIIF 3.0
139
+ items = canvas.get("items") or []
140
+ if items:
141
+ ann_items = (items[0].get("items") or []) if items else []
142
+ if ann_items:
143
+ body = ann_items[0].get("body") or {}
144
+ if isinstance(body, dict):
145
+ return body.get("id") or body.get("@id")
146
+ # IIIF 2.x
147
+ images = canvas.get("images") or []
148
+ if images:
149
+ resource = images[0].get("resource") or {}
150
+ return resource.get("@id")
151
+ # Fallback : ID du canvas
152
+ return canvas.get("id") or canvas.get("@id")
153
+
154
+
155
+ # ── Endpoints ─────────────────────────────────────────────────────────────────
156
+
157
@router.post("/corpora/{corpus_id}/ingest/files", response_model=IngestResponse, status_code=201)
async def ingest_files(
    corpus_id: str,
    files: list[UploadFile] = File(...),
    db: AsyncSession = Depends(get_db),
) -> IngestResponse:
    """Ingest a list of image files (multipart/form-data).

    Each file becomes one PageModel; the bytes are written to
    data/corpora/{slug}/masters/{folio_label}/{filename}.

    Raises:
        HTTPException 404: unknown corpus.
    """
    corpus = await _get_corpus_or_404(corpus_id, db)
    ms = await _get_or_create_manuscript(db, corpus_id)
    seq = await _next_sequence(db, ms.id)

    created: list[PageModel] = []
    for i, upload in enumerate(files):
        # .name strips any client-supplied directory components (path traversal guard).
        filename = Path(upload.filename or f"file_{i}").name
        folio_label = Path(filename).stem  # file name without extension

        master_dir = (
            _config_module.settings.data_dir
            / "corpora"
            / corpus.slug
            / "masters"
            / folio_label
        )
        master_dir.mkdir(parents=True, exist_ok=True)
        master_path = master_dir / filename
        # NOTE(review): whole file buffered in memory, and an existing master
        # with the same name is silently overwritten — confirm acceptable.
        content = await upload.read()
        master_path.write_bytes(content)

        page = await _create_page(
            db, ms.id, corpus.slug, folio_label, seq + i,
            image_master_path=str(master_path),
        )
        created.append(page)

    ms.total_pages = (ms.total_pages or 0) + len(created)
    await db.commit()

    logger.info(
        "Fichiers ingérés",
        extra={"corpus_id": corpus_id, "count": len(created)},
    )
    return IngestResponse(
        corpus_id=corpus_id,
        manuscript_id=ms.id,
        pages_created=len(created),
        page_ids=[p.id for p in created],
    )
208
+
209
+
210
@router.post("/corpora/{corpus_id}/ingest/iiif-manifest", response_model=IngestResponse, status_code=201)
async def ingest_iiif_manifest(
    corpus_id: str,
    body: IIIFManifestRequest,
    db: AsyncSession = Depends(get_db),
) -> IngestResponse:
    """Download a IIIF manifest, extract its canvases and create the pages.

    Supports IIIF Presentation 3.0 ("items") and 2.x ("sequences").

    Raises:
        HTTPException 404: unknown corpus.
        HTTPException 502: HTTP or network failure while fetching the manifest.
        HTTPException 422: manifest contains no canvas.
    """
    corpus = await _get_corpus_or_404(corpus_id, db)

    try:
        manifest = await _fetch_json_manifest(body.manifest_url)
    except httpx.HTTPStatusError as exc:
        # Chain the original exception ("from exc") so the root cause is
        # preserved in tracebacks/logs instead of being masked (B904).
        raise HTTPException(
            status_code=502,
            detail=f"Erreur HTTP lors du téléchargement du manifest : {exc.response.status_code}",
        ) from exc
    except httpx.RequestError as exc:
        # httpx.TimeoutException subclasses RequestError, so this single
        # clause covers timeouts as well as DNS/connection errors.
        raise HTTPException(
            status_code=502,
            detail=f"Erreur réseau lors du téléchargement du manifest : {exc}",
        ) from exc

    # IIIF 3.0 lists canvases under "items"; 2.x nests them in sequences[0].
    canvases: list[dict] = manifest.get("items") or []
    if not canvases:
        sequences = manifest.get("sequences") or []
        canvases = sequences[0].get("canvases", []) if sequences else []

    if not canvases:
        raise HTTPException(
            status_code=422,
            detail="Le manifest IIIF ne contient aucun canvas (items vide)",
        )

    # Manuscript title from the manifest label (3.0 language map or 2.x string);
    # fall back to the corpus title when nothing usable is present.
    ms_title_raw = manifest.get("label") or {}
    ms_title = corpus.title
    if isinstance(ms_title_raw, dict):
        for lang in ("none", "fr", "en"):
            v = ms_title_raw.get(lang)
            if v:
                ms_title = v[0] if isinstance(v, list) else str(v)
                break
    elif isinstance(ms_title_raw, str):
        ms_title = ms_title_raw

    ms = await _get_or_create_manuscript(db, corpus_id, title=ms_title)
    seq = await _next_sequence(db, ms.id)

    created: list[PageModel] = []
    for i, canvas in enumerate(canvases):
        folio_label = _extract_canvas_label(canvas, i)
        image_url = _extract_canvas_image_url(canvas)
        page = await _create_page(
            db, ms.id, corpus.slug, folio_label, seq + i,
            image_master_path=image_url,
        )
        created.append(page)

    ms.total_pages = (ms.total_pages or 0) + len(created)
    await db.commit()

    logger.info(
        "Manifest IIIF ingéré",
        extra={"corpus_id": corpus_id, "url": body.manifest_url, "pages": len(created)},
    )
    return IngestResponse(
        corpus_id=corpus_id,
        manuscript_id=ms.id,
        pages_created=len(created),
        page_ids=[p.id for p in created],
    )
285
+
286
+
287
@router.post("/corpora/{corpus_id}/ingest/iiif-images", response_model=IngestResponse, status_code=201)
async def ingest_iiif_images(
    corpus_id: str,
    body: IIIFImagesRequest,
    db: AsyncSession = Depends(get_db),
) -> IngestResponse:
    """Ingest a list of direct IIIF image URLs.

    `urls` and `folio_labels` must be non-empty and of equal length; each
    (url, label) pair becomes one page whose master path is the URL itself.
    """
    n_urls, n_labels = len(body.urls), len(body.folio_labels)
    if n_urls != n_labels:
        raise HTTPException(
            status_code=422,
            detail=f"urls ({n_urls}) et folio_labels ({n_labels}) doivent avoir la même longueur",
        )
    if not body.urls:
        raise HTTPException(status_code=422, detail="La liste d'URLs est vide")

    corpus = await _get_corpus_or_404(corpus_id, db)
    manuscript = await _get_or_create_manuscript(db, corpus_id)
    start_seq = await _next_sequence(db, manuscript.id)

    created: list[PageModel] = []
    for offset, (url, label) in enumerate(zip(body.urls, body.folio_labels)):
        created.append(
            await _create_page(
                db, manuscript.id, corpus.slug, label, start_seq + offset,
                image_master_path=url,
            )
        )

    manuscript.total_pages = (manuscript.total_pages or 0) + len(created)
    await db.commit()

    logger.info(
        "Images IIIF ingérées",
        extra={"corpus_id": corpus_id, "count": len(created)},
    )
    return IngestResponse(
        corpus_id=corpus_id,
        manuscript_id=manuscript.id,
        pages_created=len(created),
        page_ids=[page.id for page in created],
    )
backend/app/api/v1/jobs.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Endpoints de gestion des jobs de traitement (R10 — préfixe /api/v1/).
3
+
4
+ POST /api/v1/corpora/{id}/run → crée un job par page du corpus
5
+ POST /api/v1/pages/{id}/run → crée un job pour une page
6
+ GET /api/v1/jobs/{job_id} → état du job
7
+ POST /api/v1/jobs/{job_id}/retry → relance un job FAILED
8
+
9
+ Règle : les jobs sont créés en BDD et retournent immédiatement.
10
+ Le pipeline réel (analyzer) sera branché en Session C.
11
+ """
12
+ # 1. stdlib
13
+ import uuid
14
+ from datetime import datetime, timezone
15
+
16
+ # 2. third-party
17
+ from fastapi import APIRouter, Depends, HTTPException
18
+ from pydantic import BaseModel, ConfigDict
19
+ from sqlalchemy import select
20
+ from sqlalchemy.ext.asyncio import AsyncSession
21
+
22
+ # 3. local
23
+ from app.models.corpus import CorpusModel, PageModel
24
+ from app.models.database import get_db
25
+ from app.models.job import JobModel
26
+
27
+ router = APIRouter(tags=["jobs"])
28
+
29
+ _JOB_STATUS_PENDING = "pending"
30
+ _JOB_STATUS_FAILED = "failed"
31
+
32
+
33
+ # ── Schémas de réponse ────────────────────────────────────────────────────────
34
+
35
class JobResponse(BaseModel):
    """Serialized view of a JobModel (populated via from_attributes)."""

    model_config = ConfigDict(from_attributes=True)

    id: str
    corpus_id: str
    # None for corpus-level bookkeeping; set when the job targets one page.
    page_id: str | None
    # pending / running / done / failed
    status: str
    started_at: datetime | None
    finished_at: datetime | None
    error_message: str | None
    created_at: datetime
46
+
47
+
48
class CorpusRunResponse(BaseModel):
    """Response for POST /corpora/{id}/run: one job id per page of the corpus."""

    corpus_id: str
    jobs_created: int
    job_ids: list[str]
52
+
53
+
54
+ # ── Helpers ───────────────────────────────────────────────────────────────────
55
+
56
def _new_job(corpus_id: str, page_id: str | None) -> JobModel:
    """Build a fresh pending JobModel (not yet added to any session)."""
    return JobModel(
        id=str(uuid.uuid4()),
        corpus_id=corpus_id,
        page_id=page_id,
        status=_JOB_STATUS_PENDING,
        started_at=None,
        finished_at=None,
        error_message=None,
        created_at=datetime.now(timezone.utc),
    )
68
+
69
+
70
+ # ── Endpoints ─────────────────────────────────────────────────────────────────
71
+
72
@router.post("/corpora/{corpus_id}/run", response_model=CorpusRunResponse, status_code=202)
async def run_corpus(
    corpus_id: str, db: AsyncSession = Depends(get_db)
) -> CorpusRunResponse:
    """Queue the pipeline for every page of a corpus.

    Creates one pending JobModel per page and returns immediately (202).
    The actual worker is wired up in Session C.

    Raises:
        HTTPException 404: unknown corpus.
    """
    corpus = await db.get(CorpusModel, corpus_id)
    if corpus is None:
        raise HTTPException(status_code=404, detail="Corpus introuvable")

    # Local import kept as in the original module layout.
    from app.models.corpus import ManuscriptModel

    # Single round-trip: join pages to their manuscripts filtered by corpus,
    # instead of fetching manuscript ids first and issuing a second IN query.
    pages_result = await db.execute(
        select(PageModel)
        .join(ManuscriptModel, PageModel.manuscript_id == ManuscriptModel.id)
        .where(ManuscriptModel.corpus_id == corpus_id)
    )
    pages = list(pages_result.scalars().all())

    jobs = [_new_job(corpus_id, page.id) for page in pages]
    db.add_all(jobs)
    await db.commit()

    return CorpusRunResponse(
        corpus_id=corpus_id,
        jobs_created=len(jobs),
        job_ids=[j.id for j in jobs],
    )
106
+
107
+
108
@router.post("/pages/{page_id}/run", response_model=JobResponse, status_code=202)
async def run_page(
    page_id: str, db: AsyncSession = Depends(get_db)
) -> JobModel:
    """Queue the pipeline for a single page; returns the created pending job (202).

    Raises:
        HTTPException 404: unknown page, or page whose manuscript is missing.
    """
    page = await db.get(PageModel, page_id)
    if page is None:
        raise HTTPException(status_code=404, detail="Page introuvable")

    # The job needs the corpus id, which lives on the page's manuscript.
    from app.models.corpus import ManuscriptModel
    manuscript = await db.get(ManuscriptModel, page.manuscript_id)
    if manuscript is None:
        raise HTTPException(status_code=404, detail="Manuscrit introuvable")

    job = _new_job(manuscript.corpus_id, page_id)
    db.add(job)
    await db.commit()
    # Refresh so DB-populated fields are loaded before serialization.
    await db.refresh(job)
    return job
127
+
128
+
129
@router.get("/jobs/{job_id}", response_model=JobResponse)
async def get_job(job_id: str, db: AsyncSession = Depends(get_db)) -> JobModel:
    """Return the current state of a job, or 404 when it does not exist."""
    found = await db.get(JobModel, job_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="Job introuvable")
136
+
137
+
138
@router.post("/jobs/{job_id}/retry", response_model=JobResponse)
async def retry_job(job_id: str, db: AsyncSession = Depends(get_db)) -> JobModel:
    """Re-queue a failed job by resetting its status to 'pending'.

    Raises:
        HTTPException 404: unknown job.
        HTTPException 409: job is not currently in the 'failed' state.
    """
    job = await db.get(JobModel, job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job introuvable")
    if job.status != _JOB_STATUS_FAILED:
        raise HTTPException(
            status_code=409,
            detail=f"Le job ne peut être relancé que depuis l'état 'failed' (statut actuel : '{job.status}')",
        )
    # Clear every execution artefact so the worker treats it as brand new.
    job.status = _JOB_STATUS_PENDING
    job.error_message = None
    job.started_at = None
    job.finished_at = None
    await db.commit()
    await db.refresh(job)
    return job
backend/app/api/v1/models_api.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Endpoints de gestion des modèles IA (R10 — préfixe /api/v1/).
3
+
4
+ POST /api/v1/settings/api-key → valide la clé sans la stocker (R06)
5
+ GET /api/v1/models → liste les modèles disponibles
6
+ POST /api/v1/models/refresh → force la mise à jour de la liste
7
+ PUT /api/v1/corpora/{id}/model → associe un modèle à un corpus
8
+ GET /api/v1/corpora/{id}/model → modèle actif d'un corpus
9
+
10
+ Règle R06 : la clé API ne transite jamais vers la BDD — elle reste
11
+ exclusivement dans les variables d'environnement.
12
+ """
13
+ # 1. stdlib
14
+ import logging
15
+ from datetime import datetime, timezone
16
+
17
+ # 2. third-party
18
+ from fastapi import APIRouter, Depends, HTTPException
19
+ from pydantic import BaseModel, ConfigDict
20
+ from sqlalchemy.ext.asyncio import AsyncSession
21
+
22
+ # 3. local
23
+ from app.models.corpus import CorpusModel
24
+ from app.models.database import get_db
25
+ from app.models.model_config_db import ModelConfigDB
26
+ from app.services.ai.model_registry import list_all_models
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ router = APIRouter(tags=["models"])
31
+
32
+
33
+ # ── Schémas ───────────────────────────────────────────────────────────────────
34
+
35
class ApiKeyRequest(BaseModel):
    """Request body for API-key validation; the key is never persisted (R06)."""

    api_key: str
    provider_type: str = "google_ai_studio"
38
+
39
+
40
class ApiKeyResponse(BaseModel):
    """Validation result: validity flag, provider echoed back, model count, error text."""

    valid: bool
    provider: str
    model_count: int
    error: str | None = None
45
+
46
+
47
class ModelSelectRequest(BaseModel):
    """Request body for PUT /corpora/{id}/model; empty display_name falls back to model_id."""

    model_id: str
    provider_type: str
    display_name: str = ""
51
+
52
+
53
class ModelConfigResponse(BaseModel):
    """Serialized view of a ModelConfigDB row (populated via from_attributes)."""

    model_config = ConfigDict(from_attributes=True)

    corpus_id: str
    provider_type: str
    selected_model_id: str
    selected_model_display_name: str
    updated_at: datetime
61
+
62
+
63
class ModelsRefreshResponse(BaseModel):
    """Refreshed model list plus the UTC timestamp of the refresh."""

    models: list[dict]
    count: int
    refreshed_at: datetime
67
+
68
+
69
+ # ── Validation de clé API (isolé pour les tests) ──────────────────────────────
70
+
71
def _validate_api_key(api_key: str, provider_type: str) -> tuple[bool, int, str | None]:
    """Try to list models with the supplied key.

    Returns (valid, model_count, error_message). Kept at module level so
    tests can monkeypatch it without real network access.

    NOTE(review): provider_type is currently ignored — only the Google GenAI
    client is attempted regardless of its value; confirm this is intentional.
    """
    try:
        from google import genai  # local import to avoid a hard top-level dependency
        client = genai.Client(api_key=api_key)
        raw_models = list(client.models.list())
        # Count Gemini-named models as the reported model count.
        vision_count = sum(
            1 for m in raw_models if "gemini" in (getattr(m, "name", "") or "").lower()
        )
        return True, vision_count, None
    except Exception as exc:
        # Deliberately broad: any failure (bad key, network error, missing
        # package) is reported as an invalid key instead of surfacing a 500.
        return False, 0, str(exc)
87
+
88
+
89
+ # ── Endpoints ─────────────────────────────────────────────────────────────────
90
+
91
@router.post("/settings/api-key", response_model=ApiKeyResponse)
async def validate_api_key(body: ApiKeyRequest) -> ApiKeyResponse:
    """Check that an API key works by listing models with it.

    The key is NOT persisted anywhere (rule R06) — it lives only in
    environment variables.
    """
    is_valid, model_count, error_message = _validate_api_key(
        body.api_key, body.provider_type
    )
    return ApiKeyResponse(
        valid=is_valid,
        provider=body.provider_type,
        model_count=model_count,
        error=error_message,
    )
104
+
105
+
106
@router.get("/models", response_model=list[dict])
async def get_models() -> list[dict]:
    """List every model available on the configured providers."""
    return [model.model_dump() for model in list_all_models()]
111
+
112
+
113
@router.post("/models/refresh", response_model=ModelsRefreshResponse)
async def refresh_models() -> ModelsRefreshResponse:
    """Re-query the providers and return the fresh model list with a timestamp."""
    dumped = [model.model_dump() for model in list_all_models()]
    return ModelsRefreshResponse(
        models=dumped,
        count=len(dumped),
        refreshed_at=datetime.now(timezone.utc),
    )
122
+
123
+
124
@router.put("/corpora/{corpus_id}/model", response_model=ModelConfigResponse)
async def set_corpus_model(
    corpus_id: str,
    body: ModelSelectRequest,
    db: AsyncSession = Depends(get_db),
) -> ModelConfigDB:
    """Associate an AI model with a corpus (upsert keyed on corpus_id).

    Raises:
        HTTPException 404: unknown corpus.
    """
    corpus = await db.get(CorpusModel, corpus_id)
    if corpus is None:
        raise HTTPException(status_code=404, detail="Corpus introuvable")

    # Fall back to the model id when no display name was supplied.
    display_name = body.display_name or body.model_id

    # Upsert: at most one ModelConfigDB row per corpus (corpus_id is the PK).
    config = await db.get(ModelConfigDB, corpus_id)
    if config is None:
        config = ModelConfigDB(
            corpus_id=corpus_id,
            provider_type=body.provider_type,
            selected_model_id=body.model_id,
            selected_model_display_name=display_name,
            updated_at=datetime.now(timezone.utc),
        )
        db.add(config)
    else:
        config.provider_type = body.provider_type
        config.selected_model_id = body.model_id
        config.selected_model_display_name = display_name
        config.updated_at = datetime.now(timezone.utc)

    await db.commit()
    await db.refresh(config)
    return config
156
+
157
+
158
@router.get("/corpora/{corpus_id}/model", response_model=ModelConfigResponse)
async def get_corpus_model(
    corpus_id: str, db: AsyncSession = Depends(get_db)
) -> ModelConfigDB:
    """Return the active AI-model configuration of a corpus (404 when unset)."""
    if await db.get(CorpusModel, corpus_id) is None:
        raise HTTPException(status_code=404, detail="Corpus introuvable")

    config = await db.get(ModelConfigDB, corpus_id)
    if config is not None:
        return config
    raise HTTPException(
        status_code=404,
        detail="Aucun modèle configuré pour ce corpus",
    )
backend/app/main.py CHANGED
@@ -15,7 +15,7 @@ from fastapi.middleware.cors import CORSMiddleware
15
 
16
  # 3. local — on importe les modèles pour que Base.metadata les connaisse
17
  import app.models # noqa: F401 (enregistrement des modèles SQLAlchemy)
18
- from app.api.v1 import corpora, export, pages, profiles
19
  from app.models.database import Base, engine
20
 
21
  logger = logging.getLogger(__name__)
@@ -55,3 +55,6 @@ app.include_router(corpora.router, prefix=_V1_PREFIX)
55
  app.include_router(pages.router, prefix=_V1_PREFIX)
56
  app.include_router(export.router, prefix=_V1_PREFIX)
57
  app.include_router(profiles.router, prefix=_V1_PREFIX)
 
 
 
 
15
 
16
  # 3. local — on importe les modèles pour que Base.metadata les connaisse
17
  import app.models # noqa: F401 (enregistrement des modèles SQLAlchemy)
18
+ from app.api.v1 import corpora, export, ingest, jobs, models_api, pages, profiles
19
  from app.models.database import Base, engine
20
 
21
  logger = logging.getLogger(__name__)
 
55
  app.include_router(pages.router, prefix=_V1_PREFIX)
56
  app.include_router(export.router, prefix=_V1_PREFIX)
57
  app.include_router(profiles.router, prefix=_V1_PREFIX)
58
+ app.include_router(jobs.router, prefix=_V1_PREFIX)
59
+ app.include_router(ingest.router, prefix=_V1_PREFIX)
60
+ app.include_router(models_api.router, prefix=_V1_PREFIX)
backend/app/models/__init__.py CHANGED
@@ -3,5 +3,13 @@ Modèles SQLAlchemy — importés ici pour que Base.metadata les connaisse
3
  au moment de la création des tables (Base.metadata.create_all).
4
  """
5
  from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
 
 
6
 
7
- __all__ = ["CorpusModel", "ManuscriptModel", "PageModel"]
 
 
 
 
 
 
 
3
  au moment de la création des tables (Base.metadata.create_all).
4
  """
5
  from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
6
+ from app.models.job import JobModel
7
+ from app.models.model_config_db import ModelConfigDB
8
 
9
+ __all__ = [
10
+ "CorpusModel",
11
+ "ManuscriptModel",
12
+ "PageModel",
13
+ "JobModel",
14
+ "ModelConfigDB",
15
+ ]
backend/app/models/job.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Modèle SQLAlchemy 2.0 — table des jobs de traitement.
3
+
4
+ Un job suit l'exécution du pipeline sur une page.
5
+ corpus.run → crée un JobModel par page du corpus (page_id renseigné)
6
+ pages.run → crée un JobModel pour la page cible
7
+
8
+ Cycle de vie :
9
+ pending → running → done
10
+ ↘ failed
11
+ """
12
+ # 1. stdlib
13
+ from datetime import datetime
14
+
15
+ # 2. third-party
16
+ from sqlalchemy import DateTime, ForeignKey, String, Text
17
+ from sqlalchemy.orm import Mapped, mapped_column, relationship
18
+
19
+ # 3. local
20
+ from app.models.database import Base
21
+
22
+
23
class JobModel(Base):
    """Tracks one pipeline job (1 job = 1 page).

    Lifecycle: pending → running → done, or pending → running → failed.
    """

    __tablename__ = "jobs"

    id: Mapped[str] = mapped_column(String, primary_key=True)
    corpus_id: Mapped[str] = mapped_column(
        String, ForeignKey("corpora.id"), nullable=False, index=True
    )
    # Nullable: reserved for jobs that are not bound to a single page.
    page_id: Mapped[str | None] = mapped_column(
        String, ForeignKey("pages.id"), nullable=True, index=True
    )
    # pending / running / done / failed
    status: Mapped[str] = mapped_column(String, nullable=False, default="pending")
    # NOTE(review): DateTime without timezone=True while the jobs router writes
    # aware UTC datetimes — confirm the backend round-trips tzinfo as expected.
    started_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
    finished_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False)
backend/app/models/model_config_db.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Modèle SQLAlchemy 2.0 — configuration du modèle IA par corpus.
3
+
4
+ Une seule ligne par corpus (corpus_id = PK).
5
+ La clé API n'est JAMAIS stockée ici (R06) — elle reste dans l'environnement.
6
+ """
7
+ # 1. stdlib
8
+ from datetime import datetime
9
+
10
+ # 2. third-party
11
+ from sqlalchemy import DateTime, ForeignKey, String
12
+ from sqlalchemy.orm import Mapped, mapped_column
13
+
14
+ # 3. local
15
+ from app.models.database import Base
16
+
17
+
18
class ModelConfigDB(Base):
    """Selected AI model for a corpus (exactly one row per corpus).

    The API key is NEVER stored here (rule R06) — it stays in the environment.
    """

    __tablename__ = "model_configs"

    # corpus_id doubles as the primary key, enforcing one config per corpus.
    corpus_id: Mapped[str] = mapped_column(
        String, ForeignKey("corpora.id"), primary_key=True
    )
    provider_type: Mapped[str] = mapped_column(String, nullable=False)
    selected_model_id: Mapped[str] = mapped_column(String, nullable=False)
    selected_model_display_name: Mapped[str] = mapped_column(String, nullable=False)
    updated_at: Mapped[datetime] = mapped_column(DateTime, nullable=False)
backend/pyproject.toml CHANGED
@@ -18,6 +18,7 @@ dependencies = [
18
  "httpx>=0.27",
19
  "lxml>=5.2",
20
  "Pillow>=10.3",
 
21
  ]
22
 
23
  [project.optional-dependencies]
 
18
  "httpx>=0.27",
19
  "lxml>=5.2",
20
  "Pillow>=10.3",
21
+ "python-multipart>=0.0.9",
22
  ]
23
 
24
  [project.optional-dependencies]
backend/tests/test_api_ingest.py ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests des endpoints d'ingestion /api/v1/corpora/{id}/ingest/* (Sprint 4 — Session B).
3
+
4
+ Stratégie :
5
+ - BDD SQLite en mémoire
6
+ - Appels réseau mockés via monkeypatch (_fetch_json_manifest)
7
+ - Écriture disque redirigée vers tmp_path (fixture pytest) et vérifiée sur disque
8
+
9
+ Vérifie :
10
+ - POST /ingest/files → pages créées, IDs retournés
11
+ - POST /ingest/iiif-manifest → manifest parsé, pages créées
12
+ - POST /ingest/iiif-images → pages créées depuis liste d'URLs
13
+ - 404 si corpus inexistant
14
+ - 422 si données invalides
15
+ """
16
+ # 1. stdlib
17
+ import uuid
18
+ from datetime import datetime, timezone
19
+ from unittest.mock import AsyncMock
21
+
22
+ # 2. third-party
23
+ import pytest
24
+
25
+ # 3. local
26
+ import app.api.v1.ingest as ingest_module
27
+ from app.models.corpus import CorpusModel
28
+ from tests.conftest_api import async_client, db_session # noqa: F401
29
+
30
+ _NOW = datetime.now(timezone.utc)
31
+
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Helpers
35
+ # ---------------------------------------------------------------------------
36
+
37
+ async def _make_corpus(db, slug="test-ingest"):
38
+ corpus = CorpusModel(
39
+ id=str(uuid.uuid4()), slug=slug, title="Corpus Test",
40
+ profile_id="medieval-illuminated", created_at=_NOW, updated_at=_NOW,
41
+ )
42
+ db.add(corpus)
43
+ await db.commit()
44
+ await db.refresh(corpus)
45
+ return corpus
46
+
47
+
48
+ def _iiif3_manifest(n_canvases: int = 3) -> dict:
49
+ """Génère un manifest IIIF 3.0 minimal avec n canvases."""
50
+ return {
51
+ "@context": "http://iiif.io/api/presentation/3/context.json",
52
+ "id": "https://example.com/manifest",
53
+ "type": "Manifest",
54
+ "label": {"fr": ["Beatus de Saint-Sever"]},
55
+ "items": [
56
+ {
57
+ "id": f"https://example.com/canvas/{i}",
58
+ "type": "Canvas",
59
+ "label": {"none": [f"f{i:03d}r"]},
60
+ "width": 1500, "height": 2000,
61
+ "items": [
62
+ {
63
+ "id": f"https://example.com/canvas/{i}/page",
64
+ "type": "AnnotationPage",
65
+ "items": [
66
+ {
67
+ "id": f"https://example.com/canvas/{i}/annotation",
68
+ "type": "Annotation",
69
+ "motivation": "painting",
70
+ "body": {
71
+ "id": f"https://example.com/images/{i}.jpg",
72
+ "type": "Image",
73
+ "format": "image/jpeg",
74
+ },
75
+ "target": f"https://example.com/canvas/{i}",
76
+ }
77
+ ],
78
+ }
79
+ ],
80
+ }
81
+ for i in range(1, n_canvases + 1)
82
+ ],
83
+ }
84
+
85
+
86
+ def _iiif2_manifest(n_canvases: int = 2) -> dict:
87
+ """Génère un manifest IIIF 2.x minimal."""
88
+ return {
89
+ "@context": "http://iiif.io/api/presentation/2/context.json",
90
+ "@type": "sc:Manifest",
91
+ "label": "Test Manuscript 2.x",
92
+ "sequences": [
93
+ {
94
+ "canvases": [
95
+ {
96
+ "@id": f"https://example.com/canvas/{i}",
97
+ "@type": "sc:Canvas",
98
+ "label": f"f{i:03d}r",
99
+ "images": [
100
+ {
101
+ "resource": {
102
+ "@id": f"https://example.com/images/{i}.jpg"
103
+ }
104
+ }
105
+ ],
106
+ }
107
+ for i in range(1, n_canvases + 1)
108
+ ]
109
+ }
110
+ ],
111
+ }
112
+
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # POST /api/v1/corpora/{id}/ingest/files
116
+ # ---------------------------------------------------------------------------
117
+
118
+ @pytest.mark.asyncio
119
+ async def test_ingest_files_corpus_not_found(async_client):
120
+ response = await async_client.post(
121
+ "/api/v1/corpora/nonexistent/ingest/files",
122
+ files=[("files", ("img.jpg", b"data", "image/jpeg"))],
123
+ )
124
+ assert response.status_code == 404
125
+
126
+
127
+ @pytest.mark.asyncio
128
+ async def test_ingest_files_ok(async_client, db_session, tmp_path, monkeypatch):
129
+ corpus = await _make_corpus(db_session)
130
+ # Redirige settings.data_dir vers tmp_path (restauré automatiquement par monkeypatch)
131
+ monkeypatch.setattr("app.config.settings.data_dir", tmp_path)
132
+
133
+ import app.config as _cfg
135
+ original_data_dir = _cfg.settings.data_dir
136
+ _cfg.settings.data_dir = tmp_path
137
+
138
+ try:
139
+ response = await async_client.post(
140
+ f"/api/v1/corpora/{corpus.id}/ingest/files",
141
+ files=[
142
+ ("files", ("f001r.jpg", b"fake_jpeg_data_1", "image/jpeg")),
143
+ ("files", ("f002r.jpg", b"fake_jpeg_data_2", "image/jpeg")),
144
+ ],
145
+ )
146
+ assert response.status_code == 201
147
+ data = response.json()
148
+ assert data["pages_created"] == 2
149
+ assert len(data["page_ids"]) == 2
150
+ assert data["corpus_id"] == corpus.id
151
+ finally:
152
+ _cfg.settings.data_dir = original_data_dir
153
+
154
+
155
+ @pytest.mark.asyncio
156
+ async def test_ingest_files_creates_manuscript(async_client, db_session, tmp_path):
157
+ corpus = await _make_corpus(db_session)
158
+
159
+ import app.config as _cfg
160
+ original = _cfg.settings.data_dir
161
+ _cfg.settings.data_dir = tmp_path
162
+ try:
163
+ response = await async_client.post(
164
+ f"/api/v1/corpora/{corpus.id}/ingest/files",
165
+ files=[("files", ("f001r.jpg", b"data", "image/jpeg"))],
166
+ )
167
+ data = response.json()
168
+ assert "manuscript_id" in data
169
+ assert data["manuscript_id"] # non-vide
170
+ finally:
171
+ _cfg.settings.data_dir = original
172
+
173
+
174
+ @pytest.mark.asyncio
175
+ async def test_ingest_files_folio_from_filename(async_client, db_session, tmp_path):
176
+ """Le folio_label est dérivé du nom de fichier (sans extension)."""
177
+ corpus = await _make_corpus(db_session)
178
+
179
+ import app.config as _cfg
180
+ original = _cfg.settings.data_dir
181
+ _cfg.settings.data_dir = tmp_path
182
+ try:
183
+ response = await async_client.post(
184
+ f"/api/v1/corpora/{corpus.id}/ingest/files",
185
+ files=[("files", ("f013v.jpg", b"data", "image/jpeg"))],
186
+ )
187
+ data = response.json()
188
+ # L'ID de page contient le folio_label
189
+ assert any("f013v" in pid for pid in data["page_ids"])
190
+ finally:
191
+ _cfg.settings.data_dir = original
192
+
193
+
194
+ @pytest.mark.asyncio
195
+ async def test_ingest_files_writes_to_disk(async_client, db_session, tmp_path):
196
+ """Les fichiers sont bien écrits dans data/corpora/{slug}/masters/."""
197
+ corpus = await _make_corpus(db_session, slug="test-write")
198
+
199
+ import app.config as _cfg
200
+ original = _cfg.settings.data_dir
201
+ _cfg.settings.data_dir = tmp_path
202
+ try:
203
+ await async_client.post(
204
+ f"/api/v1/corpora/{corpus.id}/ingest/files",
205
+ files=[("files", ("f001r.jpg", b"JPEG_CONTENT", "image/jpeg"))],
206
+ )
207
+ expected = tmp_path / "corpora" / "test-write" / "masters" / "f001r" / "f001r.jpg"
208
+ assert expected.exists()
209
+ assert expected.read_bytes() == b"JPEG_CONTENT"
210
+ finally:
211
+ _cfg.settings.data_dir = original
212
+
213
+
214
+ # ---------------------------------------------------------------------------
215
+ # POST /api/v1/corpora/{id}/ingest/iiif-manifest
216
+ # ---------------------------------------------------------------------------
217
+
218
+ @pytest.mark.asyncio
219
+ async def test_ingest_manifest_corpus_not_found(async_client):
220
+ response = await async_client.post(
221
+ "/api/v1/corpora/nonexistent/ingest/iiif-manifest",
222
+ json={"manifest_url": "https://example.com/manifest"},
223
+ )
224
+ assert response.status_code == 404
225
+
226
+
227
+ @pytest.mark.asyncio
228
+ async def test_ingest_manifest_iiif3_ok(async_client, db_session, monkeypatch):
229
+ corpus = await _make_corpus(db_session)
230
+ manifest = _iiif3_manifest(n_canvases=3)
231
+
232
+ async def fake_fetch(url: str) -> dict:
233
+ return manifest
234
+
235
+ monkeypatch.setattr(ingest_module, "_fetch_json_manifest", fake_fetch)
236
+
237
+ response = await async_client.post(
238
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
239
+ json={"manifest_url": "https://example.com/manifest"},
240
+ )
241
+ assert response.status_code == 201
242
+ data = response.json()
243
+ assert data["pages_created"] == 3
244
+ assert len(data["page_ids"]) == 3
245
+
246
+
247
+ @pytest.mark.asyncio
248
+ async def test_ingest_manifest_iiif2_ok(async_client, db_session, monkeypatch):
249
+ corpus = await _make_corpus(db_session)
250
+ manifest = _iiif2_manifest(n_canvases=2)
251
+
252
+ async def fake_fetch(url: str) -> dict:
253
+ return manifest
254
+
255
+ monkeypatch.setattr(ingest_module, "_fetch_json_manifest", fake_fetch)
256
+
257
+ response = await async_client.post(
258
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
259
+ json={"manifest_url": "https://example.com/manifest"},
260
+ )
261
+ assert response.status_code == 201
262
+ assert response.json()["pages_created"] == 2
263
+
264
+
265
+ @pytest.mark.asyncio
266
+ async def test_ingest_manifest_extracts_folio_labels(async_client, db_session, monkeypatch):
267
+ """Les folio_labels sont extraits des labels des canvases."""
268
+ corpus = await _make_corpus(db_session)
269
+ manifest = _iiif3_manifest(n_canvases=2)
270
+
271
+ async def fake_fetch(url: str) -> dict:
272
+ return manifest
273
+
274
+ monkeypatch.setattr(ingest_module, "_fetch_json_manifest", fake_fetch)
275
+
276
+ data = (await async_client.post(
277
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
278
+ json={"manifest_url": "https://example.com/manifest"},
279
+ )).json()
280
+
281
+ # Canvas labels: "f001r", "f002r"
282
+ assert any("f001r" in pid for pid in data["page_ids"])
283
+ assert any("f002r" in pid for pid in data["page_ids"])
284
+
285
+
286
+ @pytest.mark.asyncio
287
+ async def test_ingest_manifest_empty_canvases_422(async_client, db_session, monkeypatch):
288
+ """Manifest sans canvases → 422."""
289
+ corpus = await _make_corpus(db_session)
290
+
291
+ async def fake_fetch(url: str) -> dict:
292
+ return {"type": "Manifest", "items": []}
293
+
294
+ monkeypatch.setattr(ingest_module, "_fetch_json_manifest", fake_fetch)
295
+
296
+ response = await async_client.post(
297
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
298
+ json={"manifest_url": "https://example.com/manifest"},
299
+ )
300
+ assert response.status_code == 422
301
+
302
+
303
+ @pytest.mark.asyncio
304
+ async def test_ingest_manifest_network_error_502(async_client, db_session, monkeypatch):
305
+ """Erreur réseau → 502."""
306
+ corpus = await _make_corpus(db_session)
307
+ import httpx
308
+
309
+ async def fake_fetch(url: str) -> dict:
310
+ raise httpx.RequestError("Connection refused")
311
+
312
+ monkeypatch.setattr(ingest_module, "_fetch_json_manifest", fake_fetch)
313
+
314
+ response = await async_client.post(
315
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
316
+ json={"manifest_url": "https://example.com/manifest"},
317
+ )
318
+ assert response.status_code == 502
319
+
320
+
321
+ @pytest.mark.asyncio
322
+ async def test_ingest_manifest_returns_corpus_id(async_client, db_session, monkeypatch):
323
+ corpus = await _make_corpus(db_session)
324
+ monkeypatch.setattr(ingest_module, "_fetch_json_manifest", AsyncMock(return_value=_iiif3_manifest(1)))
325
+
326
+ data = (await async_client.post(
327
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-manifest",
328
+ json={"manifest_url": "https://example.com/manifest"},
329
+ )).json()
330
+ assert data["corpus_id"] == corpus.id
331
+
332
+
333
+ # ---------------------------------------------------------------------------
334
+ # POST /api/v1/corpora/{id}/ingest/iiif-images
335
+ # ---------------------------------------------------------------------------
336
+
337
+ @pytest.mark.asyncio
338
+ async def test_ingest_images_corpus_not_found(async_client):
339
+ response = await async_client.post(
340
+ "/api/v1/corpora/nonexistent/ingest/iiif-images",
341
+ json={"urls": ["https://x.com/1.jpg"], "folio_labels": ["f001r"]},
342
+ )
343
+ assert response.status_code == 404
344
+
345
+
346
+ @pytest.mark.asyncio
347
+ async def test_ingest_images_ok(async_client, db_session):
348
+ corpus = await _make_corpus(db_session)
349
+ urls = ["https://example.com/img1.jpg", "https://example.com/img2.jpg"]
350
+ labels = ["f001r", "f002r"]
351
+
352
+ response = await async_client.post(
353
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-images",
354
+ json={"urls": urls, "folio_labels": labels},
355
+ )
356
+ assert response.status_code == 201
357
+ data = response.json()
358
+ assert data["pages_created"] == 2
359
+ assert len(data["page_ids"]) == 2
360
+
361
+
362
+ @pytest.mark.asyncio
363
+ async def test_ingest_images_folio_labels_in_ids(async_client, db_session):
364
+ corpus = await _make_corpus(db_session)
365
+ response = await async_client.post(
366
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-images",
367
+ json={
368
+ "urls": ["https://example.com/a.jpg"],
369
+ "folio_labels": ["f013v"],
370
+ },
371
+ )
372
+ data = response.json()
373
+ assert any("f013v" in pid for pid in data["page_ids"])
374
+
375
+
376
+ @pytest.mark.asyncio
377
+ async def test_ingest_images_mismatched_lengths_422(async_client, db_session):
378
+ """urls et folio_labels de longueurs différentes → 422."""
379
+ corpus = await _make_corpus(db_session)
380
+ response = await async_client.post(
381
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-images",
382
+ json={"urls": ["https://a.com/1.jpg", "https://a.com/2.jpg"], "folio_labels": ["f001r"]},
383
+ )
384
+ assert response.status_code == 422
385
+
386
+
387
+ @pytest.mark.asyncio
388
+ async def test_ingest_images_empty_urls_422(async_client, db_session):
389
+ corpus = await _make_corpus(db_session)
390
+ response = await async_client.post(
391
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-images",
392
+ json={"urls": [], "folio_labels": []},
393
+ )
394
+ assert response.status_code == 422
395
+
396
+
397
+ @pytest.mark.asyncio
398
+ async def test_ingest_images_pages_in_sequence_order(async_client, db_session):
399
+ """Les pages ont des séquences consécutives."""
400
+ corpus = await _make_corpus(db_session)
401
+ n = 4
402
+ urls = [f"https://example.com/{i}.jpg" for i in range(1, n + 1)]
403
+ labels = [f"f{i:03d}r" for i in range(1, n + 1)]
404
+
405
+ data = (await async_client.post(
406
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-images",
407
+ json={"urls": urls, "folio_labels": labels},
408
+ )).json()
409
+ assert data["pages_created"] == n
410
+
411
+
412
+ @pytest.mark.asyncio
413
+ async def test_ingest_images_corpus_id_in_response(async_client, db_session):
414
+ corpus = await _make_corpus(db_session)
415
+ data = (await async_client.post(
416
+ f"/api/v1/corpora/{corpus.id}/ingest/iiif-images",
417
+ json={"urls": ["https://x.com/1.jpg"], "folio_labels": ["f001r"]},
418
+ )).json()
419
+ assert data["corpus_id"] == corpus.id
backend/tests/test_api_jobs.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests des endpoints /api/v1/jobs et /api/v1/corpora/{id}/run (Sprint 4 — Session B).
3
+
4
+ Vérifie :
5
+ - POST /api/v1/corpora/{id}/run → 202 + jobs_created + job_ids
6
+ - POST /api/v1/pages/{id}/run → 202 + job unique
7
+ - GET /api/v1/jobs/{job_id} → 200 ou 404
8
+ - POST /api/v1/jobs/{job_id}/retry → 200 (FAILED) ou 409 (autre statut)
9
+ - Isolation : corpus/page inexistants → 404
10
+ """
11
+ # 1. stdlib
12
+ import uuid
13
+ from datetime import datetime, timezone
14
+
15
+ # 2. third-party
16
+ import pytest
17
+
18
+ # 3. local
19
+ from app.models.corpus import CorpusModel, ManuscriptModel, PageModel
20
+ from app.models.job import JobModel
21
+ from tests.conftest_api import async_client, db_session # noqa: F401
22
+
23
+ _NOW = datetime.now(timezone.utc)
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Helpers — création de données de test
28
+ # ---------------------------------------------------------------------------
29
+
30
+ async def _make_corpus(db, slug="test-c"):
31
+ corpus = CorpusModel(
32
+ id=str(uuid.uuid4()), slug=slug, title="Test", profile_id="medieval-illuminated",
33
+ created_at=_NOW, updated_at=_NOW,
34
+ )
35
+ db.add(corpus)
36
+ await db.commit()
37
+ await db.refresh(corpus)
38
+ return corpus
39
+
40
+
41
+ async def _make_manuscript(db, corpus_id):
42
+ ms = ManuscriptModel(
43
+ id=str(uuid.uuid4()), corpus_id=corpus_id, title="MS", total_pages=0,
44
+ )
45
+ db.add(ms)
46
+ await db.commit()
47
+ await db.refresh(ms)
48
+ return ms
49
+
50
+
51
+ async def _make_page(db, ms_id, folio="f001r", seq=1):
52
+ page = PageModel(
53
+ id=str(uuid.uuid4()), manuscript_id=ms_id, folio_label=folio,
54
+ sequence=seq, processing_status="INGESTED",
55
+ )
56
+ db.add(page)
57
+ await db.commit()
58
+ await db.refresh(page)
59
+ return page
60
+
61
+
62
+ async def _make_failed_job(db, corpus_id, page_id=None):
63
+ """Crée un job en état FAILED pour tester retry."""
64
+ job = JobModel(
65
+ id=str(uuid.uuid4()),
66
+ corpus_id=corpus_id,
67
+ page_id=page_id,
68
+ status="failed",
69
+ error_message="Simulated failure",
70
+ created_at=_NOW,
71
+ )
72
+ db.add(job)
73
+ await db.commit()
74
+ await db.refresh(job)
75
+ return job
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # POST /api/v1/corpora/{id}/run
80
+ # ---------------------------------------------------------------------------
81
+
82
+ @pytest.mark.asyncio
83
+ async def test_run_corpus_not_found(async_client):
84
+ response = await async_client.post("/api/v1/corpora/nonexistent/run")
85
+ assert response.status_code == 404
86
+
87
+
88
+ @pytest.mark.asyncio
89
+ async def test_run_corpus_no_pages(async_client, db_session):
90
+ """Corpus sans pages → 202, jobs_created = 0."""
91
+ corpus = await _make_corpus(db_session)
92
+ response = await async_client.post(f"/api/v1/corpora/{corpus.id}/run")
93
+ assert response.status_code == 202
94
+ data = response.json()
95
+ assert data["jobs_created"] == 0
96
+ assert data["job_ids"] == []
97
+ assert data["corpus_id"] == corpus.id
98
+
99
+
100
+ @pytest.mark.asyncio
101
+ async def test_run_corpus_creates_jobs_per_page(async_client, db_session):
102
+ """Corpus avec 3 pages → 3 jobs créés."""
103
+ corpus = await _make_corpus(db_session)
104
+ ms = await _make_manuscript(db_session, corpus.id)
105
+ for i in range(3):
106
+ await _make_page(db_session, ms.id, folio=f"f{i+1:03d}r", seq=i + 1)
107
+
108
+ response = await async_client.post(f"/api/v1/corpora/{corpus.id}/run")
109
+ assert response.status_code == 202
110
+ data = response.json()
111
+ assert data["jobs_created"] == 3
112
+ assert len(data["job_ids"]) == 3
113
+
114
+
115
+ @pytest.mark.asyncio
116
+ async def test_run_corpus_job_ids_are_unique(async_client, db_session):
117
+ corpus = await _make_corpus(db_session)
118
+ ms = await _make_manuscript(db_session, corpus.id)
119
+ for i in range(2):
120
+ await _make_page(db_session, ms.id, folio=f"f{i+1:03d}r", seq=i + 1)
121
+
122
+ data = (await async_client.post(f"/api/v1/corpora/{corpus.id}/run")).json()
123
+ assert len(set(data["job_ids"])) == 2 # all unique
124
+
125
+
126
+ @pytest.mark.asyncio
127
+ async def test_run_corpus_jobs_are_pending(async_client, db_session):
128
+ """Les jobs créés par corpus.run ont le statut 'pending'."""
129
+ corpus = await _make_corpus(db_session)
130
+ ms = await _make_manuscript(db_session, corpus.id)
131
+ await _make_page(db_session, ms.id)
132
+
133
+ run_data = (await async_client.post(f"/api/v1/corpora/{corpus.id}/run")).json()
134
+ job_id = run_data["job_ids"][0]
135
+
136
+ job_data = (await async_client.get(f"/api/v1/jobs/{job_id}")).json()
137
+ assert job_data["status"] == "pending"
138
+
139
+
140
+ # ---------------------------------------------------------------------------
141
+ # POST /api/v1/pages/{id}/run
142
+ # ---------------------------------------------------------------------------
143
+
144
+ @pytest.mark.asyncio
145
+ async def test_run_page_not_found(async_client):
146
+ response = await async_client.post("/api/v1/pages/nonexistent/run")
147
+ assert response.status_code == 404
148
+
149
+
150
+ @pytest.mark.asyncio
151
+ async def test_run_page_creates_job(async_client, db_session):
152
+ corpus = await _make_corpus(db_session)
153
+ ms = await _make_manuscript(db_session, corpus.id)
154
+ page = await _make_page(db_session, ms.id)
155
+
156
+ response = await async_client.post(f"/api/v1/pages/{page.id}/run")
157
+ assert response.status_code == 202
158
+
159
+
160
+ @pytest.mark.asyncio
161
+ async def test_run_page_job_fields(async_client, db_session):
162
+ corpus = await _make_corpus(db_session)
163
+ ms = await _make_manuscript(db_session, corpus.id)
164
+ page = await _make_page(db_session, ms.id)
165
+
166
+ data = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
167
+ assert data["page_id"] == page.id
168
+ assert data["corpus_id"] == corpus.id
169
+ assert data["status"] == "pending"
170
+ assert data["started_at"] is None
171
+ assert data["finished_at"] is None
172
+ assert data["error_message"] is None
173
+
174
+
175
+ @pytest.mark.asyncio
176
+ async def test_run_page_job_id_is_uuid(async_client, db_session):
177
+ corpus = await _make_corpus(db_session)
178
+ ms = await _make_manuscript(db_session, corpus.id)
179
+ page = await _make_page(db_session, ms.id)
180
+
181
+ data = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
182
+ assert len(data["id"]) == 36
183
+
184
+
185
+ @pytest.mark.asyncio
186
+ async def test_run_page_multiple_times_creates_multiple_jobs(async_client, db_session):
187
+ """Lancer run sur la même page deux fois crée deux jobs distincts."""
188
+ corpus = await _make_corpus(db_session)
189
+ ms = await _make_manuscript(db_session, corpus.id)
190
+ page = await _make_page(db_session, ms.id)
191
+
192
+ r1 = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
193
+ r2 = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
194
+ assert r1["id"] != r2["id"]
195
+
196
+
197
+ # ---------------------------------------------------------------------------
198
+ # GET /api/v1/jobs/{job_id}
199
+ # ---------------------------------------------------------------------------
200
+
201
+ @pytest.mark.asyncio
202
+ async def test_get_job_not_found(async_client):
203
+ response = await async_client.get("/api/v1/jobs/nonexistent")
204
+ assert response.status_code == 404
205
+
206
+
207
+ @pytest.mark.asyncio
208
+ async def test_get_job_ok(async_client, db_session):
209
+ corpus = await _make_corpus(db_session)
210
+ ms = await _make_manuscript(db_session, corpus.id)
211
+ page = await _make_page(db_session, ms.id)
212
+
213
+ run_data = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
214
+ job_id = run_data["id"]
215
+
216
+ response = await async_client.get(f"/api/v1/jobs/{job_id}")
217
+ assert response.status_code == 200
218
+ assert response.json()["id"] == job_id
219
+
220
+
221
+ @pytest.mark.asyncio
222
+ async def test_get_job_fields(async_client, db_session):
223
+ corpus = await _make_corpus(db_session)
224
+ ms = await _make_manuscript(db_session, corpus.id)
225
+ page = await _make_page(db_session, ms.id)
226
+
227
+ run_data = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
228
+ data = (await async_client.get(f"/api/v1/jobs/{run_data['id']}")).json()
229
+
230
+ assert "status" in data
231
+ assert "corpus_id" in data
232
+ assert "page_id" in data
233
+ assert "created_at" in data
234
+
235
+
236
+ # ---------------------------------------------------------------------------
237
+ # POST /api/v1/jobs/{job_id}/retry
238
+ # ---------------------------------------------------------------------------
239
+
240
+ @pytest.mark.asyncio
241
+ async def test_retry_job_not_found(async_client):
242
+ response = await async_client.post("/api/v1/jobs/nonexistent/retry")
243
+ assert response.status_code == 404
244
+
245
+
246
+ @pytest.mark.asyncio
247
+ async def test_retry_pending_job_409(async_client, db_session):
248
+ """Un job en état 'pending' ne peut pas être relancé."""
249
+ corpus = await _make_corpus(db_session)
250
+ ms = await _make_manuscript(db_session, corpus.id)
251
+ page = await _make_page(db_session, ms.id)
252
+
253
+ job_data = (await async_client.post(f"/api/v1/pages/{page.id}/run")).json()
254
+ response = await async_client.post(f"/api/v1/jobs/{job_data['id']}/retry")
255
+ assert response.status_code == 409
256
+
257
+
258
+ @pytest.mark.asyncio
259
+ async def test_retry_failed_job_ok(async_client, db_session):
260
+ """Un job en état 'failed' peut être relancé → status passe à 'pending'."""
261
+ corpus = await _make_corpus(db_session)
262
+ job = await _make_failed_job(db_session, corpus.id)
263
+
264
+ response = await async_client.post(f"/api/v1/jobs/{job.id}/retry")
265
+ assert response.status_code == 200
266
+ data = response.json()
267
+ assert data["status"] == "pending"
268
+
269
+
270
+ @pytest.mark.asyncio
271
+ async def test_retry_failed_job_clears_error(async_client, db_session):
272
+ corpus = await _make_corpus(db_session)
273
+ job = await _make_failed_job(db_session, corpus.id)
274
+
275
+ data = (await async_client.post(f"/api/v1/jobs/{job.id}/retry")).json()
276
+ assert data["error_message"] is None
277
+ assert data["started_at"] is None
278
+ assert data["finished_at"] is None
279
+
280
+
281
+ @pytest.mark.asyncio
282
+ async def test_retry_failed_job_is_retrievable(async_client, db_session):
283
+ """Après retry, GET /jobs/{id} reflète le nouveau statut."""
284
+ corpus = await _make_corpus(db_session)
285
+ job = await _make_failed_job(db_session, corpus.id)
286
+
287
+ await async_client.post(f"/api/v1/jobs/{job.id}/retry")
288
+ data = (await async_client.get(f"/api/v1/jobs/{job.id}")).json()
289
+ assert data["status"] == "pending"
backend/tests/test_api_models.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests des endpoints /api/v1/models et /api/v1/settings/api-key (Sprint 4 — Session B).
3
+
4
+ Stratégie :
5
+ - Appels Google AI mockés via monkeypatch sur _validate_api_key et list_all_models
6
+ - BDD SQLite en mémoire pour les endpoints qui touchent la BDD (PUT/GET model)
7
+
8
+ Vérifie :
9
+ - POST /api/v1/settings/api-key → valid/invalid
10
+ - GET /api/v1/models → liste mockée
11
+ - POST /api/v1/models/refresh → mise à jour + timestamp
12
+ - PUT /api/v1/corpora/{id}/model → création + mise à jour
13
+ - GET /api/v1/corpora/{id}/model → 200 ou 404
14
+ """
15
+ # 1. stdlib
16
+ import uuid
17
+ from datetime import datetime, timezone
18
+
19
+ # 2. third-party
20
+ import pytest
21
+
22
+ # 3. local
23
+ import app.api.v1.models_api as models_api_module
24
+ from app.models.corpus import CorpusModel
25
+ from app.schemas.model_config import ModelInfo, ProviderType
26
+ from tests.conftest_api import async_client, db_session # noqa: F401
27
+
28
+ _NOW = datetime.now(timezone.utc)
29
+
30
+ _MOCK_MODELS = [
31
+ ModelInfo(
32
+ model_id="gemini-2.0-flash",
33
+ display_name="Gemini 2.0 Flash",
34
+ provider=ProviderType.GOOGLE_AI_STUDIO,
35
+ supports_vision=True,
36
+ input_token_limit=1_000_000,
37
+ output_token_limit=8192,
38
+ ),
39
+ ModelInfo(
40
+ model_id="gemini-1.5-pro",
41
+ display_name="Gemini 1.5 Pro",
42
+ provider=ProviderType.GOOGLE_AI_STUDIO,
43
+ supports_vision=True,
44
+ input_token_limit=2_000_000,
45
+ output_token_limit=8192,
46
+ ),
47
+ ]
48
+
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # Helpers
52
+ # ---------------------------------------------------------------------------
53
+
54
+ async def _make_corpus(db, slug="models-test"):
55
+ corpus = CorpusModel(
56
+ id=str(uuid.uuid4()), slug=slug, title="Models Test",
57
+ profile_id="medieval-illuminated", created_at=_NOW, updated_at=_NOW,
58
+ )
59
+ db.add(corpus)
60
+ await db.commit()
61
+ await db.refresh(corpus)
62
+ return corpus
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # POST /api/v1/settings/api-key
67
+ # ---------------------------------------------------------------------------
68
+
69
+ @pytest.mark.asyncio
70
+ async def test_api_key_valid(async_client, monkeypatch):
71
+ monkeypatch.setattr(
72
+ models_api_module, "_validate_api_key",
73
+ lambda key, provider: (True, 3, None),
74
+ )
75
+ response = await async_client.post(
76
+ "/api/v1/settings/api-key",
77
+ json={"api_key": "AIza-test-key", "provider_type": "google_ai_studio"},
78
+ )
79
+ assert response.status_code == 200
80
+ data = response.json()
81
+ assert data["valid"] is True
82
+ assert data["model_count"] == 3
83
+ assert data["provider"] == "google_ai_studio"
84
+ assert data["error"] is None
85
+
86
+
87
+ @pytest.mark.asyncio
88
+ async def test_api_key_invalid(async_client, monkeypatch):
89
+ monkeypatch.setattr(
90
+ models_api_module, "_validate_api_key",
91
+ lambda key, provider: (False, 0, "API key not valid"),
92
+ )
93
+ response = await async_client.post(
94
+ "/api/v1/settings/api-key",
95
+ json={"api_key": "bad-key", "provider_type": "google_ai_studio"},
96
+ )
97
+ assert response.status_code == 200
98
+ data = response.json()
99
+ assert data["valid"] is False
100
+ assert data["model_count"] == 0
101
+ assert data["error"] is not None
102
+
103
+
104
@pytest.mark.asyncio
async def test_api_key_not_stored_in_db(async_client, db_session, monkeypatch):
    """The API key must never be persisted anywhere in the DB (R06)."""
    monkeypatch.setattr(
        models_api_module,
        "_validate_api_key",
        lambda key, provider: (True, 2, None),
    )
    await async_client.post(
        "/api/v1/settings/api-key",
        json={"api_key": "secret-key-AIza123", "provider_type": "google_ai_studio"},
    )
    # Scan every row of model_configs for the raw key material.
    from sqlalchemy import text

    result = await db_session.execute(text("SELECT * FROM model_configs"))
    assert all("secret-key-AIza123" not in str(row) for row in result.fetchall())
122
+
123
+
124
@pytest.mark.asyncio
async def test_api_key_missing_body_422(async_client):
    """An empty JSON body fails request validation with 422."""
    resp = await async_client.post("/api/v1/settings/api-key", json={})
    assert resp.status_code == 422
128
+
129
+
130
@pytest.mark.asyncio
async def test_api_key_default_provider_type(async_client, monkeypatch):
    """provider_type is optional and defaults to google_ai_studio."""
    monkeypatch.setattr(
        models_api_module,
        "_validate_api_key",
        lambda key, provider: (True, 1, None),
    )
    resp = await async_client.post(
        "/api/v1/settings/api-key",
        json={"api_key": "AIza-test"},
    )
    assert resp.status_code == 200
    assert resp.json()["provider"] == "google_ai_studio"
143
+
144
+
145
+ # ---------------------------------------------------------------------------
146
+ # GET /api/v1/models
147
+ # ---------------------------------------------------------------------------
148
+
149
@pytest.mark.asyncio
async def test_get_models_returns_list(async_client, monkeypatch):
    """GET /models responds 200 with a JSON array."""
    monkeypatch.setattr(models_api_module, "list_all_models", lambda: _MOCK_MODELS)
    resp = await async_client.get("/api/v1/models")
    assert resp.status_code == 200
    assert isinstance(resp.json(), list)
157
+
158
+
159
@pytest.mark.asyncio
async def test_get_models_count(async_client, monkeypatch):
    """GET /models returns exactly the mocked models (2 entries)."""
    monkeypatch.setattr(
        models_api_module, "list_all_models", lambda: _MOCK_MODELS
    )
    # Fix: original used a chained assignment "models = response = await ..."
    # leaving an unused `models` name, and never checked the status code.
    response = await async_client.get("/api/v1/models")
    assert response.status_code == 200
    assert len(response.json()) == 2
166
+
167
+
168
@pytest.mark.asyncio
async def test_get_models_fields(async_client, monkeypatch):
    """Each model entry exposes the expected public fields."""
    monkeypatch.setattr(models_api_module, "list_all_models", lambda: _MOCK_MODELS)
    payload = (await async_client.get("/api/v1/models")).json()
    first = payload[0]
    for field in ("model_id", "display_name", "provider", "supports_vision"):
        assert field in first
179
+
180
+
181
@pytest.mark.asyncio
async def test_get_models_empty_when_no_provider(async_client, monkeypatch):
    """With no provider configured the endpoint returns an empty list, not an error."""
    monkeypatch.setattr(models_api_module, "list_all_models", lambda: [])
    resp = await async_client.get("/api/v1/models")
    assert resp.status_code == 200
    assert resp.json() == []
187
+
188
+
189
@pytest.mark.asyncio
async def test_get_models_contains_gemini(async_client, monkeypatch):
    """The mocked catalogue contains at least one Gemini model id."""
    monkeypatch.setattr(models_api_module, "list_all_models", lambda: _MOCK_MODELS)
    entries = (await async_client.get("/api/v1/models")).json()
    assert any("gemini" in entry["model_id"] for entry in entries)
197
+
198
+
199
+ # ---------------------------------------------------------------------------
200
+ # POST /api/v1/models/refresh
201
+ # ---------------------------------------------------------------------------
202
+
203
@pytest.mark.asyncio
async def test_refresh_models_ok(async_client, monkeypatch):
    """POST /models/refresh succeeds when the provider lists models."""
    monkeypatch.setattr(models_api_module, "list_all_models", lambda: _MOCK_MODELS)
    resp = await async_client.post("/api/v1/models/refresh")
    assert resp.status_code == 200
210
+
211
+
212
@pytest.mark.asyncio
async def test_refresh_models_has_timestamp(async_client, monkeypatch):
    """The refresh payload carries a non-empty refreshed_at timestamp."""
    monkeypatch.setattr(models_api_module, "list_all_models", lambda: _MOCK_MODELS)
    body = (await async_client.post("/api/v1/models/refresh")).json()
    assert "refreshed_at" in body
    assert body["refreshed_at"]  # must be non-empty
220
+
221
+
222
@pytest.mark.asyncio
async def test_refresh_models_count(async_client, monkeypatch):
    """count matches the number of models returned by the provider mock."""
    monkeypatch.setattr(models_api_module, "list_all_models", lambda: _MOCK_MODELS)
    body = (await async_client.post("/api/v1/models/refresh")).json()
    assert body["count"] == 2
    assert len(body["models"]) == 2
230
+
231
+
232
@pytest.mark.asyncio
async def test_refresh_models_structure(async_client, monkeypatch):
    """The refresh payload exposes models, count and refreshed_at."""
    monkeypatch.setattr(models_api_module, "list_all_models", lambda: _MOCK_MODELS)
    body = (await async_client.post("/api/v1/models/refresh")).json()
    for key in ("models", "count", "refreshed_at"):
        assert key in body
241
+
242
+
243
+ # ---------------------------------------------------------------------------
244
+ # PUT /api/v1/corpora/{id}/model
245
+ # ---------------------------------------------------------------------------
246
+
247
@pytest.mark.asyncio
async def test_set_model_corpus_not_found(async_client):
    """PUT on an unknown corpus id yields 404."""
    resp = await async_client.put(
        "/api/v1/corpora/nonexistent/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    assert resp.status_code == 404
254
+
255
+
256
@pytest.mark.asyncio
async def test_set_model_ok(async_client, db_session):
    """Configuring a model on an existing corpus succeeds."""
    corpus = await _make_corpus(db_session)
    payload = {
        "model_id": "gemini-2.0-flash",
        "provider_type": "google_ai_studio",
        "display_name": "Gemini 2.0 Flash",
    }
    resp = await async_client.put(f"/api/v1/corpora/{corpus.id}/model", json=payload)
    assert resp.status_code == 200
268
+
269
+
270
@pytest.mark.asyncio
async def test_set_model_response_fields(async_client, db_session):
    """The PUT response echoes corpus id, model id, provider and updated_at."""
    corpus = await _make_corpus(db_session)
    resp = await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    body = resp.json()
    assert body["corpus_id"] == corpus.id
    assert body["selected_model_id"] == "gemini-2.0-flash"
    assert body["provider_type"] == "google_ai_studio"
    assert "updated_at" in body
282
+
283
+
284
@pytest.mark.asyncio
async def test_set_model_update_existing(async_client, db_session):
    """A second PUT on an already-configured corpus updates in place (upsert, no duplicate)."""
    corpus = await _make_corpus(db_session)
    url = f"/api/v1/corpora/{corpus.id}/model"

    await async_client.put(
        url,
        json={"model_id": "gemini-1.5-pro", "provider_type": "google_ai_studio"},
    )
    second = await async_client.put(
        url,
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    assert second.json()["selected_model_id"] == "gemini-2.0-flash"
299
+
300
+
301
@pytest.mark.asyncio
async def test_set_model_then_get(async_client, db_session):
    """GET after PUT reads back the same configured model."""
    corpus = await _make_corpus(db_session)
    await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    fetched = await async_client.get(f"/api/v1/corpora/{corpus.id}/model")
    assert fetched.json()["selected_model_id"] == "gemini-2.0-flash"
311
+
312
+
313
@pytest.mark.asyncio
async def test_set_model_display_name_fallback(async_client, db_session):
    """When display_name is omitted, the model id is used as the display name."""
    corpus = await _make_corpus(db_session)
    resp = await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    assert resp.json()["selected_model_display_name"] == "gemini-2.0-flash"
322
+
323
+
324
+ # ---------------------------------------------------------------------------
325
+ # GET /api/v1/corpora/{id}/model
326
+ # ---------------------------------------------------------------------------
327
+
328
@pytest.mark.asyncio
async def test_get_model_corpus_not_found(async_client):
    """GET on an unknown corpus id yields 404."""
    resp = await async_client.get("/api/v1/corpora/nonexistent/model")
    assert resp.status_code == 404
332
+
333
+
334
@pytest.mark.asyncio
async def test_get_model_not_configured(async_client, db_session):
    """An existing corpus with no model configured yields 404."""
    corpus = await _make_corpus(db_session)
    resp = await async_client.get(f"/api/v1/corpora/{corpus.id}/model")
    assert resp.status_code == 404
340
+
341
+
342
@pytest.mark.asyncio
async def test_get_model_ok(async_client, db_session):
    """Once configured, the model config can be fetched with 200."""
    corpus = await _make_corpus(db_session)
    await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-2.0-flash", "provider_type": "google_ai_studio"},
    )
    resp = await async_client.get(f"/api/v1/corpora/{corpus.id}/model")
    assert resp.status_code == 200
351
+
352
+
353
@pytest.mark.asyncio
async def test_get_model_fields(async_client, db_session):
    """GET returns the full config: ids, display name, provider and timestamp."""
    corpus = await _make_corpus(db_session)
    await async_client.put(
        f"/api/v1/corpora/{corpus.id}/model",
        json={"model_id": "gemini-1.5-pro", "provider_type": "google_ai_studio", "display_name": "Gemini 1.5 Pro"},
    )
    body = (await async_client.get(f"/api/v1/corpora/{corpus.id}/model")).json()
    assert body["corpus_id"] == corpus.id
    assert body["selected_model_id"] == "gemini-1.5-pro"
    assert body["selected_model_display_name"] == "Gemini 1.5 Pro"
    assert body["provider_type"] == "google_ai_studio"
    assert "updated_at" in body