IIIF-Studio / backend /tests /test_image_pipeline.py
Claude
fix: IIIF image fetch performance — retry with backoff, request size reduction, cursor warning
2e30fe9 unverified
"""
Tests du pipeline image : fetch IIIF + normalisation (dérivé + thumbnail).
Tests unitaires : httpx mocké, images créées en mémoire (Pillow).
Tests d'intégration : requêtes réseau réelles, activés via RUN_INTEGRATION_TESTS=1.
"""
# 1. stdlib
import io
import os
from pathlib import Path
from unittest.mock import MagicMock, patch
# 2. third-party
import httpx
import pytest
from PIL import Image
from pydantic import ValidationError
# 3. local
from app.schemas.image import ImageDerivativeInfo
from app.services.image.normalizer import (
_MAX_DERIVATIVE_PX,
_MAX_THUMBNAIL_PX,
_resize_to_max,
create_derivatives,
fetch_and_normalize,
)
from app.services.ingest.iiif_fetcher import fetch_iiif_image
# ---------------------------------------------------------------------------
# Integration marker — tests are skipped unless RUN_INTEGRATION_TESTS is set
# ---------------------------------------------------------------------------
# NOTE: only the truthiness of the variable is checked, so any non-empty
# value (not just "1") enables the network tests.
integration = pytest.mark.skipif(
    not os.environ.get("RUN_INTEGRATION_TESTS"),
    reason="Tests réseau réels : définir RUN_INTEGRATION_TESTS=1 pour les activer",
)
# IIIF image URLs used by the integration tests (BnF Gallica): the same
# Beatus folio at "max" size and at 600 px wide, plus one Grandes Chroniques
# folio at "max" size.
_URL_BEATUS_HI = (
    "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432836p/f13/full/max/0/default.jpg"
)
_URL_BEATUS_LO = (
    "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432836p/f13/full/600,/0/default.jpg"
)
_URL_GRANDES_CHRONIQUES = (
    "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8427295k/f3/full/max/0/default.jpg"
)
# ---------------------------------------------------------------------------
# Helpers de test
# ---------------------------------------------------------------------------
def _make_jpeg_bytes(width: int, height: int, color: tuple[int, int, int] = (200, 150, 100)) -> bytes:
    """Build a minimal in-memory JPEG for the unit tests.

    A solid-colour RGB image of ``width`` x ``height`` pixels is encoded as
    JPEG (quality 85) and returned as raw bytes.
    """
    with io.BytesIO() as stream:
        Image.new("RGB", (width, height), color=color).save(
            stream, format="JPEG", quality=85
        )
        return stream.getvalue()
def _make_png_rgba_bytes(width: int, height: int) -> bytes:
    """Build an in-memory RGBA PNG (used to exercise the RGB conversion)."""
    semi_transparent = (100, 150, 200, 128)
    canvas = Image.new("RGBA", (width, height), color=semi_transparent)
    with io.BytesIO() as stream:
        canvas.save(stream, format="PNG")
        encoded = stream.getvalue()
    return encoded
# ---------------------------------------------------------------------------
# Tests — ImageDerivativeInfo (schéma)
# ---------------------------------------------------------------------------
def test_schema_valid():
    """A fully populated payload builds an ImageDerivativeInfo instance."""
    payload = {
        "original_url": "https://example.com/image.jpg",
        "original_width": 3000,
        "original_height": 4000,
        "derivative_path": "/data/corpora/test/derivatives/0001r.jpg",
        "derivative_width": 1125,
        "derivative_height": 1500,
        "thumbnail_path": "/data/corpora/test/derivatives/0001r_thumb.jpg",
        "thumbnail_width": 192,
        "thumbnail_height": 256,
    }
    record = ImageDerivativeInfo(**payload)
    assert record.original_width == 3000
    assert record.derivative_width == 1125
def test_schema_missing_required_field():
    """Validation fails when only ``original_url`` is supplied."""
    incomplete_payload = {"original_url": "https://x.com/img.jpg"}
    with pytest.raises(ValidationError):
        ImageDerivativeInfo.model_validate(incomplete_payload)
def test_schema_all_fields_present():
    """The schema declares exactly the nine expected fields."""
    wanted = (
        "original_url",
        "original_width",
        "original_height",
        "derivative_path",
        "derivative_width",
        "derivative_height",
        "thumbnail_path",
        "thumbnail_width",
        "thumbnail_height",
    )
    declared = set(ImageDerivativeInfo.model_fields.keys())
    assert declared == set(wanted)
# ---------------------------------------------------------------------------
# Tests — _resize_to_max
# ---------------------------------------------------------------------------
def test_resize_small_image_not_upscaled():
    """An image already smaller than the limit must not be enlarged."""
    small = Image.new("RGB", (800, 600))
    resized = _resize_to_max(small, _MAX_DERIVATIVE_PX)
    expected_size = (800, 600)
    assert resized.size == expected_size
def test_resize_exact_max_not_changed():
    """A 1500x1000 image keeps its size when resized to the derivative limit."""
    dimensions = (1500, 1000)
    resized = _resize_to_max(Image.new("RGB", dimensions), _MAX_DERIVATIVE_PX)
    assert resized.size == (1500, 1000)
def test_resize_landscape_large():
    """Landscape 3000x2000 is reduced to 1500x1000."""
    landscape = Image.new("RGB", (3000, 2000))
    reduced = _resize_to_max(landscape, 1500)
    expected_size = (1500, 1000)
    assert reduced.size == expected_size
def test_resize_portrait_large():
    """Portrait 2000x3000 is reduced to 1000x1500."""
    portrait = Image.new("RGB", (2000, 3000))
    reduced = _resize_to_max(portrait, 1500)
    expected_size = (1000, 1500)
    assert reduced.size == expected_size
def test_resize_square_large():
    """Square 2000x2000 is reduced to 1500x1500."""
    square = Image.new("RGB", (2000, 2000))
    reduced = _resize_to_max(square, 1500)
    expected_size = (1500, 1500)
    assert reduced.size == expected_size
def test_resize_preserves_aspect_ratio():
    """The 4:3 aspect ratio survives the resize (within 0.01)."""
    reduced = _resize_to_max(Image.new("RGB", (4000, 3000)), 1500)
    new_width, new_height = reduced.size
    assert new_width == 1500
    ratio_drift = abs(new_width / new_height - 4 / 3)
    assert ratio_drift < 0.01
def test_resize_returns_copy_when_no_resize_needed():
    """A distinct instance is returned even when no resize is required."""
    source_image = Image.new("RGB", (100, 100))
    returned = _resize_to_max(source_image, 1500)
    same_object = returned is source_image
    assert not same_object
def test_resize_thumbnail_size():
    """Resizing 1200x800 to the thumbnail limit yields 256x171."""
    thumb = _resize_to_max(Image.new("RGB", (1200, 800)), _MAX_THUMBNAIL_PX)
    thumb_width, thumb_height = thumb.size[0], thumb.size[1]
    assert thumb_width == 256
    # round(800 * 256 / 1200) = round(170.67) = 171
    assert thumb_height == 171
# ---------------------------------------------------------------------------
# Tests — create_derivatives
# ---------------------------------------------------------------------------
def test_create_derivatives_large_landscape(tmp_path):
    """A 3000x2000 source gives a 1500x1000 derivative and a 256x171 thumbnail."""
    url = "https://x.com/img.jpg"
    result = create_derivatives(
        _make_jpeg_bytes(3000, 2000), url, "test-corpus", "0001r", tmp_path
    )
    assert (result.original_width, result.original_height) == (3000, 2000)
    assert (result.derivative_width, result.derivative_height) == (1500, 1000)
    assert (result.thumbnail_width, result.thumbnail_height) == (256, 171)
    assert result.original_url == url
def test_create_derivatives_small_image_not_upscaled(tmp_path):
    """A 600x900 source (below 1500 px) keeps its dimensions in the derivative."""
    jpeg = _make_jpeg_bytes(600, 900)
    result = create_derivatives(
        jpeg, "https://x.com/img.jpg", "test-corpus", "0001r", tmp_path
    )
    unchanged = (600, 900)
    assert (result.derivative_width, result.derivative_height) == unchanged
    assert (result.original_width, result.original_height) == unchanged
def test_create_derivatives_files_exist(tmp_path):
    """Both output files (derivative and thumbnail) are present on disk."""
    result = create_derivatives(
        _make_jpeg_bytes(2000, 3000),
        "https://x.com/img.jpg",
        "corpus-a",
        "f001r",
        tmp_path,
    )
    derivative_file = Path(result.derivative_path)
    thumbnail_file = Path(result.thumbnail_path)
    assert derivative_file.exists()
    assert thumbnail_file.exists()
def test_create_derivatives_path_structure(tmp_path):
    """Output paths follow the layout convention (CLAUDE.md §3)."""
    result = create_derivatives(
        _make_jpeg_bytes(1000, 1000),
        "https://x.com/img.jpg",
        "beatus-lat8878",
        "0013r",
        tmp_path,
    )
    derivatives_dir = tmp_path / "corpora" / "beatus-lat8878" / "derivatives"
    assert result.derivative_path == str(derivatives_dir / "0013r.jpg")
    assert result.thumbnail_path == str(derivatives_dir / "0013r_thumb.jpg")
def test_create_derivatives_output_is_jpeg(tmp_path):
    """Both produced files open as valid JPEG images."""
    jpeg = _make_jpeg_bytes(1000, 800)
    result = create_derivatives(
        jpeg, "https://x.com/img.jpg", "corpus-b", "f002r", tmp_path
    )
    # Derivative first, then thumbnail.
    for produced in (result.derivative_path, result.thumbnail_path):
        with Image.open(produced) as opened:
            assert opened.format == "JPEG"
def test_create_derivatives_rgba_converted_to_rgb(tmp_path):
    """An RGBA PNG source is converted to RGB without raising."""
    result = create_derivatives(
        _make_png_rgba_bytes(800, 1000),
        "https://x.com/img.png",
        "corpus-c",
        "f003r",
        tmp_path,
    )
    with Image.open(result.derivative_path) as opened:
        colour_mode = opened.mode
        assert colour_mode == "RGB"
    assert (result.original_width, result.original_height) == (800, 1000)
def test_create_derivatives_thumbnail_dimensions(tmp_path):
    """The thumbnail's longest side equals the thumbnail limit for a large source."""
    result = create_derivatives(
        _make_jpeg_bytes(3000, 4000),
        "https://x.com/img.jpg",
        "corpus-d",
        "f004r",
        tmp_path,
    )
    longest_side = max(result.thumbnail_width, result.thumbnail_height)
    assert longest_side == _MAX_THUMBNAIL_PX
def test_create_derivatives_creates_parent_dirs(tmp_path):
    """Intermediate directories are created for a corpus slug never seen before."""
    result = create_derivatives(
        _make_jpeg_bytes(500, 500),
        "https://x.com/img.jpg",
        "nouveau-corpus-jamais-vu",
        "f001r",
        tmp_path,
    )
    containing_dir = Path(result.derivative_path).parent
    assert containing_dir.exists()
# ---------------------------------------------------------------------------
# Tests — fetch_iiif_image
# ---------------------------------------------------------------------------
def test_fetch_iiif_image_success():
    """A successful request returns the response body bytes.

    Also checks the keyword arguments passed to ``httpx.get``: redirects are
    followed and the timeout object has connect=15s / read=60s.
    """
    payload = _make_jpeg_bytes(100, 100)
    ok_response = MagicMock(status_code=200, content=payload)
    ok_response.raise_for_status.return_value = None

    with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get:
        with patch("app.services.ingest.iiif_fetcher.time.sleep"):
            with patch("app.services.ingest.iiif_fetcher.time.monotonic", return_value=0.0):
                mock_get.return_value = ok_response
                fetched = fetch_iiif_image("https://example.com/image.jpg")

    assert fetched == payload
    call_kwargs = mock_get.call_args[1]
    assert call_kwargs["follow_redirects"] is True
    # The timeout is an httpx.Timeout object (connect=15s, read=60s).
    timeout_cfg = call_kwargs["timeout"]
    assert timeout_cfg.connect == 15.0
    assert timeout_cfg.read == 60.0
def test_fetch_iiif_image_http_error():
    """HTTPStatusError propagates on a 404, with no retry.

    The contract under test is "no retry on 4xx other than 429", so besides
    checking the raised exception the test also verifies that ``httpx.get``
    was called exactly once.
    """
    with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get, \
            patch("app.services.ingest.iiif_fetcher.time.sleep"), \
            patch("app.services.ingest.iiif_fetcher.time.monotonic", return_value=0.0):
        mock_response = MagicMock()
        mock_response.status_code = 404
        mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
            "404 Not Found",
            request=MagicMock(),
            response=MagicMock(status_code=404),
        )
        mock_get.return_value = mock_response
        with pytest.raises(httpx.HTTPStatusError):
            fetch_iiif_image("https://example.com/missing.jpg")
        # Without this check a fetcher that retried 404s would still pass.
        assert mock_get.call_count == 1
def test_fetch_iiif_image_timeout():
    """TimeoutException propagates when every attempt times out."""
    always_timeout = httpx.TimeoutException("timed out")
    with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get:
        with patch("app.services.ingest.iiif_fetcher.time.sleep"):
            with patch("app.services.ingest.iiif_fetcher.time.monotonic", return_value=0.0):
                mock_get.side_effect = always_timeout
                with pytest.raises(httpx.TimeoutException):
                    fetch_iiif_image("https://example.com/slow.jpg", timeout=1.0)
def test_fetch_iiif_image_custom_timeout():
    """A caller-supplied timeout is forwarded to ``httpx.get``."""
    ok_response = MagicMock(status_code=200, content=_make_jpeg_bytes(50, 50))
    ok_response.raise_for_status.return_value = None

    with patch("app.services.ingest.iiif_fetcher.httpx.get") as mock_get:
        with patch("app.services.ingest.iiif_fetcher.time.sleep"):
            with patch("app.services.ingest.iiif_fetcher.time.monotonic", return_value=0.0):
                mock_get.return_value = ok_response
                fetch_iiif_image("https://example.com/img.jpg", timeout=120.0)

    timeout_cfg = mock_get.call_args[1]["timeout"]
    # The custom value becomes the read timeout; connect stays at 15s.
    assert timeout_cfg.read == 120.0
    assert timeout_cfg.connect == 15.0
# ---------------------------------------------------------------------------
# Tests — fetch_and_normalize (end-to-end mocké)
# ---------------------------------------------------------------------------
def test_fetch_and_normalize_chains_correctly(tmp_path):
    """fetch_and_normalize fetches the image, then builds the derivatives."""
    url = "https://example.com/ms/f001.jpg"
    payload = _make_jpeg_bytes(2000, 1500)
    fetch_patch = patch(
        "app.services.image.normalizer.fetch_iiif_image", return_value=payload
    )
    with fetch_patch as mock_fetch:
        result = fetch_and_normalize(url, "corpus-test", "0001r", tmp_path)

    mock_fetch.assert_called_once_with("https://example.com/ms/f001.jpg")
    assert result.original_url == "https://example.com/ms/f001.jpg"
    assert (result.original_width, result.original_height) == (2000, 1500)
    assert (result.derivative_width, result.derivative_height) == (1500, 1125)
    for produced in (result.derivative_path, result.thumbnail_path):
        assert Path(produced).exists()
def test_fetch_and_normalize_propagates_http_error(tmp_path):
    """HTTP errors raised by fetch_iiif_image propagate unmasked."""
    forbidden = httpx.HTTPStatusError("403", request=MagicMock(), response=MagicMock())
    fetch_patch = patch(
        "app.services.image.normalizer.fetch_iiif_image",
        side_effect=forbidden,
    )
    with fetch_patch, pytest.raises(httpx.HTTPStatusError):
        fetch_and_normalize("https://example.com/img.jpg", "corpus", "f001", tmp_path)
# ---------------------------------------------------------------------------
# Tests d'intégration — URLs IIIF BnF réelles (skippés par défaut)
# ---------------------------------------------------------------------------
@integration
def test_integration_beatus_high_res(tmp_path):
    """Beatus of Saint-Sever, BnF Latin 8878, f.13 — high resolution."""
    result = fetch_and_normalize(_URL_BEATUS_HI, "beatus-lat8878", "0013r", tmp_path)
    # The source must exceed 1500 px on at least one side.
    assert max(result.original_width, result.original_height) > 1500
    derivative_sides = (result.derivative_width, result.derivative_height)
    assert derivative_sides[0] <= _MAX_DERIVATIVE_PX
    assert derivative_sides[1] <= _MAX_DERIVATIVE_PX
    assert max(derivative_sides) == _MAX_DERIVATIVE_PX
    for produced in (result.derivative_path, result.thumbnail_path):
        assert Path(produced).exists()
@integration
def test_integration_beatus_low_res(tmp_path):
    """Beatus of Saint-Sever, BnF Latin 8878, f.13 — requested at 600 px wide.

    The IIIF size ``600,`` constrains only the width; the height follows the
    aspect ratio and can exceed 600 px for a portrait folio. The test
    therefore bounds the width (not the longest side) by 600 and checks that
    the derivative keeps the fetched dimensions, i.e. no up- or downscaling.
    """
    info = fetch_and_normalize(_URL_BEATUS_LO, "beatus-lat8878", "0013r-600", tmp_path)
    assert info.derivative_width <= _MAX_DERIVATIVE_PX
    assert info.derivative_height <= _MAX_DERIVATIVE_PX
    # Only the width is bounded by the requested IIIF size.
    assert info.derivative_width <= 600
    # Already-small image: the derivative must equal the original.
    assert info.derivative_width == info.original_width
    assert info.derivative_height == info.original_height
    assert Path(info.derivative_path).exists()
@integration
def test_integration_grandes_chroniques(tmp_path):
    """Grandes Chroniques de France, BnF Français 2813."""
    result = fetch_and_normalize(
        _URL_GRANDES_CHRONIQUES, "grandes-chroniques", "f003", tmp_path
    )
    assert result.derivative_width <= _MAX_DERIVATIVE_PX
    assert result.derivative_height <= _MAX_DERIVATIVE_PX
    derivative_file = Path(result.derivative_path)
    thumbnail_file = Path(result.thumbnail_path)
    assert derivative_file.exists()
    assert thumbnail_file.exists()