IIIF-Studio / backend /tests /test_schemas.py
Claude
fix: comprehensive repo audit — 15 issues fixed
9097545 unverified
"""
Tests des schémas Pydantic — corpus_profile, page_master, annotation.
"""
# 1. stdlib
from datetime import datetime, timezone
# 2. third-party
import pytest
from pydantic import ValidationError
# 3. local
from app.schemas.corpus_profile import (
CorpusProfile,
ExportConfig,
LayerType,
ScriptType,
UncertaintyConfig,
)
from app.schemas.page_master import (
Commentary,
CommentaryClaim,
EditorialInfo,
EditorialStatus,
OCRResult,
PageMaster,
ProcessingInfo,
Region,
RegionType,
Translation,
)
from app.schemas.annotation import AnnotationLayer, LayerStatus
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def minimal_corpus_profile() -> dict:
    """Provide a raw dict payload that the tests feed to ``CorpusProfile``."""
    templates = {"primary": "prompts/test/primary_v1.txt"}
    uncertainty = {"flag_below": 0.4, "min_acceptable": 0.25}
    exports = {"mets": True, "alto": True, "tei": False}
    return {
        "profile_id": "test-profile",
        "label": "Test Profile",
        "language_hints": ["la"],
        "script_type": "caroline",
        "active_layers": ["ocr_diplomatic", "translation_fr"],
        "prompt_templates": templates,
        "uncertainty_config": uncertainty,
        "export_config": exports,
    }
@pytest.fixture
def minimal_page_master() -> dict:
    """Provide a raw dict payload that the tests feed to ``PageMaster``.

    Only identification fields, an image description and an empty region
    list are supplied; everything else is left for the schema to default.
    """
    image = {
        "master": "data/corpora/test/masters/0001r.tif",
        "derivative_web": "data/corpora/test/derivatives/0001r.jpg",
        "width": 2000,
        "height": 3000,
    }
    layout = {"regions": []}
    return {
        "page_id": "test-corpus-0001r",
        "corpus_profile": "test-profile",
        "manuscript_id": "ms-test-001",
        "folio_label": "0001r",
        "sequence": 1,
        "image": image,
        "layout": layout,
    }
@pytest.fixture
def valid_region() -> dict:
    """Provide a raw dict payload that the tests feed to ``Region``."""
    bbox = [10, 20, 300, 400]
    return {"id": "r1", "type": "text_block", "bbox": bbox, "confidence": 0.95}
# ---------------------------------------------------------------------------
# Tests — CorpusProfile
# ---------------------------------------------------------------------------
def test_corpus_profile_valid(minimal_corpus_profile):
    """The minimal payload validates and exposes the expected id, script and layer."""
    parsed = CorpusProfile.model_validate(minimal_corpus_profile)

    assert parsed.profile_id == "test-profile"
    assert parsed.script_type == ScriptType.CAROLINE
    assert LayerType.OCR_DIPLOMATIC in parsed.active_layers
def test_corpus_profile_is_frozen(minimal_corpus_profile):
    """Assigning to a field of a validated profile must raise."""
    frozen_profile = CorpusProfile.model_validate(minimal_corpus_profile)
    # Either exception type is accepted as proof that the assignment was refused.
    rejection_errors = (TypeError, ValidationError)
    with pytest.raises(rejection_errors):
        frozen_profile.label = "Modified"  # type: ignore[misc]
@pytest.mark.parametrize("script", list(ScriptType))
def test_corpus_profile_all_script_types(minimal_corpus_profile, script):
    """Each ``ScriptType`` member's value is accepted as ``script_type``.

    Parametrized rather than looped so every member is reported as its own
    test case: a failure names the offending member and does not prevent the
    remaining members from being checked.
    """
    data = {**minimal_corpus_profile, "script_type": script.value}
    profile = CorpusProfile.model_validate(data)
    assert profile.script_type == script
def test_corpus_profile_all_layer_types(minimal_corpus_profile):
    """A profile listing every ``LayerType`` value keeps all of them."""
    payload = dict(minimal_corpus_profile)
    payload["active_layers"] = [member.value for member in LayerType]

    parsed = CorpusProfile.model_validate(payload)

    assert len(parsed.active_layers) == len(LayerType)
def test_uncertainty_config_defaults():
    """An ``UncertaintyConfig`` built without arguments uses 0.4 / 0.25."""
    defaults = UncertaintyConfig()

    assert defaults.flag_below == 0.4
    assert defaults.min_acceptable == 0.25
def test_uncertainty_config_bounds():
    """Threshold values of 1.5 and -0.1 are both rejected."""
    # A flag_below of 1.5 must be refused.
    with pytest.raises(ValidationError):
        UncertaintyConfig(flag_below=1.5)

    # A min_acceptable of -0.1 must be refused.
    with pytest.raises(ValidationError):
        UncertaintyConfig(min_acceptable=-0.1)
def test_export_config_defaults():
    """An ``ExportConfig`` built without arguments enables METS and ALTO but not TEI."""
    defaults = ExportConfig()

    assert defaults.mets is True
    assert defaults.alto is True
    assert defaults.tei is False
def test_corpus_profile_missing_required_field():
    """A payload carrying only ``profile_id`` fails validation."""
    incomplete_payload = {"profile_id": "x"}
    with pytest.raises(ValidationError):
        CorpusProfile.model_validate(incomplete_payload)
# ---------------------------------------------------------------------------
# Tests — Region / bbox
# ---------------------------------------------------------------------------
def test_region_valid_bbox(valid_region):
    """A well-formed region keeps its bbox and confidence unchanged."""
    parsed = Region.model_validate(valid_region)

    assert parsed.bbox == [10, 20, 300, 400]
    assert parsed.confidence == 0.95
def test_region_bbox_negative_x():
    """A bbox whose first component is -1 is rejected."""
    payload = {
        "id": "r1",
        "type": "text_block",
        "bbox": [-1, 20, 300, 400],
        "confidence": 0.5,
    }
    with pytest.raises(ValidationError):
        Region.model_validate(payload)
def test_region_bbox_zero_width():
    """A bbox whose third component is 0 is rejected."""
    payload = {
        "id": "r1",
        "type": "text_block",
        "bbox": [0, 0, 0, 400],
        "confidence": 0.5,
    }
    with pytest.raises(ValidationError):
        Region.model_validate(payload)
def test_region_bbox_zero_height():
    """A bbox whose fourth component is 0 is rejected."""
    payload = {
        "id": "r1",
        "type": "text_block",
        "bbox": [0, 0, 300, 0],
        "confidence": 0.5,
    }
    with pytest.raises(ValidationError):
        Region.model_validate(payload)
def test_region_bbox_wrong_length():
    """A bbox with only three components is rejected."""
    payload = {
        "id": "r1",
        "type": "text_block",
        "bbox": [0, 0, 300],
        "confidence": 0.5,
    }
    with pytest.raises(ValidationError):
        Region.model_validate(payload)
@pytest.mark.parametrize("region_type", list(RegionType))
def test_region_all_types(region_type):
    """Each ``RegionType`` member's value is accepted as a region ``type``.

    Parametrized rather than looped so every member is reported as its own
    test case: a failure names the offending member and does not prevent the
    remaining members from being checked.
    """
    region = Region.model_validate({
        "id": "r1",
        "type": region_type.value,
        "bbox": [0, 0, 100, 100],
        "confidence": 0.8,
    })
    assert region.type == region_type
def test_region_optional_polygon():
    """A region supplied with a four-point polygon keeps all four points."""
    corners = [[0, 0], [200, 0], [200, 200], [0, 200]]
    payload = {
        "id": "r1",
        "type": "miniature",
        "bbox": [0, 0, 200, 200],
        "confidence": 0.9,
        "polygon": corners,
    }

    parsed = Region.model_validate(payload)

    assert parsed.polygon is not None
    assert len(parsed.polygon) == 4
# ---------------------------------------------------------------------------
# Tests — PageMaster
# ---------------------------------------------------------------------------
def test_page_master_valid(minimal_page_master):
    """The minimal payload validates with version "1.0" and MACHINE_DRAFT status."""
    parsed = PageMaster.model_validate(minimal_page_master)

    assert parsed.schema_version == "1.0"
    assert parsed.page_id == "test-corpus-0001r"
    assert parsed.editorial.status == EditorialStatus.MACHINE_DRAFT
def test_page_master_schema_version_default(minimal_page_master):
    """``schema_version`` is "1.0" when the payload does not provide it."""
    parsed = PageMaster.model_validate(minimal_page_master)
    assert parsed.schema_version == "1.0"
def test_page_master_with_ocr(minimal_page_master):
    """An ``ocr`` section in the payload is parsed and its text preserved."""
    ocr_section = {
        "diplomatic_text": "In nomine Domini",
        "language": "la",
        "confidence": 0.87,
    }
    payload = {**minimal_page_master, "ocr": ocr_section}

    parsed = PageMaster.model_validate(payload)

    assert parsed.ocr is not None
    assert parsed.ocr.diplomatic_text == "In nomine Domini"
def test_page_master_with_translation(minimal_page_master):
    """A ``translation`` section in the payload is parsed and its French text preserved."""
    translation_section = {
        "fr": "Au nom du Seigneur",
        "en": "In the name of the Lord",
    }
    payload = {**minimal_page_master, "translation": translation_section}

    parsed = PageMaster.model_validate(payload)

    assert parsed.translation is not None
    assert parsed.translation.fr == "Au nom du Seigneur"
def test_page_master_with_commentary(minimal_page_master):
    """A ``commentary`` section with one claim is parsed and the claim preserved."""
    single_claim = {"claim": "Ce folio date du XIe siècle.", "certainty": "high"}
    commentary_section = {
        "public": "Description publique.",
        "scholarly": "Analyse savante.",
        "claims": [single_claim],
    }
    payload = {**minimal_page_master, "commentary": commentary_section}

    parsed = PageMaster.model_validate(payload)

    assert parsed.commentary is not None
    assert len(parsed.commentary.claims) == 1
    assert parsed.commentary.claims[0].certainty == "high"
def test_page_master_editorial_info_defaults(minimal_page_master):
    """Without editorial input the page is unvalidated, at version 1, with no validator."""
    editorial = PageMaster.model_validate(minimal_page_master).editorial

    assert editorial.validated is False
    assert editorial.version == 1
    assert editorial.validated_by is None
@pytest.mark.parametrize("certainty", ["high", "medium", "low", "speculative"])
def test_commentary_claim_certainty_values(certainty):
    """Each of the four certainty labels is accepted and stored unchanged.

    Parametrized rather than looped so every label is reported as its own
    test case: a failure names the offending label and does not prevent the
    remaining labels from being checked.
    """
    claim = CommentaryClaim(claim="Test.", certainty=certainty)
    assert claim.certainty == certainty
def test_commentary_claim_invalid_certainty():
    """The certainty label "unknown" is rejected."""
    bad_kwargs = {"claim": "Test.", "certainty": "unknown"}
    with pytest.raises(ValidationError):
        CommentaryClaim(**bad_kwargs)
# ---------------------------------------------------------------------------
# Tests — AnnotationLayer
# ---------------------------------------------------------------------------
def test_annotation_layer_valid():
    """A layer built without status or version gets PENDING and version 1."""
    created = datetime(2026, 3, 16, 12, 0, 0, tzinfo=timezone.utc)

    new_layer = AnnotationLayer(
        id="layer-001",
        page_id="test-corpus-0001r",
        layer_type=LayerType.OCR_DIPLOMATIC,
        created_at=created,
    )

    assert new_layer.status == LayerStatus.PENDING
    assert new_layer.version == 1
@pytest.mark.parametrize("status", list(LayerStatus))
def test_annotation_layer_all_statuses(status):
    """Each ``LayerStatus`` member is accepted as a layer ``status``.

    Parametrized rather than looped so every member is reported as its own
    test case: a failure names the offending member and does not prevent the
    remaining members from being checked.
    """
    layer = AnnotationLayer(
        id="layer-001",
        page_id="test-corpus-0001r",
        layer_type=LayerType.TRANSLATION_FR,
        status=status,
        created_at=datetime(2026, 3, 16, tzinfo=timezone.utc),
    )
    assert layer.status == status
@pytest.mark.parametrize("layer_type", list(LayerType))
def test_annotation_layer_all_layer_types(layer_type):
    """Each ``LayerType`` member is accepted as a layer ``layer_type``.

    Parametrized rather than looped so every member is reported as its own
    test case: a failure names the offending member and does not prevent the
    remaining members from being checked.
    """
    layer = AnnotationLayer(
        id=f"layer-{layer_type.value}",
        page_id="test-corpus-0001r",
        layer_type=layer_type,
        created_at=datetime(2026, 3, 16, tzinfo=timezone.utc),
    )
    assert layer.layer_type == layer_type
# ---------------------------------------------------------------------------
# ImageInfo — compatibilité arrière et champs IIIF natifs
# ---------------------------------------------------------------------------
from app.schemas.page_master import ImageInfo
from app.schemas.image import ImageSourceInfo
def test_image_info_backward_compat_without_iiif_fields():
    """An ImageInfo without the new IIIF fields must still validate."""
    legacy_payload = {
        "master": "data/corpora/test/masters/0001r.tif",
        "derivative_web": "data/corpora/test/derivatives/0001r.jpg",
        "width": 2000,
        "height": 3000,
    }

    parsed = ImageInfo.model_validate(legacy_payload)

    assert parsed.iiif_service_url is None
    assert parsed.manifest_url is None
def test_image_info_with_iiif_service_url():
    """An ImageInfo carrying iiif_service_url must validate."""
    service_url = "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29"
    payload = {
        "master": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
        "iiif_service_url": service_url,
        "manifest_url": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/manifest.json",
        "width": 3543,
        "height": 4724,
    }

    parsed = ImageInfo.model_validate(payload)

    assert parsed.iiif_service_url == service_url
    assert parsed.manifest_url is not None
    # Local-path fields were not supplied, so they are expected to be None.
    assert parsed.derivative_web is None
    assert parsed.thumbnail is None
def test_image_info_iiif_native_no_local_paths():
    """In IIIF-native mode, derivative_web and thumbnail are None."""
    iiif_only_kwargs = {
        "master": "https://example.com/image.jpg",
        "iiif_service_url": "https://example.com/iiif/img1",
        "width": 5000,
        "height": 7000,
    }

    built = ImageInfo(**iiif_only_kwargs)

    assert built.derivative_web is None
    assert built.thumbnail is None
    assert built.width == 5000
    assert built.height == 7000
def test_page_master_backward_compat_v10(minimal_page_master):
    """A v1.0 PageMaster (without IIIF fields) must still validate."""
    legacy_page = PageMaster.model_validate(minimal_page_master)

    assert legacy_page.schema_version == "1.0"
    assert legacy_page.image.iiif_service_url is None
def test_image_source_info_iiif():
    """ImageSourceInfo with a detected IIIF service."""
    iiif_kwargs = {
        "original_url": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29/full/max/0/default.jpg",
        "iiif_service_url": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/f29",
        "manifest_url": "https://gallica.bnf.fr/iiif/ark:/12148/btv1b8432314s/manifest.json",
        "is_iiif": True,
        "original_width": 3543,
        "original_height": 4724,
    }

    source = ImageSourceInfo(**iiif_kwargs)

    assert source.is_iiif is True
    assert "gallica" in source.iiif_service_url
def test_image_source_info_static_fallback():
    """ImageSourceInfo without an IIIF service (static image)."""
    static_kwargs = {
        "original_url": "https://example.com/static/page1.jpg",
        "is_iiif": False,
        "original_width": 2000,
        "original_height": 3000,
    }

    source = ImageSourceInfo(**static_kwargs)

    assert source.is_iiif is False
    assert source.iiif_service_url is None