File size: 18,677 Bytes
cc423b0
59c75b7
cc423b0
 
 
59c75b7
 
 
 
 
433e26f
59c75b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db489aa
59c75b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc423b0
 
 
 
 
 
 
 
 
 
 
 
 
59c75b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc423b0
 
 
 
 
 
 
 
 
 
 
 
 
db489aa
 
 
59c75b7
 
 
 
 
 
 
 
 
 
 
cc423b0
 
 
 
 
59c75b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db489aa
 
 
 
59c75b7
 
 
 
 
cc423b0
59c75b7
 
433e26f
59c75b7
 
 
 
 
 
 
 
 
 
 
 
 
cc423b0
 
 
 
59c75b7
db489aa
59c75b7
 
cc423b0
59c75b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc423b0
 
 
 
 
 
 
 
 
 
 
59c75b7
 
db489aa
59c75b7
 
db489aa
59c75b7
cc423b0
59c75b7
 
 
cc423b0
59c75b7
 
 
 
 
 
 
 
 
 
cc423b0
 
 
 
 
59c75b7
 
cc423b0
 
 
 
 
 
 
 
59c75b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db489aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59c75b7
 
 
 
 
 
 
 
cc423b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59c75b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc423b0
 
 
 
59c75b7
 
 
 
 
 
 
 
cc423b0
59c75b7
 
 
cc423b0
59c75b7
 
 
 
cc423b0
59c75b7
 
 
 
cc423b0
59c75b7
 
 
 
 
cc423b0
59c75b7
 
 
cc423b0
59c75b7
 
 
 
cc423b0
59c75b7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
"""Evaluation metrics suite.

All metrics stratified by Fitzpatrick skin type (I-VI) using ITA-based thresholding.
Primary metrics: FID, LPIPS, NME, ArcFace identity similarity.
Secondary: SSIM (relaxed target >0.80).
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any

import numpy as np

try:
    import cv2
except ImportError:
    cv2 = None  # type: ignore[assignment]


@dataclass
class EvalMetrics:
    """Aggregate evaluation metrics for a batch of generated images.

    The scalar fields hold batch-level values. The ``*_by_fitzpatrick`` and
    ``*_by_procedure`` dictionaries hold the same metrics broken down by
    group label. Every scalar defaults to ``0.0`` and every dictionary to
    an empty dict.
    """

    fid: float = 0.0
    lpips: float = 0.0
    nme: float = 0.0           # Normalized Mean landmark Error
    identity_sim: float = 0.0  # ArcFace cosine similarity
    ssim: float = 0.0

    # Per-Fitzpatrick breakdown, keyed by skin-type label
    fid_by_fitzpatrick: dict[str, float] = field(default_factory=dict)
    nme_by_fitzpatrick: dict[str, float] = field(default_factory=dict)
    lpips_by_fitzpatrick: dict[str, float] = field(default_factory=dict)
    ssim_by_fitzpatrick: dict[str, float] = field(default_factory=dict)
    identity_sim_by_fitzpatrick: dict[str, float] = field(default_factory=dict)
    count_by_fitzpatrick: dict[str, int] = field(default_factory=dict)

    # Per-procedure breakdown, keyed by procedure name
    nme_by_procedure: dict[str, float] = field(default_factory=dict)
    lpips_by_procedure: dict[str, float] = field(default_factory=dict)
    ssim_by_procedure: dict[str, float] = field(default_factory=dict)

    def summary(self) -> str:
        """Return a multi-line, human-readable report of the metrics.

        The batch-level values always appear. A per-type section is added
        when ``count_by_fitzpatrick`` is non-empty, and a per-type FID
        section when ``fid_by_fitzpatrick`` is non-empty.
        """
        lines = [
            f"FID:          {self.fid:.2f}",
            f"LPIPS:        {self.lpips:.4f}",
            f"NME:          {self.nme:.4f}",
            f"Identity Sim: {self.identity_sim:.4f}",
            f"SSIM:         {self.ssim:.4f}",
        ]
        if self.count_by_fitzpatrick:
            lines.append("\nBy Fitzpatrick Type:")
            # (label, per-type values) in the order they are printed
            per_type = (
                ("LPIPS", self.lpips_by_fitzpatrick),
                ("SSIM", self.ssim_by_fitzpatrick),
                ("NME", self.nme_by_fitzpatrick),
                ("ID", self.identity_sim_by_fitzpatrick),
            )
            for ftype in sorted(self.count_by_fitzpatrick):
                parts = [f"  Type {ftype} (n={self.count_by_fitzpatrick[ftype]}):"]
                # Only metrics that were actually computed for this type are shown
                parts.extend(
                    f"{label}={values[ftype]:.4f}"
                    for label, values in per_type
                    if ftype in values
                )
                lines.append(" ".join(parts))
        if self.fid_by_fitzpatrick:
            lines.append("\nFID by Fitzpatrick:")
            for k, v in sorted(self.fid_by_fitzpatrick.items()):
                lines.append(f"  Type {k}: {v:.2f}")
        return "\n".join(lines)

    def to_dict(self) -> dict:
        """Convert to a flat dictionary for JSON/CSV export.

        Keys are the five batch-level metric names, then
        ``fitz_<type>_{count,lpips,ssim,nme,identity}`` for every type in
        ``count_by_fitzpatrick``, then ``proc_<name>_{nme,lpips,ssim}`` for
        every procedure that appears in any per-procedure dictionary.
        A metric missing for a group is exported as ``0.0``.
        """
        d = {
            "fid": self.fid,
            "lpips": self.lpips,
            "nme": self.nme,
            "identity_sim": self.identity_sim,
            "ssim": self.ssim,
        }
        for ftype in sorted(self.count_by_fitzpatrick):
            prefix = f"fitz_{ftype}"
            d[f"{prefix}_count"] = self.count_by_fitzpatrick.get(ftype, 0)
            d[f"{prefix}_lpips"] = self.lpips_by_fitzpatrick.get(ftype, 0.0)
            d[f"{prefix}_ssim"] = self.ssim_by_fitzpatrick.get(ftype, 0.0)
            d[f"{prefix}_nme"] = self.nme_by_fitzpatrick.get(ftype, 0.0)
            d[f"{prefix}_identity"] = self.identity_sim_by_fitzpatrick.get(ftype, 0.0)

        # Use the union of keys: a procedure can have LPIPS/SSIM values without
        # an NME entry (NME is only filled in when landmarks were supplied), and
        # those values must not be dropped from the export.
        procedures = (
            set(self.nme_by_procedure)
            | set(self.lpips_by_procedure)
            | set(self.ssim_by_procedure)
        )
        for proc in sorted(procedures):
            d[f"proc_{proc}_nme"] = self.nme_by_procedure.get(proc, 0.0)
            d[f"proc_{proc}_lpips"] = self.lpips_by_procedure.get(proc, 0.0)
            d[f"proc_{proc}_ssim"] = self.ssim_by_procedure.get(proc, 0.0)
        return d


def classify_fitzpatrick_ita(image: np.ndarray) -> str:
    """Map an image to a Fitzpatrick label ("I".."VI") via its ITA.

    The Individual Typology Angle is evaluated here as
    ``arctan2(L - 50, b)`` converted to degrees, where L and b are the mean
    CIE L*a*b* lightness and b values over the central half of the image
    (rows and columns from one quarter to three quarters). Sampling the
    centre is meant to keep background pixels out of the estimate.

    Label boundaries (attributed to Chardon et al., 1991):
    - ITA > 55: Type I (very light)
    - 41 < ITA <= 55: Type II (light)
    - 28 < ITA <= 41: Type III (intermediate)
    - 10 < ITA <= 28: Type IV (tan)
    - -30 < ITA <= 10: Type V (brown)
    - ITA <= -30: Type VI (dark)

    Args:
        image: Image handed to ``cv2.cvtColor`` with ``COLOR_BGR2LAB``.
            NOTE(review): the ``* 100 / 255`` and ``- 128`` rescaling below
            presumes OpenCV's 8-bit Lab encoding - confirm callers always
            pass uint8 BGR images.

    Returns:
        One of "I", "II", "III", "IV", "V", "VI".

    Raises:
        ImportError: If OpenCV could not be imported by this module.
    """
    if cv2 is None:
        raise ImportError("opencv-python is required for Fitzpatrick classification")

    lab_image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)

    # Central window: the middle half of the rows and of the columns
    height, width = image.shape[:2]
    rows = slice(height // 4, 3 * height // 4)
    cols = slice(width // 4, 3 * width // 4)
    patch = lab_image[rows, cols]

    lightness = patch[:, :, 0].mean() * 100 / 255  # rescale to 0-100
    yellow_blue = patch[:, :, 2].mean() - 128  # re-centre around 0

    # Keep the second arctan2 argument away from exactly zero
    if abs(yellow_blue) < 1e-6:
        yellow_blue = 1e-6

    ita = np.arctan2(lightness - 50, yellow_blue) * (180 / np.pi)

    # Exclusive lower bounds, checked from lightest to darkest
    lower_bounds = ((55, "I"), (41, "II"), (28, "III"), (10, "IV"), (-30, "V"))
    for bound, label in lower_bounds:
        if ita > bound:
            return label
    return "VI"


def compute_nme(
    pred_landmarks: np.ndarray,
    target_landmarks: np.ndarray,
    left_eye_idx: int = 33,
    right_eye_idx: int = 263,
) -> float:
    """Return the landmark Normalized Mean Error (lower is better).

    The mean Euclidean distance between corresponding predicted and target
    points is divided by the inter-ocular distance measured on the target
    landmarks.

    Args:
        pred_landmarks: (N, 2) predicted landmark positions.
        target_landmarks: (N, 2) ground truth positions.
        left_eye_idx: Row index of the left-eye reference point
            (default is a MediaPipe index).
        right_eye_idx: Row index of the right-eye reference point
            (default is a MediaPipe index).

    Returns:
        The normalized mean error as a Python float.
    """
    eye_span = np.linalg.norm(
        target_landmarks[left_eye_idx] - target_landmarks[right_eye_idx]
    )
    # An eye distance below 1.0 is replaced by 1.0, so the error is then
    # effectively left un-normalized rather than blown up.
    normalizer = 1.0 if eye_span < 1.0 else eye_span

    per_point_error = np.linalg.norm(pred_landmarks - target_landmarks, axis=1)
    return float(per_point_error.mean() / normalizer)


def compute_ssim(
    pred: np.ndarray,
    target: np.ndarray,
) -> float:
    """Compute Structural Similarity Index (SSIM).

    Uses scikit-image's windowed SSIM (Wang et al. 2004) for proper
    per-window computation with 11x11 Gaussian kernel.
    """
    try:
        from skimage.metrics import structural_similarity
        # Convert to grayscale if color, or compute per-channel
        if pred.ndim == 3 and pred.shape[2] == 3:
            return float(structural_similarity(pred, target, channel_axis=2, data_range=255))
        else:
            return float(structural_similarity(pred, target, data_range=255))
    except ImportError:
        # Fallback: simple global SSIM (not publication-quality)
        pred_f = pred.astype(np.float64)
        target_f = target.astype(np.float64)

        mu_p = np.mean(pred_f)
        mu_t = np.mean(target_f)
        sigma_p = np.std(pred_f)
        sigma_t = np.std(target_f)
        sigma_pt = np.mean((pred_f - mu_p) * (target_f - mu_t))

        C1 = (0.01 * 255) ** 2
        C2 = (0.03 * 255) ** 2

        ssim_val = (
            (2 * mu_p * mu_t + C1) * (2 * sigma_pt + C2)
        ) / (
            (mu_p ** 2 + mu_t ** 2 + C1) * (sigma_p ** 2 + sigma_t ** 2 + C2)
        )
        return float(ssim_val)


# Lazily created singletons shared across calls; both start as None.
_LPIPS_FN = None  # assigned by _get_lpips_fn() on first use
_ARCFACE_APP = None  # assigned inside compute_identity_similarity() on first use


def _get_lpips_fn() -> Any:
    """Return the shared LPIPS model, building it on the first call.

    The model is created with the "alex" backbone, switched to eval mode
    and stored in the module-level ``_LPIPS_FN`` so later calls reuse it.
    ``lpips`` is imported only when the model actually has to be built.
    """
    global _LPIPS_FN
    if _LPIPS_FN is not None:
        return _LPIPS_FN

    import lpips

    _LPIPS_FN = lpips.LPIPS(net="alex", verbose=False)
    _LPIPS_FN.eval()
    return _LPIPS_FN


def compute_lpips(
    pred: np.ndarray,
    target: np.ndarray,
) -> float:
    """Return the LPIPS perceptual distance between two images.

    Lower scores mean the images are more similar. If either ``lpips`` or
    ``torch`` cannot be imported the function returns NaN instead of
    raising.

    Args:
        pred: Image array indexed (height, width, channel); values are
            divided by 255 and mapped to [-1, 1] before scoring.
        target: Reference image, prepared the same way as ``pred``.
            NOTE(review): channels are passed through in their stored
            order - confirm callers supply the order the LPIPS model
            expects.

    Returns:
        The LPIPS score as a Python float, or NaN when the dependencies
        are missing.
    """
    try:
        import lpips  # noqa: F401 — availability check; used in _get_lpips_fn
        import torch
    except ImportError:
        return float("nan")

    model = _get_lpips_fn()

    # Build one (1, C, H, W) tensor in [-1, 1] per image, pred first
    prepared = []
    for image in (pred, target):
        unit_range = torch.from_numpy(image.astype(np.float32) / 255.0)
        batched = unit_range.permute(2, 0, 1).unsqueeze(0)
        prepared.append(batched * 2 - 1)

    with torch.no_grad():
        distance = model(prepared[0], prepared[1])
    return float(distance.item())


def compute_fid(
    real_dir: str,
    generated_dir: str,
) -> float:
    """Return the FID between a directory of real and of generated images.

    The computation is delegated to torch-fidelity's ``calculate_metrics``;
    CUDA is requested whenever ``torch.cuda.is_available()`` reports True.

    Args:
        real_dir: Path to directory of real images.
        generated_dir: Path to directory of generated images.

    Returns:
        FID score (lower = more similar distributions).

    Raises:
        ImportError: If torch-fidelity is not installed.
    """
    try:
        from torch_fidelity import calculate_metrics
    except ImportError as e:
        raise ImportError(
            "torch-fidelity is required for FID. Install with: pip install torch-fidelity"
        ) from e

    import torch

    # The generated set is passed as input1 and the real set as input2
    fidelity_options = {
        "input1": generated_dir,
        "input2": real_dir,
        "cuda": torch.cuda.is_available(),
        "fid": True,
        "verbose": False,
    }
    result = calculate_metrics(**fidelity_options)
    return float(result["frechet_inception_distance"])


def compute_identity_similarity(
    pred: np.ndarray,
    target: np.ndarray,
) -> float:
    """Compute ArcFace identity cosine similarity between two face images.

    Embeddings come from InsightFace's ``FaceAnalysis`` ("buffalo_l"), using
    the first face returned for each image. The app is created once and
    cached in the module-level ``_ARCFACE_APP``.

    If InsightFace cannot be imported, anything in the embedding path
    raises, or no face is found in either image, the function falls back
    to ``compute_ssim(pred, target)`` as a proxy. When an exception caused
    the fallback, a ``RuntimeWarning`` is emitted so an SSIM value is not
    mistaken for an identity score.

    Args:
        pred: Predicted face image, indexed (height, width, channel).
        target: Reference face image, indexed (height, width, channel).

    Returns:
        Cosine similarity clipped to [0, 1] (1 = identical identity), or the
        SSIM proxy value when the fallback is taken.
    """
    global _ARCFACE_APP
    import warnings

    try:
        from insightface.app import FaceAnalysis

        if _ARCFACE_APP is None:
            analyzer = FaceAnalysis(
                name="buffalo_l",
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
            )
            analyzer.prepare(ctx_id=-1, det_size=(320, 320))
            # Cache only a fully prepared app, so a failed prepare() is retried
            # on the next call instead of leaving a half-initialised singleton.
            _ARCFACE_APP = analyzer
        app = _ARCFACE_APP

        # NOTE(review): 3-channel inputs are used as-is; any other channel
        # count goes through COLOR_RGB2BGR - confirm that is the intended
        # conversion for non-3-channel images.
        pred_bgr = pred if pred.shape[2] == 3 else cv2.cvtColor(pred, cv2.COLOR_RGB2BGR)
        target_bgr = target if target.shape[2] == 3 else cv2.cvtColor(target, cv2.COLOR_RGB2BGR)

        pred_faces = app.get(pred_bgr)
        target_faces = app.get(target_bgr)

        if pred_faces and target_faces:
            pred_emb = pred_faces[0].embedding
            target_emb = target_faces[0].embedding
            # The small epsilon guards against a zero-norm embedding
            sim = np.dot(pred_emb, target_emb) / (
                np.linalg.norm(pred_emb) * np.linalg.norm(target_emb) + 1e-8
            )
            return float(np.clip(sim, 0, 1))
    except Exception as exc:
        # Deliberate best-effort: keep going with the proxy, but say so.
        warnings.warn(
            f"ArcFace identity similarity unavailable ({exc!r}); "
            "falling back to SSIM-based proxy",
            RuntimeWarning,
            stacklevel=2,
        )

    # Fallback: SSIM-based proxy
    return compute_ssim(pred, target)


# ------------------------------------------------------------------
# Geometric nasal ratios (adapted from Varghaei et al., arXiv:2508.13363)
# ------------------------------------------------------------------

# MediaPipe 478-point indices for facial measurements.
# Each constant is a row index into a landmark array; they are used by
# compute_nasal_ratios() and compute_bilateral_symmetry().
_LEFT_ALAR = 129       # left alar (nose wing) outermost point
_RIGHT_ALAR = 358      # right alar
_NOSE_TIP = 1          # pronasale
_NOSE_BRIDGE_TOP = 168 # nasion (bridge root)
_LEFT_INNER_CANTHUS = 133   # inner corner of the left eye
_RIGHT_INNER_CANTHUS = 362  # inner corner of the right eye
_LEFT_TRAGION = 234    # left ear (face width proxy)
_RIGHT_TRAGION = 454   # right ear
_FOREHEAD = 10         # trichion / upper face
_CHIN = 152            # menton / lowest chin point


def compute_nasal_ratios(
    landmarks: np.ndarray,
) -> dict[str, float]:
    """Derive five dimensionless nasal measurements from face landmarks.

    The ratio definitions are credited to Varghaei et al. (2025). Only the
    x and y columns of ``landmarks`` are used, and every denominator is
    padded with 1e-8 so a degenerate face cannot divide by zero.

    Args:
        landmarks: (478, 2) or (478, 3) landmark pixel coordinates.

    Returns:
        Dict with keys:
        - ``alar_face_ratio``: alar width / tragion-to-tragion face width.
        - ``nose_face_ratio``: nasion-to-tip length / forehead-to-chin height.
        - ``alar_intercanthal_ratio``: alar width / inner-canthus distance.
        - ``tip_deviation``: horizontal offset of the nose tip from the
          midpoint of the inner canthi, divided by face width.
        - ``nostril_asymmetry``: absolute difference of the left and right
          alar-to-tip distances, divided by alar width.
    """
    xy = landmarks[:, :2]  # drop any z column
    eps = 1e-8

    def span(idx_a: int, idx_b: int):
        """Euclidean distance between two landmark rows."""
        return np.linalg.norm(xy[idx_a] - xy[idx_b])

    alar_width = span(_LEFT_ALAR, _RIGHT_ALAR)
    face_width = span(_LEFT_TRAGION, _RIGHT_TRAGION)
    nose_length = span(_NOSE_BRIDGE_TOP, _NOSE_TIP)
    face_height = span(_FOREHEAD, _CHIN)
    intercanthal = span(_LEFT_INNER_CANTHUS, _RIGHT_INNER_CANTHUS)

    # Horizontal midline: halfway between the two inner canthi
    canthi_mid_x = (xy[_LEFT_INNER_CANTHUS][0] + xy[_RIGHT_INNER_CANTHUS][0]) / 2
    tip_offset = abs(xy[_NOSE_TIP][0] - canthi_mid_x)

    # Left vs right alar-to-tip distance
    side_gap = abs(span(_LEFT_ALAR, _NOSE_TIP) - span(_RIGHT_ALAR, _NOSE_TIP))

    ratios = {
        "alar_face_ratio": alar_width / (face_width + eps),
        "nose_face_ratio": nose_length / (face_height + eps),
        "alar_intercanthal_ratio": alar_width / (intercanthal + eps),
        "tip_deviation": tip_offset / (face_width + eps),
        "nostril_asymmetry": side_gap / (alar_width + eps),
    }
    return {name: float(value) for name, value in ratios.items()}


def compute_bilateral_symmetry(
    landmarks: np.ndarray,
) -> float:
    """Score left/right facial symmetry from landmarks.

    Each left-side landmark of a fixed list of pairs is mirrored across a
    vertical line and compared with its right-side partner. The distances
    are divided by the inter-ocular distance (rows 33 and 263, floored at
    1.0), averaged, and turned into a score via ``1 - mean``.

    The approach is credited to Varghaei et al. (2025).

    Args:
        landmarks: (478, 2) or (478, 3) landmark pixel coordinates; only
            x and y are used.

    Returns:
        Symmetry score clipped to [0, 1] where 1 = perfect symmetry.
    """
    xy = landmarks[:, :2]

    # Mirror axis: the vertical line through the midpoint of the two tragion points
    axis_x = (xy[_LEFT_TRAGION][0] + xy[_RIGHT_TRAGION][0]) / 2

    eye_span = np.linalg.norm(xy[33] - xy[263])  # inter-ocular distance
    scale = 1.0 if eye_span < 1.0 else eye_span

    # (left_idx, right_idx) MediaPipe correspondence pairs.
    # NOTE(review): the anatomical labels are carried over from the original
    # author and have not been re-verified against the MediaPipe mesh.
    mirrored_pairs = (
        (33, 263),   # outer canthi
        (133, 362),  # inner canthi
        (70, 300),   # eyebrow inner
        (105, 334),  # eyebrow outer
        (129, 358),  # alar
        (61, 291),   # mouth corners
        (234, 454),  # tragion
        (93, 323),   # cheekbone
        (132, 361),  # lower eyelid
        (159, 386),  # upper eyelid
        (58, 288),   # lower lip
        (172, 397),  # chin lateral
        (136, 365),  # nose lateral
        (48, 278),   # nostril
    )

    # Mirror the left point (x -> 2 * axis_x - x, y unchanged) and measure how
    # far it lands from the right point, in inter-ocular units.
    asymmetries = [
        np.linalg.norm(
            np.array([2 * axis_x - xy[left][0], xy[left][1]]) - xy[right]
        ) / scale
        for left, right in mirrored_pairs
    ]

    # Zero asymmetry maps to a score of 1
    score = 1.0 - np.mean(asymmetries)
    return float(np.clip(score, 0.0, 1.0))


def _quiet_nanmean(values: list[float]) -> float:
    """Return the NaN-ignoring mean, or NaN (without a warning) if all values are NaN."""
    arr = np.asarray(values, dtype=np.float64)
    if np.isnan(arr).all():
        return float("nan")
    return float(np.nanmean(arr))


def _mean_over(scores: list[float], indices: list[int]) -> float | None:
    """Return the NaN-aware mean of ``scores`` at ``indices``, or None if nothing is selected."""
    selected = [scores[i] for i in indices if i < len(scores)]
    return _quiet_nanmean(selected) if selected else None


def evaluate_batch(
    predictions: list[np.ndarray],
    targets: list[np.ndarray],
    pred_landmarks: list[np.ndarray] | None = None,
    target_landmarks: list[np.ndarray] | None = None,
    procedures: list[str] | None = None,
    compute_identity: bool = False,
) -> EvalMetrics:
    """Evaluate a batch of predicted vs target images.

    Computes SSIM and LPIPS for every pair, NME when both landmark lists
    are given, and identity similarity when requested. It then averages
    them over the batch, per Fitzpatrick skin type (classified from the
    target image) and per procedure. Averages ignore NaN scores, and a
    metric with no scores at all is reported as ``0.0``.

    FID is not computed here; the ``fid`` fields of the result keep their
    defaults.

    Args:
        predictions: List of predicted BGR images.
        targets: List of target BGR images.
        pred_landmarks: Optional list of (N, 2) predicted landmark arrays.
        target_landmarks: Optional list of (N, 2) target landmark arrays.
        procedures: Optional list of procedure names for per-procedure
            breakdown; images beyond the end of this list are left out of
            that breakdown.
        compute_identity: Whether to compute ArcFace identity similarity (slow).

    Returns:
        EvalMetrics with all computed values.
    """
    import warnings

    have_landmarks = pred_landmarks is not None and target_landmarks is not None

    ssim_scores: list[float] = []
    lpips_scores: list[float] = []
    nme_scores: list[float] = []
    identity_scores: list[float] = []
    fitz_groups: dict[str, list[int]] = {}
    proc_groups: dict[str, list[int]] = {}
    fitz_failures = 0

    for i, prediction in enumerate(predictions):
        target = targets[i]
        ssim_scores.append(compute_ssim(prediction, target))
        lpips_scores.append(compute_lpips(prediction, target))

        if have_landmarks:
            nme_scores.append(compute_nme(pred_landmarks[i], target_landmarks[i]))

        if compute_identity:
            identity_scores.append(compute_identity_similarity(prediction, target))

        # Fitzpatrick classification is best-effort: it is skipped without
        # OpenCV, and a failure on one image must not abort the batch.
        if cv2 is not None:
            try:
                fitz = classify_fitzpatrick_ita(target)
            except Exception:
                fitz_failures += 1
            else:
                fitz_groups.setdefault(fitz, []).append(i)

        # Procedure grouping
        if procedures is not None and i < len(procedures):
            proc_groups.setdefault(procedures[i], []).append(i)

    if fitz_failures:
        # Report once per batch instead of silently thinning the breakdown
        warnings.warn(
            f"Fitzpatrick classification failed for {fitz_failures} of "
            f"{len(predictions)} image(s); they are omitted from the skin-type breakdown",
            RuntimeWarning,
            stacklevel=2,
        )

    metrics = EvalMetrics(
        ssim=_quiet_nanmean(ssim_scores) if ssim_scores else 0.0,
        lpips=_quiet_nanmean(lpips_scores) if lpips_scores else 0.0,
        nme=_quiet_nanmean(nme_scores) if nme_scores else 0.0,
        identity_sim=_quiet_nanmean(identity_scores) if identity_scores else 0.0,
    )

    # Full Fitzpatrick stratification for ALL metrics
    for ftype, indices in fitz_groups.items():
        metrics.count_by_fitzpatrick[ftype] = len(indices)
        for scores, bucket in (
            (lpips_scores, metrics.lpips_by_fitzpatrick),
            (ssim_scores, metrics.ssim_by_fitzpatrick),
            (nme_scores, metrics.nme_by_fitzpatrick),
            (identity_scores, metrics.identity_sim_by_fitzpatrick),
        ):
            group_mean = _mean_over(scores, indices)
            if group_mean is not None:
                bucket[ftype] = group_mean

    # Per-procedure breakdown
    for proc, indices in proc_groups.items():
        for scores, bucket in (
            (lpips_scores, metrics.lpips_by_procedure),
            (ssim_scores, metrics.ssim_by_procedure),
            (nme_scores, metrics.nme_by_procedure),
        ):
            group_mean = _mean_over(scores, indices)
            if group_mean is not None:
                bucket[proc] = group_mean

    return metrics