# src/models.py  —  Enterprise Lens V4
# ════════════════════════════════════════════════════════════════════
# Face Lane  : InsightFace SCRFD-10GF + ArcFace-R100 (buffalo_l)
#              + AdaFace IR-50 (MS1MV2) fused → 1024-D vector
#              • det_size=(1280,1280) — catches small/group faces
#              • Quality gate: det_score ≥ 0.35, face_px ≥ 20
#              • Multi-scale: detection at 3 scales + flip pass, IoU-deduped
#              • Stores one 1024-D vector PER face
#              • Each vector carries base64 face-crop thumbnail
#              • face_quality_score + face_width_px in metadata
#
# Object Lane: SigLIP + DINOv2 fused 1536-D (unchanged from V3)
# ════════════════════════════════════════════════════════════════════

import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import asyncio
import base64
import functools
import hashlib
import io
import threading
import traceback

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from transformers import AutoImageProcessor, AutoModel, AutoProcessor
from ultralytics import YOLO

# ── InsightFace ───────────────────────────────────────────────────
try:
    import insightface
    from insightface.app import FaceAnalysis
    INSIGHTFACE_AVAILABLE = True
except ImportError:
    INSIGHTFACE_AVAILABLE = False
    print("⚠️  insightface not installed — face lane disabled")
    print("    Run: pip install insightface onnxruntime-silicon  (mac)")
    print("         pip install insightface onnxruntime          (linux/win)")

# ── AdaFace ──────────────────────────────────────────────────────
# Disabled by default — enable by setting ENABLE_ADAFACE=1 env var.
# When disabled: ArcFace(512) + zeros(512) = 1024-D (fully functional).
ADAFACE_WEIGHTS_AVAILABLE = False  # informational only — actual gating is the ENABLE_ADAFACE check in _load_adaface

# ── Constants ─────────────────────────────────────────────────────
YOLO_PERSON_CLASS_ID  = 0
MIN_FACE_SIZE         = 20      # lowered from 40, which missed small faces in group photos
MAX_FACES_PER_IMAGE   = 12      # slightly higher cap for group photos
MAX_CROPS             = 6       # max YOLO object crops per image
MAX_IMAGE_SIZE        = 640     # object lane longest edge
DET_SIZE_PRIMARY      = (1280, 1280)  # V4: 1280 for small-face detection
DET_SIZE_SECONDARY    = (640, 640)    # fallback / 2nd scale
FACE_CROP_THUMB_SIZE  = 112     # face thumbnail for Pinecone metadata
FACE_CROP_QUALITY     = 80      # JPEG quality for thumbnails
FACE_QUALITY_GATE     = 0.35    # lowered from 0.60 — accepts sunglasses, angles, smiles
# Multi-scale pyramid — tried in order, results merged with IoU dedup
DET_SCALES            = [(1280, 1280), (960, 960), (640, 640)]
IOU_DEDUP_THRESHOLD   = 0.45    # suppress duplicate detections across scales
FACE_DIM              = 512     # ArcFace embedding dimension
ADAFACE_DIM           = 512     # AdaFace embedding dimension
FUSED_FACE_DIM        = 1024    # ArcFace + AdaFace concatenated


# ════════════════════════════════════════════════════════════════
#  Utility functions
# ════════════════════════════════════════════════════════════════

def _resize_pil(img: Image.Image, max_side: int = MAX_IMAGE_SIZE) -> Image.Image:
    w, h = img.size
    if max(w, h) <= max_side:
        return img
    scale = max_side / max(w, h)
    return img.resize((int(w * scale), int(h * scale)), Image.LANCZOS)


def _img_hash(image_path: str) -> str:
    """Fast cache key: MD5 of the first 64 KiB only, not the whole file."""
    h = hashlib.md5()
    with open(image_path, "rb") as f:
        h.update(f.read(65536))
    return h.hexdigest()


def _crop_to_b64(
    img_bgr: np.ndarray,
    x1: int, y1: int, x2: int, y2: int,
    thumb_size: int = FACE_CROP_THUMB_SIZE,
) -> str:
    """Crop face from BGR image with 20% padding, return base64 JPEG thumbnail."""
    H, W = img_bgr.shape[:2]
    w, h = x2 - x1, y2 - y1
    pad_x = int(w * 0.20)
    pad_y = int(h * 0.20)
    cx1 = max(0, x1 - pad_x)
    cy1 = max(0, y1 - pad_y)
    cx2 = min(W, x2 + pad_x)
    cy2 = min(H, y2 + pad_y)
    crop = img_bgr[cy1:cy2, cx1:cx2]
    if crop.size == 0:
        return ""
    pil = Image.fromarray(crop[:, :, ::-1])          # BGR → RGB
    pil = pil.resize((thumb_size, thumb_size), Image.LANCZOS)
    buf = io.BytesIO()
    pil.save(buf, format="JPEG", quality=FACE_CROP_QUALITY)
    return base64.b64encode(buf.getvalue()).decode()
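# Recovering the thumbnail later is a one-liner (sketch; `md` is a
# hypothetical name for a retrieved Pinecone metadata dict carrying
# the "face_crop" field stored by this function):
#   Image.open(io.BytesIO(base64.b64decode(md["face_crop"])))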


def _face_crop_for_adaface(
    img_bgr: np.ndarray,
    x1: int, y1: int, x2: int, y2: int,
) -> "np.ndarray | None":
    """
    Crop and normalise face for AdaFace IR-50 input.
    Returns float32 numpy array (3, 112, 112) normalised to [-1, 1],
    or None if the padded crop is empty.
    """
    H, W = img_bgr.shape[:2]
    w, h = x2 - x1, y2 - y1
    pad_x = int(w * 0.10)
    pad_y = int(h * 0.10)
    cx1 = max(0, x1 - pad_x)
    cy1 = max(0, y1 - pad_y)
    cx2 = min(W, x2 + pad_x)
    cy2 = min(H, y2 + pad_y)
    crop = img_bgr[cy1:cy2, cx1:cx2]
    if crop.size == 0:
        return None
    rgb = crop[:, :, ::-1].copy()                   # BGR → RGB
    pil = Image.fromarray(rgb).resize((112, 112), Image.LANCZOS)
    arr = np.array(pil, dtype=np.float32) / 255.0
    arr = (arr - 0.5) / 0.5                          # normalise [-1, 1]
    return arr.transpose(2, 0, 1)                    # HWC → CHW



def _clahe_enhance(bgr: np.ndarray) -> np.ndarray:
    """CLAHE on luminance — improves detection on dark/washed/low-contrast photos."""
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    l_eq = clahe.apply(l)
    return cv2.cvtColor(cv2.merge([l_eq, a, b]), cv2.COLOR_LAB2BGR)


def _iou(box_a: list, box_b: list) -> float:
    """IoU between two [x1,y1,x2,y2] boxes."""
    xa = max(box_a[0], box_b[0]); ya = max(box_a[1], box_b[1])
    xb = min(box_a[2], box_b[2]); yb = min(box_a[3], box_b[3])
    inter = max(0, xb - xa) * max(0, yb - ya)
    if inter == 0:
        return 0.0
    area_a = (box_a[2]-box_a[0]) * (box_a[3]-box_a[1])
    area_b = (box_b[2]-box_b[0]) * (box_b[3]-box_b[1])
    return inter / (area_a + area_b - inter)
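# Worked example: _iou([0, 0, 10, 10], [5, 5, 15, 15]) = 25 / 175 ≈ 0.143,
# below IOU_DEDUP_THRESHOLD (0.45), so both boxes survive deduplication.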


def _dedup_faces(faces_list: list, iou_thresh: float = IOU_DEDUP_THRESHOLD) -> list:
    """Remove duplicate detections across scales/flips. Keep highest det_score."""
    if not faces_list:
        return []
    faces_list = sorted(faces_list, key=lambda f: float(f.det_score), reverse=True)
    kept = []
    for face in faces_list:
        b = face.bbox.astype(int)
        box = [b[0], b[1], b[2], b[3]]
        duplicate = any(
            _iou(box, list(k.bbox.astype(int))) > iou_thresh for k in kept
        )
        if not duplicate:
            kept.append(face)
    return kept

# ════════════════════════════════════════════════════════════════
#  AIModelManager — V4
# ════════════════════════════════════════════════════════════════

class AIModelManager:
    def __init__(self):
        self.device = (
            "cuda" if torch.cuda.is_available()
            else ("mps" if torch.backends.mps.is_available() else "cpu")
        )
        print(f"🚀 Loading models onto: {self.device.upper()}...")

        # ── Object Lane: SigLIP + DINOv2 (unchanged) ─────────────
        print("📦 Loading SigLIP...")
        self.siglip_processor = AutoProcessor.from_pretrained(
            "google/siglip-base-patch16-224", use_fast=True)
        self.siglip_model = AutoModel.from_pretrained(
            "google/siglip-base-patch16-224").to(self.device).eval()

        print("📦 Loading DINOv2...")
        self.dinov2_processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
        self.dinov2_model = AutoModel.from_pretrained(
            "facebook/dinov2-base").to(self.device).eval()

        if self.device == "cuda":
            self.siglip_model = self.siglip_model.half()
            self.dinov2_model = self.dinov2_model.half()

        # ── YOLO for object segmentation ─────────────────────────
        print("📦 Loading YOLO11n-seg...")
        self.yolo = YOLO("yolo11n-seg.pt")

        # ── Face Lane: InsightFace SCRFD + ArcFace-R100 ───────────
        # V4: ALWAYS use buffalo_l (SCRFD-10GF + ArcFace-R100)
        #     even on CPU — accuracy matters more than speed here.
        #     det_size=1280 catches faces as small as ~10px in source.
        self.face_app = None
        if INSIGHTFACE_AVAILABLE:
            try:
                print("📦 Loading InsightFace buffalo_l (SCRFD-10GF + ArcFace-R100)...")
                self.face_app = FaceAnalysis(
                    name="buffalo_l",
                    providers=(
                        ["CUDAExecutionProvider", "CPUExecutionProvider"]
                        if self.device == "cuda"
                        else ["CPUExecutionProvider"]
                    ),
                )
                self.face_app.prepare(
                    ctx_id=0 if self.device == "cuda" else -1,
                    det_size=DET_SIZE_PRIMARY,   # 1280×1280 — key for small faces
                )
                # Warmup
                test_img = np.zeros((112, 112, 3), dtype=np.uint8)
                self.face_app.get(test_img)
                print("✅ InsightFace buffalo_l loaded — SCRFD+ArcFace face lane ACTIVE")
                print(f"   det_size={DET_SIZE_PRIMARY} | quality_gate={FACE_QUALITY_GATE}")
            except Exception as e:
                print(f"❌ InsightFace init FAILED: {e}")
                print(traceback.format_exc())
                self.face_app = None
        else:
            print("❌ InsightFace NOT installed")

        # ── AdaFace IR-50 (CVPR 2022) — quality-adaptive fusion ───
        # Fused with ArcFace → 1024-D face vector
        # Weights: minchul/cvlface_adaface_ir50_ms1mv2 from HuggingFace
        self.adaface_model = None
        self._load_adaface()

        # Thread safety for ONNX
        self._face_lock     = threading.Lock()
        self._cache         = {}
        self._cache_maxsize = 128
        adaface_status = "FULL FUSION u2705" if self.adaface_model else "ZERO-PADDED u26a0ufe0f  (AdaFace weights missing)"
        print("")
        print("u2705 Enterprise Lens V4 u2014 Models Ready")
        print(f"   Device            : {self.device.upper()}")
        print(f"   InsightFace       : buffalo_l (SCRFD-10GF + ArcFace-R100)")
        print(f"   AdaFace           : {adaface_status}")
        print(f"   Face vector dim   : {FUSED_FACE_DIM}  <- enterprise-faces MUST be {FUSED_FACE_DIM}-D")
        print(f"   Object vector dim : 1536  <- enterprise-objects MUST be 1536-D")
        print(f"   Quality gate      : det_score >= {FACE_QUALITY_GATE}, face_px >= {MIN_FACE_SIZE}")
        print(f"   Detection size    : {DET_SIZE_PRIMARY}")
        print("")

    def _load_adaface(self):
        """
        AdaFace IR-50 MS1MV2 — disabled for now.
        Face vectors use ArcFace(512) + zeros(512) = 1024-D.
        This is fully functional — cosine similarity works correctly.
        Re-enable by setting ENABLE_ADAFACE=1 env var when HF token
        injection into Docker build is confirmed working.
        """
        enable = os.getenv("ENABLE_ADAFACE", "0").strip() == "1"
        hf_token_present = bool(os.getenv("HF_TOKEN", "").strip())
        print(f"   ENABLE_ADAFACE={os.getenv('ENABLE_ADAFACE', 'NOT SET')}")
        print(f"   HF_TOKEN present={'YES' if hf_token_present else 'NO (not set or empty)'}")
        if not enable:
            print("⚠️  AdaFace disabled (ENABLE_ADAFACE != 1) — using ArcFace zero-padded 1024-D")
            self.adaface_model = None
            return

        # Full loading code kept here for when AdaFace is re-enabled
        import sys
        HF_TOKEN   = os.getenv("HF_TOKEN", None)
        REPO_ID    = "minchul/cvlface_adaface_ir50_ms1mv2"
        CACHE_PATH = os.path.expanduser("~/.cvlface_cache/minchul/cvlface_adaface_ir50_ms1mv2")
        try:
            from huggingface_hub import hf_hub_download
            print("📦 Loading AdaFace IR-50 MS1MV2...")
            os.makedirs(CACHE_PATH, exist_ok=True)
            hf_hub_download(repo_id=REPO_ID, filename="files.txt",
                token=HF_TOKEN, local_dir=CACHE_PATH, local_dir_use_symlinks=False)
            with open(os.path.join(CACHE_PATH, "files.txt")) as f:
                extra = [x.strip() for x in f.read().split("\n") if x.strip()]
            for fname in extra + ["config.json", "wrapper.py", "model.safetensors"]:
                fpath = os.path.join(CACHE_PATH, fname)
                if not os.path.exists(fpath):
                    hf_hub_download(repo_id=REPO_ID, filename=fname,
                        token=HF_TOKEN, local_dir=CACHE_PATH, local_dir_use_symlinks=False)
            cwd = os.getcwd()
            os.chdir(CACHE_PATH)
            sys.path.insert(0, CACHE_PATH)
            try:
                from transformers import AutoModel as _HF_AutoModel
                model = _HF_AutoModel.from_pretrained(
                    CACHE_PATH, trust_remote_code=True, token=HF_TOKEN)
            finally:
                os.chdir(cwd)
                if CACHE_PATH in sys.path: sys.path.remove(CACHE_PATH)
            model = model.to(self.device).eval()
            with torch.no_grad():
                out = model(torch.zeros(1, 3, 112, 112).to(self.device))
            emb = out if isinstance(out, torch.Tensor) else out.embedding
            assert emb.shape[-1] == ADAFACE_DIM
            self.adaface_model = model
            print(f"✅ AdaFace IR-50 loaded — 1024-D FULL FUSION active")
        except Exception as e:
            print(f"⚠️  AdaFace load failed: {e} — falling back to zero-padded 1024-D")
            self.adaface_model = None

    # ── Object Lane: batched SigLIP + DINOv2 embedding ───────────
    def _embed_crops_batch(self, crops: list) -> list:
        """Embed a list of PIL images → list of 1536-D numpy arrays."""
        if not crops:
            return []
        with torch.no_grad():
            # SigLIP
            sig_in = self.siglip_processor(images=crops, return_tensors="pt", padding=True)
            sig_in = {k: v.to(self.device) for k, v in sig_in.items()}
            if self.device == "cuda":
                sig_in = {k: v.half() if v.dtype == torch.float32 else v
                          for k, v in sig_in.items()}
            sig_out = self.siglip_model.get_image_features(**sig_in)
            # Handle all output types across transformers versions
            if hasattr(sig_out, "image_embeds"):
                sig_out = sig_out.image_embeds
            elif hasattr(sig_out, "pooler_output"):
                sig_out = sig_out.pooler_output
            elif hasattr(sig_out, "last_hidden_state"):
                sig_out = sig_out.last_hidden_state[:, 0, :]
            elif isinstance(sig_out, tuple):
                sig_out = sig_out[0]
            # sig_out is now a tensor
            if not isinstance(sig_out, torch.Tensor):
                sig_out = sig_out[0]
            sig_vecs = F.normalize(sig_out.float(), p=2, dim=1).cpu()

            # DINOv2
            dino_in = self.dinov2_processor(images=crops, return_tensors="pt")
            dino_in = {k: v.to(self.device) for k, v in dino_in.items()}
            if self.device == "cuda":
                dino_in = {k: v.half() if v.dtype == torch.float32 else v
                           for k, v in dino_in.items()}
            dino_out  = self.dinov2_model(**dino_in)
            dino_vecs = F.normalize(
                dino_out.last_hidden_state[:, 0, :].float(), p=2, dim=1).cpu()

            fused = F.normalize(torch.cat([sig_vecs, dino_vecs], dim=1), p=2, dim=1)
        return [fused[i].numpy() for i in range(len(crops))]

    # ── AdaFace embedding for a single face crop ─────────────────
    def _adaface_embed(self, face_arr_chw: np.ndarray) -> np.ndarray:
        """
        Run AdaFace IR-50 MS1MV2 on a preprocessed (3,112,112) float32 array.
        Input : CHW float32, normalised to [-1, 1]
        Output: 512-D L2-normalised numpy embedding, or None on failure.

        The cvlface model may return a tensor directly or an object
        with an .embedding attribute — both cases handled.
        """
        if self.adaface_model is None or face_arr_chw is None:
            return None
        try:
            t = torch.from_numpy(face_arr_chw).unsqueeze(0)  # (1,3,112,112)
            t = t.to(self.device)
            if self.device == "cuda":
                t = t.half()
            with torch.no_grad():
                out = self.adaface_model(t)
            # Handle both raw tensor and object-with-embedding outputs
            emb = out if isinstance(out, torch.Tensor) else out.embedding
            emb = F.normalize(emb.float(), p=2, dim=1)
            return emb[0].cpu().numpy()
        except Exception as e:
            print(f"⚠️  AdaFace inference error: {e}")
            return None

    # ── V4 Face detection + dual encoding ────────────────────────
    def _detect_and_encode_faces(self, img_np: np.ndarray) -> list:
        """
        Detect ALL faces using InsightFace SCRFD-10GF across DET_SCALES
        (1280/960/640) plus a horizontal-flip pass, merged by IoU dedup.
        For each face:
          - ArcFace-R100 embedding (512-D, from InsightFace)
          - AdaFace IR-50 embedding (512-D, fused quality-adaptive)
          - Concatenate + L2-normalise → 1024-D final vector
          - Quality gate: det_score ≥ FACE_QUALITY_GATE (0.35),
            face width ≥ MIN_FACE_SIZE (20px)
          - Base64 thumbnail stored for UI

        Returns list of dicts with keys:
          type, vector (always 1024-D), face_idx, bbox,
          face_crop, det_score, face_quality, face_width_px
        """
        if self.face_app is None:
            print("⚠️  face_app is None — InsightFace not loaded")
            return []

        try:
            # InsightFace expects BGR
            if img_np.dtype != np.uint8:
                img_np = (img_np * 255).astype(np.uint8)
            bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()

            # ── Preprocessing: CLAHE contrast enhancement ─────────
            # Helps with dark/overexposed/low-contrast photos
            bgr_enhanced = _clahe_enhance(bgr)

            # ── Multi-scale + flip detection ──────────────────────
            # Run SCRFD at multiple resolutions AND on horizontally
            # flipped image. Catches faces that one scale/orientation misses.
            # Results are merged and deduplicated by IoU.
            all_raw_faces = []
            H, W = bgr.shape[:2]

            for scale in DET_SCALES:
                # Resize (aspect-preserving) so the longest edge fits this
                # scale; never upscale beyond the original resolution.
                factor  = min(1.0, scale[0] / max(W, H))
                scale_w = max(1, int(W * factor))
                scale_h = max(1, int(H * factor))
                if scale_w == W and scale_h == H:
                    bgr_scaled = bgr_enhanced
                else:
                    bgr_scaled = cv2.resize(bgr_enhanced, (scale_w, scale_h))

                print(f"🔍 SCRFD detection at {scale_w}×{scale_h}...")
                # Temporarily set det_size for this scale
                try:
                    self.face_app.det_model.input_size = scale
                    with self._face_lock:
                        faces_at_scale = self.face_app.get(bgr_scaled)
                    # Scale bboxes back to original dimensions
                    sx = W / scale_w; sy = H / scale_h
                    for f in faces_at_scale:
                        if sx != 1.0 or sy != 1.0:
                            f.bbox[0] *= sx; f.bbox[1] *= sy
                            f.bbox[2] *= sx; f.bbox[3] *= sy
                    all_raw_faces.extend(faces_at_scale)
                except Exception as e:
                    print(f"   ⚠️  Scale {scale} failed: {e}; trying next scale")

            # Horizontal flip pass — catches profile/turned faces
            bgr_flip = cv2.flip(bgr_enhanced, 1)
            try:
                self.face_app.det_model.input_size = DET_SIZE_PRIMARY
                with self._face_lock:
                    faces_flip = self.face_app.get(bgr_flip)
                # Mirror bboxes back to original orientation
                for f in faces_flip:
                    x1, y1, x2, y2 = f.bbox
                    f.bbox[0] = W - x2; f.bbox[2] = W - x1
                all_raw_faces.extend(faces_flip)
            except Exception as e:
                print(f"   ⚠️  Flip pass failed: {e}")

            # Restore primary det_size
            self.face_app.det_model.input_size = DET_SIZE_PRIMARY

            # Deduplicate across scales and flip
            faces = _dedup_faces(all_raw_faces)
            print(f"   Raw detections: {len(all_raw_faces)} → after dedup: {len(faces)}")

            results  = []
            accepted = 0

            for idx, face in enumerate(faces):
                if accepted >= MAX_FACES_PER_IMAGE:
                    break

                # ── Bounding box ──────────────────────────────────
                bbox_raw = face.bbox.astype(int)
                x1, y1, x2, y2 = bbox_raw
                x1 = max(0, x1); y1 = max(0, y1)
                x2 = min(bgr.shape[1], x2); y2 = min(bgr.shape[0], y2)
                w, h = x2 - x1, y2 - y1
                if w <= 0 or h <= 0:
                    continue

                # ── Quality gate 1: minimum size ──────────────────
                if w < MIN_FACE_SIZE or h < MIN_FACE_SIZE:
                    print(f"   Face {idx}: SKIP — too small ({w}×{h}px)")
                    continue

                # ── Quality gate 2: detection confidence ──────────
                det_score = float(face.det_score) if hasattr(face, "det_score") else 1.0
                if det_score < FACE_QUALITY_GATE:
                    print(f"   Face {idx}: SKIP — low det_score ({det_score:.3f})")
                    continue

                # ── ArcFace embedding (from InsightFace) ──────────
                if face.embedding is None:
                    continue
                arcface_vec = face.embedding.astype(np.float32)
                n = np.linalg.norm(arcface_vec)
                if n > 0:
                    arcface_vec = arcface_vec / n

                # ── AdaFace embedding (quality-adaptive) ──────────
                face_chw   = _face_crop_for_adaface(bgr, x1, y1, x2, y2)
                adaface_vec = self._adaface_embed(face_chw)

                # ── Fuse: ArcFace + AdaFace → 1024-D ─────────────
                # ALWAYS output FUSED_FACE_DIM (1024) so Pinecone index
                # dimension never mismatches, regardless of AdaFace status.
                if adaface_vec is not None:
                    # Full fusion: ArcFace(512) + AdaFace(512) → 1024-D
                    fused_raw = np.concatenate([arcface_vec, adaface_vec])
                else:
                    # AdaFace unavailable — pad with zeros to maintain 1024-D
                    # The ArcFace half still carries full identity signal;
                    # zero padding is neutral and doesn't corrupt similarity.
                    print("   ⚠️  AdaFace unavailable — padding to 1024-D")
                    fused_raw = np.concatenate([arcface_vec,
                                                np.zeros(ADAFACE_DIM, dtype=np.float32)])
                n2 = np.linalg.norm(fused_raw)
                final_vec = (fused_raw / n2) if n2 > 0 else fused_raw
                vec_dim   = FUSED_FACE_DIM   # always 1024
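                # Why zero padding is safe: each half is unit-norm, so a
                # zero-padded vector renormalises to [a, 0] and
                # cos([a,0], [b,0]) = a·b, i.e. plain ArcFace similarity.
                # Against a full-fusion vector [b, q]/√2 the dot product
                # is (a·b)/√2: a uniform scale that preserves ranking.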

                # ── Face crop thumbnail for UI ─────────────────────
                face_crop_b64 = _crop_to_b64(bgr, x1, y1, x2, y2)

                results.append({
                    "type":           "face",
                    "vector":         final_vec,
                    "vec_dim":        vec_dim,
                    "face_idx":       accepted,
                    "bbox":           [int(x1), int(y1), int(w), int(h)],
                    "face_crop":      face_crop_b64,
                    "det_score":      det_score,
                    "face_quality":   det_score,          # alias for metadata
                    "face_width_px":  int(w),
                })
                accepted += 1
                print(f"   Face {idx}: ACCEPTED — {w}×{h}px | "
                      f"det={det_score:.3f} | dim={vec_dim}")

            print(f"👤 {accepted} face(s) passed quality gate")
            return results

        except Exception as e:
            print(f"🟠 InsightFace error: {e}")
            print(traceback.format_exc()[-600:])
            return []

    # ── Main process_image ────────────────────────────────────────
    def process_image(
        self,
        image_path: str,
        is_query:     bool = False,
        detect_faces: bool = True,
    ) -> list:
        """
        Full pipeline for one image.

        Returns list of vector dicts:
          Face:   {type, vector (1024-D), face_idx, bbox, face_crop,
                   det_score, face_quality, face_width_px}
          Object: {type, vector (1536-D)}

        V4 changes vs V3:
          - SCRFD at 1280px (not 640) — catches small/group faces
          - buffalo_l always (not buffalo_sc on CPU)
          - ArcFace + AdaFace fused 1024-D vectors
          - Quality gate: det_score ≥ 0.35, width ≥ 20px
          - Multi-scale: SCRFD at 1280/960/640 plus a flip pass, IoU-deduped
        """
        cache_key = f"{_img_hash(image_path)}_{detect_faces}_{is_query}"
        if cache_key in self._cache:
            print("⚡ Cache hit")
            return self._cache[cache_key]

        extracted    = []
        original_pil = Image.open(image_path).convert("RGB")
        img_np       = np.array(original_pil)     # RGB uint8
        faces_found  = False

        # ════════════════════════════════════════════════════════
        # FACE LANE
        # V4: Pass the full-resolution image; small faces in group
        #     photos are caught by the internal multi-scale sweep.
        # ════════════════════════════════════════════════════════
        if detect_faces and self.face_app is not None:
            # Multi-scale + CLAHE + flip all handled inside _detect_and_encode_faces
            # Pass the full-resolution image — internal scaling handles the rest
            face_results = self._detect_and_encode_faces(img_np)

            if face_results:
                faces_found = True
                for fr in face_results:
                    extracted.append(fr)

        # ════════════════════════════════════════════════════════
        # OBJECT LANE
        # Always runs — even when faces are found.
        # PERSON-class YOLO crops are skipped when faces active
        # to avoid double-counting people.
        # ════════════════════════════════════════════════════════
        crops_pil    = [_resize_pil(original_pil, MAX_IMAGE_SIZE)]   # full image
        yolo_results = self.yolo(image_path, conf=0.5, verbose=False)

        for r in yolo_results:
            if r.masks is not None:
                for seg_idx, mask_xy in enumerate(r.masks.xy):
                    cls_id = int(r.boxes.cls[seg_idx].item())
                    if faces_found and cls_id == YOLO_PERSON_CLASS_ID:
                        continue
                    polygon = np.array(mask_xy, dtype=np.int32)
                    if len(polygon) < 3:
                        continue
                    x, y, w, h = cv2.boundingRect(polygon)
                    if w < 30 or h < 30:
                        continue
                    crop = original_pil.crop((x, y, x + w, y + h))
                    crops_pil.append(crop)
                    if len(crops_pil) >= MAX_CROPS + 1:
                        break
            elif r.boxes is not None:
                for box in r.boxes:
                    cls_id = int(box.cls.item())
                    if faces_found and cls_id == YOLO_PERSON_CLASS_ID:
                        continue
                    x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
                    if (x2 - x1) < 30 or (y2 - y1) < 30:
                        continue
                    crop = original_pil.crop((x1, y1, x2, y2))
                    crops_pil.append(crop)
                    if len(crops_pil) >= MAX_CROPS + 1:
                        break
            if len(crops_pil) >= MAX_CROPS + 1:
                break

        crops    = [_resize_pil(c, MAX_IMAGE_SIZE) for c in crops_pil]
        print(f"🧠 Embedding {len(crops)} object crop(s)...")
        obj_vecs = self._embed_crops_batch(crops)
        for vec in obj_vecs:
            extracted.append({"type": "object", "vector": vec})

        # Cache
        if len(self._cache) >= self._cache_maxsize:
            del self._cache[next(iter(self._cache))]
        self._cache[cache_key] = extracted
        return extracted

    async def process_image_async(
        self,
        image_path:   str,
        is_query:     bool = False,
        detect_faces: bool = True,
    ) -> list:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            functools.partial(self.process_image, image_path, is_query, detect_faces),
        )
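

# ── Minimal smoke-test sketch ────────────────────────────────────
# Not part of the V4 pipeline; a hedged usage example. Assumes the
# model weights download on first run and that the caller supplies an
# image path on the command line (the path is whatever you pass in;
# nothing here ships with the repo).
if __name__ == "__main__":
    import sys

    if len(sys.argv) != 2:
        print("usage: python -m src.models <image_path>")
        sys.exit(1)

    manager = AIModelManager()
    vectors = manager.process_image(sys.argv[1])
    faces   = [v for v in vectors if v["type"] == "face"]
    objects = [v for v in vectors if v["type"] == "object"]
    print(f"{len(faces)} face vector(s) ({FUSED_FACE_DIM}-D), "
          f"{len(objects)} object vector(s) (1536-D)")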