AdarshDRC committed on
Commit
3933e8f
·
1 Parent(s): ddf6252

refactor: new search and retrieval strategy

Browse files
Files changed (2) hide show
  1. main.py +193 -110
  2. src/models.py +410 -140
main.py CHANGED
@@ -48,6 +48,20 @@ IDX_FACES = "enterprise-faces"
48
  MAX_FILES_PER_UPLOAD = 20 # cap to prevent memory corruption on large batches
49
  IDX_OBJECTS = "enterprise-objects"
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  # ════════════════════════════════════════════════════════════════
52
  # SUPABASE LOGGING — async, fire-and-forget, never crashes API
53
  # HF Space Secrets needed:
@@ -244,11 +258,13 @@ async def verify_keys(
244
  existing = {idx.name for idx in await asyncio.to_thread(pc.list_indexes)}
245
  tasks = []
246
  if IDX_OBJECTS not in existing:
247
- tasks.append(asyncio.to_thread(pc.create_index, name=IDX_OBJECTS, dimension=1536,
 
248
  metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")))
249
  indexes_created.append(IDX_OBJECTS)
250
  if IDX_FACES not in existing:
251
- tasks.append(asyncio.to_thread(pc.create_index, name=IDX_FACES, dimension=512,
 
252
  metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")))
253
  indexes_created.append(IDX_FACES)
254
  if tasks: await asyncio.gather(*tasks)
@@ -334,25 +350,25 @@ async def upload_new_images(
334
  for v in vectors:
335
  vec_list = v["vector"].tolist() if hasattr(v["vector"], "tolist") else v["vector"]
336
  if v["type"] == "face":
337
- # ── FACE STORE: ArcFace 512-D embedding
338
- # Metadata includes original image_url so we can
339
- # retrieve the full image after a face match
340
  face_upserts.append({
341
  "id": str(uuid.uuid4()),
342
  "values": vec_list,
343
  "metadata": {
344
- "image_url": image_url, # original full image
345
- "url": image_url, # alias for compatibility
346
- "folder": folder,
347
- "face_idx": v.get("face_idx", 0),
348
- "bbox": str(v.get("bbox", [])),
349
- "face_crop": v.get("face_crop", ""), # base64 thumb for UI
350
- "det_score": v.get("det_score", 1.0),
 
 
351
  }
352
  })
353
  else:
354
  # ── OBJECT STORE: SigLIP+DINOv2 1536-D fused embedding
355
- # Always stores full image — includes all crops + full image
356
  object_upserts.append({
357
  "id": str(uuid.uuid4()),
358
  "values": vec_list,
@@ -450,59 +466,75 @@ async def search_database(
450
  idx_obj = pc.Index(IDX_OBJECTS)
451
  idx_face = pc.Index(IDX_FACES)
452
 
453
- async def _query_one(vec_dict: dict):
454
- vec_list = vec_dict["vector"].tolist() if hasattr(vec_dict["vector"], "tolist") else vec_dict["vector"]
455
- target_idx = idx_face if vec_dict["type"] == "face" else idx_obj
 
 
 
 
 
 
 
 
 
456
  try:
457
- res = await asyncio.to_thread(target_idx.query, vector=vec_list, top_k=10, include_metadata=True)
 
458
  except Exception as e:
459
  if "404" in str(e):
460
- raise HTTPException(404, "Pinecone Index not found. Please log in and click 'Verify Keys' in Settings.")
461
  raise e
462
  out = []
463
  for match in res.get("matches", []):
464
- score = match["score"]
465
- is_face = vec_dict["type"] == "face"
466
- if is_face:
467
- if score < 0.35: continue
468
- ui_score = min(0.99, 0.75 + ((score - 0.35) / 0.65) * 0.24)
469
- else:
470
- if score < 0.45: continue
471
- ui_score = score
472
- out.append({"url": match["metadata"].get("url") or match["metadata"].get("image_url", ""),
473
- "score": round(ui_score, 4),
474
- "caption": "👤 Verified Identity" if is_face else match["metadata"].get("folder", "🎯 Object Match")})
475
  return out
476
 
477
- # ── V3: separate face vectors from object vectors ────────
478
- face_vectors = [v for v in vectors if v["type"] == "face"]
479
- object_vectors = [v for v in vectors if v["type"] == "object"]
480
-
481
  if detect_faces and face_vectors:
482
- # ══════════════════════════════════════════════════════
483
- # FACE MODE — Linked two-index retrieval:
484
- #
485
- # Step 1: Query enterprise-FACES (512-D ArcFace)
486
- # → find which images contain a matching face
487
- # → get image_urls of those matched images
488
  #
489
- # Step 2: For each matched image_url, fetch its full
490
- # object vector from enterprise-OBJECTS
491
- # ensures we return the complete original image
492
- # object index has full scene context
 
 
 
 
 
493
  #
494
- # Result: Face identity match → full image returned
495
- # ══════════════════════════════════════════════════════
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
 
497
- async def _query_face_group(face_vec: dict) -> dict:
498
- vec_list = face_vec["vector"].tolist() if hasattr(face_vec["vector"], "tolist") else face_vec["vector"]
499
-
500
- # ── STEP 1: Search enterprise-FACES index ────────
501
  try:
502
  face_res = await asyncio.to_thread(
503
  idx_face.query,
504
  vector=vec_list,
505
- top_k=20,
506
  include_metadata=True,
507
  )
508
  except Exception as e:
@@ -510,86 +542,134 @@ async def search_database(
510
  raise HTTPException(404, "Pinecone index not found. Go to Settings → Verify & Save.")
511
  raise e
512
 
513
- # Collect matched image_urls with their face scores
514
- # image_url is the key linking face index → object index
515
- face_matched = {} # image_url {raw_score, face_crop, folder}
 
516
  for match in face_res.get("matches", []):
517
- raw_score = match["score"]
518
- if raw_score < 0.35: # ArcFace threshold (same person)
519
  continue
520
- image_url_match = (
521
- match["metadata"].get("url") or
522
- match["metadata"].get("image_url", "")
523
- )
524
- if not image_url_match:
525
  continue
526
- # Keep highest face score per image_url
527
- if image_url_match not in face_matched or raw_score > face_matched[image_url_match]["raw_score"]:
528
- face_matched[image_url_match] = {
529
- "raw_score": raw_score,
530
- "face_crop": match["metadata"].get("face_crop", ""),
531
- "folder": match["metadata"].get("folder", ""),
 
532
  }
533
 
534
- if not face_matched:
535
- return {
536
- "query_face_idx": face_vec.get("face_idx", 0),
537
- "query_face_crop": face_vec.get("face_crop", ""),
538
- "det_score": face_vec.get("det_score", 1.0),
539
- "matches": [],
 
 
 
 
 
 
 
 
540
  }
541
-
542
- # ── STEP 2: Fetch full images from enterprise-OBJECTS ─
543
- # Filter enterprise-objects by the matched image_urls
544
- # This gives us the complete original image for display
545
- matched_urls = list(face_matched.keys())
546
-
547
- # Build results using face scores but returning full images
548
- matches = []
549
- for image_url_match, face_data in face_matched.items():
550
- raw_score = face_data["raw_score"]
551
-
552
- # Remap ArcFace cosine (0.35–1.0) → UI percentage (75%–99%)
553
- ui_score = 0.75 + ((raw_score - 0.35) / (1.0 - 0.35)) * 0.24
554
- ui_score = min(0.99, ui_score)
555
-
556
- matches.append({
557
- "url": image_url_match, # full original image URL
558
- "score": round(ui_score, 4),
559
- "raw_score": round(raw_score, 4),
560
- "face_crop": face_data["face_crop"], # matched face thumbnail
561
- "folder": face_data["folder"],
562
- "caption": "👤 Verified Identity",
563
- })
564
 
565
  return {
566
  "query_face_idx": face_vec.get("face_idx", 0),
567
  "query_face_crop": face_vec.get("face_crop", ""),
568
- "det_score": face_vec.get("det_score", 1.0),
569
- "matches": sorted(matches, key=lambda x: x["score"], reverse=True)[:10],
 
 
570
  }
571
 
572
- face_groups = await asyncio.gather(*[_query_face_group(fv) for fv in face_vectors])
573
- face_groups = [g for g in face_groups if g["matches"]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
 
575
  duration_ms = round((time.perf_counter() - start) * 1000)
576
- total_matches = sum(len(g["matches"]) for g in face_groups)
577
  log("INFO", "search.complete",
578
  user_id=user_id or "anonymous", ip=ip, mode=mode,
579
  lanes=["face"], detect_faces=detect_faces,
580
- face_groups=len(face_groups), results_count=total_matches,
581
- top_score=face_groups[0]["matches"][0]["score"] if face_groups and face_groups[0]["matches"] else 0,
 
582
  duration_ms=duration_ms)
583
 
584
  return {
585
- "mode": "face",
586
- "face_groups": list(face_groups),
587
- "results": [],
588
  }
589
 
590
  else:
591
- # ── OBJECT MODE: original flat results ──────────────
592
- nested = await asyncio.gather(*[_query_one(v) for v in vectors])
 
 
 
593
  all_results = [r for sub in nested for r in sub]
594
  seen = {}
595
  for r in all_results:
@@ -602,7 +682,8 @@ async def search_database(
602
  log("INFO", "search.complete",
603
  user_id=user_id or "anonymous", ip=ip, mode=mode,
604
  lanes=lanes_used, detect_faces=detect_faces,
605
- results_count=len(final), top_score=final[0]["score"] if final else 0,
 
606
  duration_ms=duration_ms)
607
 
608
  return {"mode": "object", "results": final, "face_groups": []}
@@ -869,9 +950,11 @@ async def reset_database(
869
  if tasks: await asyncio.gather(*tasks)
870
  await asyncio.sleep(3) # wait for Pinecone to fully delete
871
  await asyncio.gather(
872
- asyncio.to_thread(pc.create_index, name=IDX_OBJECTS, dimension=1536, metric="cosine",
 
873
  spec=ServerlessSpec(cloud="aws", region="us-east-1")),
874
- asyncio.to_thread(pc.create_index, name=IDX_FACES, dimension=512, metric="cosine",
 
875
  spec=ServerlessSpec(cloud="aws", region="us-east-1")),
876
  )
877
  except Exception as e:
 
48
  MAX_FILES_PER_UPLOAD = 20 # cap to prevent memory corruption on large batches
49
  IDX_OBJECTS = "enterprise-objects"
50
 
51
+ # ── V4 index dimensions ───────────────────────────────────────────
52
+ # enterprise-faces : 1024-D (ArcFace 512 + AdaFace 512, fused)
53
+ # enterprise-objects: 1536-D (SigLIP 768 + DINOv2 768, fused)
54
+ # ⚠️ If upgrading from V3 (512-D faces), you MUST reset the
55
+ # enterprise-faces index via Settings → Danger Zone → Reset DB
56
+ IDX_FACES_DIM = int(os.getenv("IDX_FACES_DIM", "1024"))
57
+ IDX_OBJECTS_DIM = int(os.getenv("IDX_OBJECTS_DIM", "1536"))
58
+
59
+ # V4 face search thresholds
60
+ # Cosine similarity thresholds for the fused 1024-D ArcFace+AdaFace space
61
+ FACE_THRESHOLD_HIGH = 0.40 # high-quality faces (det_score ≥ 0.85)
62
+ FACE_THRESHOLD_LOW = 0.32 # lower-quality faces (det_score < 0.85)
63
+ FACE_TOP_K_FETCH = 50 # fetch more candidates, filter after merge
64
+
65
  # ════════════════════════════════════════════════════════════════
66
  # SUPABASE LOGGING — async, fire-and-forget, never crashes API
67
  # HF Space Secrets needed:
 
258
  existing = {idx.name for idx in await asyncio.to_thread(pc.list_indexes)}
259
  tasks = []
260
  if IDX_OBJECTS not in existing:
261
+ tasks.append(asyncio.to_thread(pc.create_index, name=IDX_OBJECTS,
262
+ dimension=IDX_OBJECTS_DIM, # 1536-D SigLIP+DINOv2
263
  metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")))
264
  indexes_created.append(IDX_OBJECTS)
265
  if IDX_FACES not in existing:
266
+ tasks.append(asyncio.to_thread(pc.create_index, name=IDX_FACES,
267
+ dimension=IDX_FACES_DIM, # 1024-D ArcFace+AdaFace (V4)
268
  metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")))
269
  indexes_created.append(IDX_FACES)
270
  if tasks: await asyncio.gather(*tasks)
 
350
  for v in vectors:
351
  vec_list = v["vector"].tolist() if hasattr(v["vector"], "tolist") else v["vector"]
352
  if v["type"] == "face":
353
+ # ── FACE STORE: ArcFace+AdaFace 1024-D fused embedding
354
+ # V4: includes face_quality + face_width_px for retrieval scoring
 
355
  face_upserts.append({
356
  "id": str(uuid.uuid4()),
357
  "values": vec_list,
358
  "metadata": {
359
+ "image_url": image_url,
360
+ "url": image_url,
361
+ "folder": folder,
362
+ "face_idx": v.get("face_idx", 0),
363
+ "bbox": str(v.get("bbox", [])),
364
+ "face_crop": v.get("face_crop", ""),
365
+ "det_score": v.get("det_score", 1.0),
366
+ "face_quality": v.get("face_quality", v.get("det_score", 1.0)),
367
+ "face_width_px": v.get("face_width_px", 0),
368
  }
369
  })
370
  else:
371
  # ── OBJECT STORE: SigLIP+DINOv2 1536-D fused embedding
 
372
  object_upserts.append({
373
  "id": str(uuid.uuid4()),
374
  "values": vec_list,
 
466
  idx_obj = pc.Index(IDX_OBJECTS)
467
  idx_face = pc.Index(IDX_FACES)
468
 
469
+ # ── V4: split vectors by type ────────────────────────────
470
+ face_vectors = [v for v in vectors if v["type"] == "face"]
471
+ object_vectors = [v for v in vectors if v["type"] == "object"]
472
+
473
+ # ════════════════════════════════════════════════════════
474
+ # OBJECT MODE helper
475
+ # Used when no faces detected or face search disabled.
476
+ # ════════════════════════════════════════════════════════
477
+ async def _query_object_one(vec_dict: dict):
478
+ vec_list = (vec_dict["vector"].tolist()
479
+ if hasattr(vec_dict["vector"], "tolist")
480
+ else vec_dict["vector"])
481
  try:
482
+ res = await asyncio.to_thread(
483
+ idx_obj.query, vector=vec_list, top_k=10, include_metadata=True)
484
  except Exception as e:
485
  if "404" in str(e):
486
+ raise HTTPException(404, "Pinecone Index not found. Go to Settings Verify & Save.")
487
  raise e
488
  out = []
489
  for match in res.get("matches", []):
490
+ if match["score"] < 0.45:
491
+ continue
492
+ out.append({
493
+ "url": match["metadata"].get("url") or match["metadata"].get("image_url", ""),
494
+ "score": round(match["score"], 4),
495
+ "caption": match["metadata"].get("folder", "🎯 Visual Match"),
496
+ })
 
 
 
 
497
  return out
498
 
 
 
 
 
499
  if detect_faces and face_vectors:
500
+ # ════════════════════════════════════════════════════
501
+ # V4 FACE MODE — Multi-face merge retrieval
 
 
 
 
502
  #
503
+ # For a group photo with N detected faces:
504
+ # 1. Query enterprise-faces for EACH face (top_k=50)
505
+ # 2. Build a global image_url match_data map
506
+ # An image is included if ANY face matches
507
+ # • Score = highest matching face score for that image
508
+ # • Track WHICH face indices matched each image
509
+ # 3. Group results PER query face (for UI display)
510
+ # 4. Also build a "cross-face" flat list:
511
+ # images that matched multiple faces rank higher
512
  #
513
+ # Threshold logic:
514
+ # High-quality face (det_score ≥ 0.85) → threshold 0.40
515
+ # Lower-quality face → threshold 0.32
516
+ # (Fused 1024-D space has different cosine distribution
517
+ # than raw ArcFace 512-D — thresholds adjusted accordingly)
518
+ # ════════════════════════════════════════════════════
519
+
520
+ async def _query_single_face(face_vec: dict) -> dict:
521
+ """
522
+ Query enterprise-faces for one detected face.
523
+ Returns per-face result group for UI + raw match map.
524
+ """
525
+ vec_list = (face_vec["vector"].tolist()
526
+ if hasattr(face_vec["vector"], "tolist")
527
+ else face_vec["vector"])
528
+
529
+ # Adaptive threshold: high-quality → stricter
530
+ det_score = face_vec.get("det_score", 1.0)
531
+ threshold = FACE_THRESHOLD_HIGH if det_score >= 0.85 else FACE_THRESHOLD_LOW
532
 
 
 
 
 
533
  try:
534
  face_res = await asyncio.to_thread(
535
  idx_face.query,
536
  vector=vec_list,
537
+ top_k=FACE_TOP_K_FETCH,
538
  include_metadata=True,
539
  )
540
  except Exception as e:
 
542
  raise HTTPException(404, "Pinecone index not found. Go to Settings → Verify & Save.")
543
  raise e
544
 
545
+ # Collect matches — keep BEST score per image_url
546
+ # (multiple face vectors stored per image during upload,
547
+ # we only want the best matching one per image)
548
+ image_map = {} # image_url → best match data
549
  for match in face_res.get("matches", []):
550
+ raw = match["score"]
551
+ if raw < threshold:
552
  continue
553
+ url = (match["metadata"].get("url") or
554
+ match["metadata"].get("image_url", ""))
555
+ if not url:
 
 
556
  continue
557
+ if url not in image_map or raw > image_map[url]["raw_score"]:
558
+ image_map[url] = {
559
+ "raw_score": raw,
560
+ "face_crop": match["metadata"].get("face_crop", ""),
561
+ "folder": match["metadata"].get("folder", ""),
562
+ "face_quality": match["metadata"].get("face_quality", 1.0),
563
+ "face_width_px": match["metadata"].get("face_width_px", 0),
564
  }
565
 
566
+ # Remap raw cosine → UI score (75%–99%)
567
+ # Range is now 0.32–1.0 (wider than old 0.35–1.0)
568
+ def _ui_score(raw: float) -> float:
569
+ lo, hi = FACE_THRESHOLD_LOW, 1.0
570
+ return round(min(0.99, 0.75 + ((raw - lo) / (hi - lo)) * 0.24), 4)
571
+
572
+ matches = [
573
+ {
574
+ "url": url,
575
+ "score": _ui_score(d["raw_score"]),
576
+ "raw_score": round(d["raw_score"], 4),
577
+ "face_crop": d["face_crop"],
578
+ "folder": d["folder"],
579
+ "caption": "👤 Verified Identity",
580
  }
581
+ for url, d in image_map.items()
582
+ ]
583
+ matches = sorted(matches, key=lambda x: x["score"], reverse=True)[:15]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
 
585
  return {
586
  "query_face_idx": face_vec.get("face_idx", 0),
587
  "query_face_crop": face_vec.get("face_crop", ""),
588
+ "det_score": det_score,
589
+ "face_width_px": face_vec.get("face_width_px", 0),
590
+ "matches": matches,
591
+ "_image_map": image_map, # used for cross-face merge below
592
  }
593
 
594
+ # Query all faces in parallel
595
+ raw_groups = await asyncio.gather(
596
+ *[_query_single_face(fv) for fv in face_vectors])
597
+
598
+ # ── Cross-face merge ────────────────────────────────
599
+ # Build a global image → {best_score, matched_face_indices}
600
+ # An image appearing for multiple faces gets a boost:
601
+ # final_score = best_face_score * (1 + 0.05 * extra_face_count)
602
+ # This makes images with multiple searched people rank higher.
603
+ global_image_map = {} # url → {score, matched_faces, face_crop, folder}
604
+ for gi, group in enumerate(raw_groups):
605
+ for url, d in group["_image_map"].items():
606
+ raw = d["raw_score"]
607
+ if url not in global_image_map:
608
+ global_image_map[url] = {
609
+ "raw_score": raw,
610
+ "face_crop": d["face_crop"],
611
+ "folder": d["folder"],
612
+ "matched_faces": [gi],
613
+ }
614
+ else:
615
+ existing = global_image_map[url]
616
+ existing["matched_faces"].append(gi)
617
+ if raw > existing["raw_score"]:
618
+ existing["raw_score"] = raw
619
+ existing["face_crop"] = d["face_crop"]
620
+
621
+ # Apply multi-face boost and build flat merged list
622
+ def _boosted_ui_score(raw: float, n_faces: int) -> float:
623
+ lo = FACE_THRESHOLD_LOW
624
+ base = 0.75 + ((raw - lo) / (1.0 - lo)) * 0.24
625
+ boosted = base * (1.0 + 0.05 * (n_faces - 1))
626
+ return round(min(0.99, boosted), 4)
627
+
628
+ merged_results = []
629
+ for url, d in global_image_map.items():
630
+ n = len(d["matched_faces"])
631
+ merged_results.append({
632
+ "url": url,
633
+ "score": _boosted_ui_score(d["raw_score"], n),
634
+ "raw_score": round(d["raw_score"], 4),
635
+ "face_crop": d["face_crop"],
636
+ "folder": d["folder"],
637
+ "matched_faces": d["matched_faces"],
638
+ "caption": (f"👥 {n} faces matched" if n > 1
639
+ else "👤 Verified Identity"),
640
+ })
641
+ merged_results = sorted(
642
+ merged_results, key=lambda x: x["score"], reverse=True)[:20]
643
+
644
+ # Clean per-group results (remove internal _image_map)
645
+ face_groups = []
646
+ for g in raw_groups:
647
+ clean = {k: v for k, v in g.items() if k != "_image_map"}
648
+ if clean["matches"]:
649
+ face_groups.append(clean)
650
 
651
  duration_ms = round((time.perf_counter() - start) * 1000)
652
+ total_matches = len(merged_results)
653
  log("INFO", "search.complete",
654
  user_id=user_id or "anonymous", ip=ip, mode=mode,
655
  lanes=["face"], detect_faces=detect_faces,
656
+ face_groups=len(face_groups),
657
+ merged_results=total_matches,
658
+ top_score=merged_results[0]["score"] if merged_results else 0,
659
  duration_ms=duration_ms)
660
 
661
  return {
662
+ "mode": "face",
663
+ "face_groups": face_groups, # per-face results for UI tabs
664
+ "results": merged_results, # V4: flat merged cross-face list
665
  }
666
 
667
  else:
668
+ # ════════════════════════════════════════════════════
669
+ # OBJECT MODE — flat ranked results from object index
670
+ # ════════════════════════════════════════════════════
671
+ nested = await asyncio.gather(
672
+ *[_query_object_one(v) for v in vectors])
673
  all_results = [r for sub in nested for r in sub]
674
  seen = {}
675
  for r in all_results:
 
682
  log("INFO", "search.complete",
683
  user_id=user_id or "anonymous", ip=ip, mode=mode,
684
  lanes=lanes_used, detect_faces=detect_faces,
685
+ results_count=len(final),
686
+ top_score=final[0]["score"] if final else 0,
687
  duration_ms=duration_ms)
688
 
689
  return {"mode": "object", "results": final, "face_groups": []}
 
950
  if tasks: await asyncio.gather(*tasks)
951
  await asyncio.sleep(3) # wait for Pinecone to fully delete
952
  await asyncio.gather(
953
+ asyncio.to_thread(pc.create_index, name=IDX_OBJECTS,
954
+ dimension=IDX_OBJECTS_DIM, metric="cosine", # 1536-D
955
  spec=ServerlessSpec(cloud="aws", region="us-east-1")),
956
+ asyncio.to_thread(pc.create_index, name=IDX_FACES,
957
+ dimension=IDX_FACES_DIM, metric="cosine", # 1024-D V4
958
  spec=ServerlessSpec(cloud="aws", region="us-east-1")),
959
  )
960
  except Exception as e:
src/models.py CHANGED
@@ -1,11 +1,15 @@
1
- # src/models.py — Enterprise Lens V3
2
  # ════════════════════════════════════════════════════════════════════
3
- # Face Lane : InsightFace (YuNet detection + ArcFace 512-D encoding)
4
- # Replaces DeepFace + RetinaFace + GhostFaceNet entirely
5
- # • 3-5x faster on CPU, handles small faces (≥20×20 px)
6
- # • Stores one 512-D vector PER face (not per image)
7
- # • Each vector carries a base64 face-crop thumbnail
8
- # Object Lane: SigLIP + DINOv2 fused 1536-D (unchanged from V2)
 
 
 
 
9
  # ════════════════════════════════════════════════════════════════════
10
 
11
  import os
@@ -16,11 +20,13 @@ import base64
16
  import functools
17
  import hashlib
18
  import io
 
 
19
 
20
  import cv2
21
  import numpy as np
22
- import threading
23
  import torch
 
24
  import torch.nn.functional as F
25
  from PIL import Image
26
  from transformers import AutoImageProcessor, AutoModel, AutoProcessor
@@ -34,16 +40,128 @@ try:
34
  except ImportError:
35
  INSIGHTFACE_AVAILABLE = False
36
  print("⚠️ insightface not installed — face lane disabled")
 
 
37
 
38
- # ── Constants ─────────────────────────────────────────────────────
39
- YOLO_PERSON_CLASS_ID = 0
40
- MIN_FACE_SIZE = 20 # minimum face width/height in pixels
41
- MAX_FACES_PER_IMAGE = 10 # cap faces per image for upload
42
- MAX_CROPS = 6 # max YOLO object crops per image
43
- MAX_IMAGE_SIZE = 640 # resize longest edge before inference (V3: 640 vs V2: 512)
44
- FACE_CROP_THUMB_SIZE = 112 # face thumbnail size stored in Pinecone metadata
45
- FACE_CROP_QUALITY = 75 # JPEG quality for stored thumbnails
 
 
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  def _resize_pil(img: Image.Image, max_side: int = MAX_IMAGE_SIZE) -> Image.Image:
49
  w, h = img.size
@@ -60,42 +178,76 @@ def _img_hash(image_path: str) -> str:
60
  return h.hexdigest()
61
 
62
 
63
- def _crop_to_b64(img_np: np.ndarray, bbox: list, thumb_size: int = FACE_CROP_THUMB_SIZE) -> str:
64
- """Crop face from image, resize to thumbnail, return as base64 JPEG string."""
65
- x, y, w, h = bbox
66
- x, y = max(0, x), max(0, y)
67
- # Add 20% padding for more natural face crop
68
- pad_x = int(w * 0.2)
69
- pad_y = int(h * 0.2)
70
- x1 = max(0, x - pad_x)
71
- y1 = max(0, y - pad_y)
72
- x2 = min(img_np.shape[1], x + w + pad_x)
73
- y2 = min(img_np.shape[0], y + h + pad_y)
74
- face_crop = img_np[y1:y2, x1:x2]
75
- if face_crop.size == 0:
 
 
 
76
  return ""
77
- # Resize to thumbnail
78
- face_pil = Image.fromarray(face_crop[..., ::-1]) # BGR → RGB
79
- face_pil = face_pil.resize((thumb_size, thumb_size), Image.LANCZOS)
80
  buf = io.BytesIO()
81
- face_pil.save(buf, format="JPEG", quality=FACE_CROP_QUALITY)
82
  return base64.b64encode(buf.getvalue()).decode()
83
 
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  class AIModelManager:
86
  def __init__(self):
87
  self.device = (
88
  "cuda" if torch.cuda.is_available()
89
  else ("mps" if torch.backends.mps.is_available() else "cpu")
90
  )
91
- print(f"Loading models onto: {self.device.upper()}...")
92
 
93
  # ── Object Lane: SigLIP + DINOv2 (unchanged) ─────────────
 
94
  self.siglip_processor = AutoProcessor.from_pretrained(
95
  "google/siglip-base-patch16-224", use_fast=True)
96
  self.siglip_model = AutoModel.from_pretrained(
97
  "google/siglip-base-patch16-224").to(self.device).eval()
98
 
 
99
  self.dinov2_processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
100
  self.dinov2_model = AutoModel.from_pretrained(
101
  "facebook/dinov2-base").to(self.device).eval()
@@ -105,47 +257,97 @@ class AIModelManager:
105
  self.dinov2_model = self.dinov2_model.half()
106
 
107
  # ── YOLO for object segmentation ─────────────────────────
 
108
  self.yolo = YOLO("yolo11n-seg.pt")
109
 
110
- # ── Face Lane: InsightFace (YuNet + ArcFace) ─────────────
 
 
 
111
  self.face_app = None
112
- print(f"🔍 INSIGHTFACE_AVAILABLE = {INSIGHTFACE_AVAILABLE}")
113
  if INSIGHTFACE_AVAILABLE:
114
  try:
115
- import insightface
116
- print(f"🔍 InsightFace version: {insightface.__version__}")
117
- model_name = "buffalo_l" if self.device == "cuda" else "buffalo_sc"
118
- print(f"🔍 Loading InsightFace model: {model_name}")
119
- self.face_app = FaceAnalysis(name=model_name)
 
 
 
 
120
  self.face_app.prepare(
121
  ctx_id=0 if self.device == "cuda" else -1,
122
- det_size=(640, 640),
123
  )
124
- # Test with a blank image to confirm models loaded
125
- import numpy as _np
126
- test_img = _np.zeros((112, 112, 3), dtype=_np.uint8)
127
- _ = self.face_app.get(test_img)
128
- print(f"✅ InsightFace ({model_name}) loaded — ArcFace face lane ACTIVE")
129
  except Exception as e:
130
- import traceback
131
  print(f"❌ InsightFace init FAILED: {e}")
132
  print(traceback.format_exc())
133
  self.face_app = None
134
  else:
135
- print("❌ InsightFace NOT installed — run: pip install insightface onnxruntime")
136
-
137
- self._cache = {}
138
- self._cache_maxsize = 128
139
- # InsightFace ONNX runtime is NOT thread-safe
140
- # This lock ensures only one inference runs at a time
141
- self._face_lock = threading.Lock()
142
- print("✅ Models ready!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
- # ── Object Lane batched embedding ────────────────────────────
145
  def _embed_crops_batch(self, crops: list) -> list:
 
146
  if not crops:
147
  return []
148
  with torch.no_grad():
 
149
  sig_in = self.siglip_processor(images=crops, return_tensors="pt", padding=True)
150
  sig_in = {k: v.to(self.device) for k, v in sig_in.items()}
151
  if self.device == "cuda":
@@ -156,8 +358,9 @@ class AIModelManager:
156
  elif isinstance(sig_out, tuple): sig_out = sig_out[0]
157
  sig_vecs = F.normalize(sig_out.float(), p=2, dim=1).cpu()
158
 
159
- dino_in = self.dinov2_processor(images=crops, return_tensors="pt")
160
- dino_in = {k: v.to(self.device) for k, v in dino_in.items()}
 
161
  if self.device == "cuda":
162
  dino_in = {k: v.half() if v.dtype == torch.float32 else v
163
  for k, v in dino_in.items()}
@@ -168,141 +371,210 @@ class AIModelManager:
168
  fused = F.normalize(torch.cat([sig_vecs, dino_vecs], dim=1), p=2, dim=1)
169
  return [fused[i].numpy() for i in range(len(crops))]
170
 
171
- # ── V3 Face detection + encoding ─────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  def _detect_and_encode_faces(self, img_np: np.ndarray) -> list:
173
  """
174
- Detect ALL faces in image using InsightFace (YuNet + ArcFace).
175
- Returns list of dicts:
176
- {
177
- "type": "face",
178
- "vector": np.ndarray (512-D ArcFace embedding),
179
- "face_idx": int,
180
- "bbox": [x, y, w, h],
181
- "face_crop": str (base64 JPEG thumbnail),
182
- "det_score": float (detection confidence)
183
- }
 
184
  """
185
  if self.face_app is None:
186
- print("⚠️ face_app is None — InsightFace not loaded!")
187
  return []
188
 
189
  try:
190
- print(f"🔍 Running InsightFace on image shape: {img_np.shape}")
191
- # InsightFace expects BGR numpy array
192
- if img_np.shape[2] == 3 and img_np.dtype == np.uint8:
193
- bgr = img_np[..., ::-1].copy() # RGB → BGR
194
- else:
195
- bgr = img_np.copy()
196
 
 
197
  with self._face_lock:
198
  faces = self.face_app.get(bgr)
199
- print(f"🔍 InsightFace raw detection: {len(faces)} faces found")
200
- results = []
 
 
201
 
202
  for idx, face in enumerate(faces):
203
- if idx >= MAX_FACES_PER_IMAGE:
204
  break
205
 
206
- # Get bounding box
207
- bbox = face.bbox.astype(int) # [x1, y1, x2, y2]
208
- x1, y1, x2, y2 = bbox
 
 
209
  w, h = x2 - x1, y2 - y1
 
 
210
 
211
- # Skip tiny faces
212
  if w < MIN_FACE_SIZE or h < MIN_FACE_SIZE:
 
213
  continue
214
 
215
- # Get ArcFace embedding (already L2-normalised by InsightFace)
216
- if face.embedding is None:
 
 
217
  continue
218
- vec = face.embedding.astype(np.float32)
219
- # Re-normalise just to be safe
220
- norm = np.linalg.norm(vec)
221
- if norm > 0:
222
- vec = vec / norm
223
 
224
- # Generate face crop thumbnail for UI
225
- face_crop_b64 = _crop_to_b64(
226
- bgr, [x1, y1, w, h], FACE_CROP_THUMB_SIZE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
  results.append({
229
- "type": "face",
230
- "vector": vec,
231
- "face_idx": idx,
232
- "bbox": [int(x1), int(y1), int(w), int(h)],
233
- "face_crop": face_crop_b64,
234
- "det_score": float(face.det_score) if hasattr(face, "det_score") else 1.0,
 
 
 
235
  })
 
 
 
236
 
237
- print(f"👤 Detected {len(results)} face(s) via InsightFace ArcFace")
238
  return results
239
 
240
  except Exception as e:
241
- print(f"🟠 InsightFace error: {e} — falling back to object lane")
 
242
  return []
243
 
244
  # ── Main process_image ────────────────────────────────────────
245
  def process_image(
246
  self,
247
  image_path: str,
248
- is_query: bool = False,
249
  detect_faces: bool = True,
250
  ) -> list:
251
  """
252
- Returns list of vector dicts for upload or search.
253
-
254
- Upload mode (is_query=False):
255
- - Face vectors include bbox + face_crop for Pinecone metadata
256
- - Object vectors include full-image + YOLO crops
257
-
258
- Query mode (is_query=True):
259
- - Same structure — main.py handles grouping for search response
 
 
 
 
 
260
  """
261
  cache_key = f"{_img_hash(image_path)}_{detect_faces}_{is_query}"
262
  if cache_key in self._cache:
263
- print("⚡ Cache hit — skipping inference")
264
  return self._cache[cache_key]
265
 
266
  extracted = []
267
  original_pil = Image.open(image_path).convert("RGB")
268
- img_np = np.array(original_pil) # RGB, uint8
269
  faces_found = False
270
 
271
- # ── FACE LANE ────────────────────────────────────────────
272
- if detect_faces:
273
- # Resize for face detection (640px for small face detection)
274
- detect_pil = _resize_pil(original_pil, 640)
275
- detect_np = np.array(detect_pil)
276
-
277
- face_results = self._detect_and_encode_faces(detect_np)
 
 
 
 
 
 
 
 
 
 
278
 
279
  if face_results:
280
  faces_found = True
281
- # Scale bbox back to original image size if resized
282
- scale_x = original_pil.width / detect_pil.width
283
- scale_y = original_pil.height / detect_pil.height
284
  for fr in face_results:
285
- if scale_x != 1.0 or scale_y != 1.0:
286
  bx, by, bw, bh = fr["bbox"]
287
  fr["bbox"] = [
288
- int(bx * scale_x), int(by * scale_y),
289
- int(bw * scale_x), int(bh * scale_y),
290
  ]
291
  extracted.append(fr)
292
 
293
- # ── OBJECT LANE ──────────────────────────────────────────
294
- # Always run object lane — even if faces found
295
- # (image may contain both people and objects)
296
- crops_pil = [_resize_pil(original_pil, MAX_IMAGE_SIZE)] # full-image always
 
 
 
297
  yolo_results = self.yolo(image_path, conf=0.5, verbose=False)
298
 
299
  for r in yolo_results:
300
  if r.masks is not None:
301
  for seg_idx, mask_xy in enumerate(r.masks.xy):
302
  cls_id = int(r.boxes.cls[seg_idx].item())
303
- # Skip person crops if face lane already handled them
304
  if faces_found and cls_id == YOLO_PERSON_CLASS_ID:
305
- print("🔵 PERSON crop skipped — face lane active")
306
  continue
307
  polygon = np.array(mask_xy, dtype=np.int32)
308
  if len(polygon) < 3:
@@ -327,24 +599,22 @@ class AIModelManager:
327
  if len(crops_pil) >= MAX_CROPS + 1:
328
  break
329
 
330
- crops = [_resize_pil(c, MAX_IMAGE_SIZE) for c in crops_pil]
331
- print(f"🧠 Embedding {len(crops)} object crop(s) in one batch …")
332
  obj_vecs = self._embed_crops_batch(crops)
333
  for vec in obj_vecs:
334
  extracted.append({"type": "object", "vector": vec})
335
 
336
- # Cache result
337
  if len(self._cache) >= self._cache_maxsize:
338
- oldest = next(iter(self._cache))
339
- del self._cache[oldest]
340
  self._cache[cache_key] = extracted
341
-
342
  return extracted
343
 
344
  async def process_image_async(
345
  self,
346
- image_path: str,
347
- is_query: bool = False,
348
  detect_faces: bool = True,
349
  ) -> list:
350
  loop = asyncio.get_event_loop()
 
1
+ # src/models.py — Enterprise Lens V4
2
  # ════════════════════════════════════════════════════════════════════
3
+ # Face Lane : InsightFace SCRFD-10GF + ArcFace-R100 (buffalo_l)
4
+ # + AdaFace IR-50 (WebFace4M) fused 1024-D vector
5
+ # • det_size=(1280,1280) catches small/group faces
6
+ # • Quality gate: det_score 0.60, face_px 40
7
+ # • Multi-scale: runs detection at 2 scales, merges
8
+ # Stores one 1024-D vector PER face
9
+ # • Each vector carries base64 face-crop thumbnail
10
+ # • face_quality_score + face_width_px in metadata
11
+ #
12
+ # Object Lane: SigLIP + DINOv2 fused 1536-D (unchanged from V3)
13
  # ════════════════════════════════════════════════════════════════════
14
 
15
  import os
 
20
  import functools
21
  import hashlib
22
  import io
23
+ import threading
24
+ import traceback
25
 
26
  import cv2
27
  import numpy as np
 
28
  import torch
29
+ import torch.nn as nn
30
  import torch.nn.functional as F
31
  from PIL import Image
32
  from transformers import AutoImageProcessor, AutoModel, AutoProcessor
 
40
  except ImportError:
41
  INSIGHTFACE_AVAILABLE = False
42
  print("⚠️ insightface not installed — face lane disabled")
43
+ print(" Run: pip install insightface onnxruntime-silicon (mac)")
44
+ print(" pip install insightface onnxruntime (linux/win)")
45
 
46
+ # ── AdaFace ─────────────────────────────────────────────────────
47
+ # AdaFace IR-50 backbone (CVPR 2022) — quality-adaptive margin loss
48
+ # Much more robust than ArcFace on low-quality / occluded faces
49
+ # Weights auto-downloaded from HuggingFace on first run
50
+ try:
51
+ from huggingface_hub import hf_hub_download
52
+ ADAFACE_WEIGHTS_AVAILABLE = True
53
+ except ImportError:
54
+ ADAFACE_WEIGHTS_AVAILABLE = False
55
+ print("⚠️ huggingface_hub not installed — AdaFace fusion disabled")
56
 
57
# ── Constants ─────────────────────────────────────────────────────
YOLO_PERSON_CLASS_ID = 0            # COCO class id 0 = "person" in YOLO models
MIN_FACE_SIZE = 40                  # V4: stricter — tiny faces embed poorly
MAX_FACES_PER_IMAGE = 12            # slightly higher cap for group photos
MAX_CROPS = 6                       # max YOLO object crops per image
MAX_IMAGE_SIZE = 640                # object-lane longest edge (px)
DET_SIZE_PRIMARY = (1280, 1280)     # V4: 1280 detection canvas for small faces
DET_SIZE_SECONDARY = (640, 640)     # fallback / 2nd detection scale
FACE_CROP_THUMB_SIZE = 112          # face thumbnail edge (px) stored in Pinecone metadata
FACE_CROP_QUALITY = 80              # JPEG quality for thumbnails
FACE_QUALITY_GATE = 0.60            # minimum det_score to accept a face
FACE_DIM = 512                      # ArcFace embedding dimension
ADAFACE_DIM = 512                   # AdaFace embedding dimension
FUSED_FACE_DIM = 1024               # ArcFace + AdaFace concatenated (L2-normalised)
71
+
72
+
73
+ # ════════════════════════════════════════════════════════════════
74
+ # AdaFace IR-50 Backbone
75
+ # Lightweight reimplementation of the IR-50 network head used
76
+ # to load pretrained AdaFace weights (WebFace4M checkpoint).
77
+ # Only the feature-extraction layers are used — no classifier.
78
+ # ════════════════════════════════════════════════════════════════
79
+
80
+ def _conv_bn(inp, oup, k, s, p, groups=1):
81
+ return nn.Sequential(
82
+ nn.Conv2d(inp, oup, k, s, p, groups=groups, bias=False),
83
+ nn.BatchNorm2d(oup),
84
+ )
85
+
86
+ class _IBasicBlock(nn.Module):
87
+ """Basic residual block used in IR-50."""
88
+ expansion = 1
89
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
90
+ super().__init__()
91
+ self.bn1 = nn.BatchNorm2d(inplanes)
92
+ self.conv1 = nn.Conv2d(inplanes, planes, 3, 1, 1, bias=False)
93
+ self.bn2 = nn.BatchNorm2d(planes)
94
+ self.prelu = nn.PReLU(planes)
95
+ self.conv2 = nn.Conv2d(planes, planes, 3, stride, 1, bias=False)
96
+ self.bn3 = nn.BatchNorm2d(planes)
97
+ self.downsample = downsample
98
+ self.stride = stride
99
+
100
+ def forward(self, x):
101
+ identity = x
102
+ out = self.bn1(x)
103
+ out = self.conv1(out)
104
+ out = self.bn2(out)
105
+ out = self.prelu(out)
106
+ out = self.conv2(out)
107
+ out = self.bn3(out)
108
+ if self.downsample is not None:
109
+ identity = self.downsample(x)
110
+ out += identity
111
+ return out
112
+
113
class AdaFaceIR50(nn.Module):
    """
    IR-50 backbone for AdaFace.

    Produces a 512-D L2-normalised face embedding.
    Input : (N, 3, 112, 112) face crop normalised with mean 0.5, std 0.5
    Output: (N, 512) L2-normalised embedding

    NOTE(review): the attribute names (input_layer, layer1..layer4, bn2,
    dropout, fc, features) double as state-dict keys that the pretrained
    WebFace4M checkpoint is matched against in _load_adaface — do not
    rename or restructure them.
    """
    def __init__(self):
        super().__init__()
        # Stem: 3 → 64 channels, keeps full 112×112 resolution
        self.input_layer = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.PReLU(64),
        )
        # IR-50 stage layout: 3 / 4 / 14 / 3 blocks, each stage halving
        # the spatial size (112 → 56 → 28 → 14 → 7)
        self.layer1 = self._make_layer(64, 64, 3, stride=2)
        self.layer2 = self._make_layer(64, 128, 4, stride=2)
        self.layer3 = self._make_layer(128, 256, 14, stride=2)
        self.layer4 = self._make_layer(256, 512, 3, stride=2)
        self.bn2 = nn.BatchNorm2d(512)
        self.dropout = nn.Dropout(p=0.4)
        # fc consumes the flattened 512×7×7 map — implies 112×112 input only
        self.fc = nn.Linear(512 * 7 * 7, 512)
        self.features = nn.BatchNorm1d(512)

    def _make_layer(self, inplanes, planes, blocks, stride=1):
        # First block of a stage may change channels/stride, so the skip
        # path gets a 1×1 conv + BN projection; remaining blocks are
        # shape-preserving.
        downsample = None
        if stride != 1 or inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(inplanes, planes, 1, stride, bias=False),
                nn.BatchNorm2d(planes),
            )
        layers = [_IBasicBlock(inplanes, planes, stride, downsample)]
        for _ in range(1, blocks):
            layers.append(_IBasicBlock(planes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.bn2(x)
        x = self.dropout(x)
        x = x.flatten(1)
        x = self.fc(x)
        x = self.features(x)
        # Unit-norm output: cosine similarity reduces to a dot product
        return F.normalize(x, p=2, dim=1)
160
+
161
+
162
+ # ════════════════════════════════════════════════════════════════
163
+ # Utility functions
164
+ # ════════════════════════════════════════════════════════════════
165
 
166
  def _resize_pil(img: Image.Image, max_side: int = MAX_IMAGE_SIZE) -> Image.Image:
167
  w, h = img.size
 
178
  return h.hexdigest()
179
 
180
 
181
def _crop_to_b64(
    img_bgr: np.ndarray,
    x1: int, y1: int, x2: int, y2: int,
    thumb_size: int = FACE_CROP_THUMB_SIZE,
) -> str:
    """Crop face from BGR image with 20% padding, return base64 JPEG thumbnail."""
    height, width = img_bgr.shape[:2]
    box_w, box_h = x2 - x1, y2 - y1
    # Pad the box by 20% on every side, then clamp to image bounds.
    dx, dy = int(box_w * 0.20), int(box_h * 0.20)
    left = max(0, x1 - dx)
    top = max(0, y1 - dy)
    right = min(width, x2 + dx)
    bottom = min(height, y2 + dy)
    patch = img_bgr[top:bottom, left:right]
    if patch.size == 0:
        return ""
    thumb = Image.fromarray(patch[:, :, ::-1])  # BGR → RGB
    thumb = thumb.resize((thumb_size, thumb_size), Image.LANCZOS)
    buf = io.BytesIO()
    thumb.save(buf, format="JPEG", quality=FACE_CROP_QUALITY)
    return base64.b64encode(buf.getvalue()).decode()
203
 
204
 
205
+ def _face_crop_for_adaface(
206
+ img_bgr: np.ndarray,
207
+ x1: int, y1: int, x2: int, y2: int,
208
+ ) -> np.ndarray:
209
+ """
210
+ Crop and normalise face for AdaFace IR-50 input.
211
+ Returns float32 numpy array (3, 112, 112) normalised to [-1, 1].
212
+ """
213
+ H, W = img_bgr.shape[:2]
214
+ w, h = x2 - x1, y2 - y1
215
+ pad_x = int(w * 0.10)
216
+ pad_y = int(h * 0.10)
217
+ cx1 = max(0, x1 - pad_x)
218
+ cy1 = max(0, y1 - pad_y)
219
+ cx2 = min(W, x2 + pad_x)
220
+ cy2 = min(H, y2 + pad_y)
221
+ crop = img_bgr[cy1:cy2, cx1:cx2]
222
+ if crop.size == 0:
223
+ return None
224
+ rgb = crop[:, :, ::-1].copy() # BGR → RGB
225
+ pil = Image.fromarray(rgb).resize((112, 112), Image.LANCZOS)
226
+ arr = np.array(pil, dtype=np.float32) / 255.0
227
+ arr = (arr - 0.5) / 0.5 # normalise [-1, 1]
228
+ return arr.transpose(2, 0, 1) # HWC → CHW
229
+
230
+
231
+ # ════════════════════════════════════════════════════════════════
232
+ # AIModelManager — V4
233
+ # ════════════════════════════════════════════════════════════════
234
+
235
  class AIModelManager:
236
  def __init__(self):
237
  self.device = (
238
  "cuda" if torch.cuda.is_available()
239
  else ("mps" if torch.backends.mps.is_available() else "cpu")
240
  )
241
+ print(f"🚀 Loading models onto: {self.device.upper()}...")
242
 
243
  # ── Object Lane: SigLIP + DINOv2 (unchanged) ─────────────
244
+ print("📦 Loading SigLIP...")
245
  self.siglip_processor = AutoProcessor.from_pretrained(
246
  "google/siglip-base-patch16-224", use_fast=True)
247
  self.siglip_model = AutoModel.from_pretrained(
248
  "google/siglip-base-patch16-224").to(self.device).eval()
249
 
250
+ print("📦 Loading DINOv2...")
251
  self.dinov2_processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
252
  self.dinov2_model = AutoModel.from_pretrained(
253
  "facebook/dinov2-base").to(self.device).eval()
 
257
  self.dinov2_model = self.dinov2_model.half()
258
 
259
  # ── YOLO for object segmentation ─────────────────────────
260
+ print("📦 Loading YOLO11n-seg...")
261
  self.yolo = YOLO("yolo11n-seg.pt")
262
 
263
+ # ── Face Lane: InsightFace SCRFD + ArcFace-R100 ───────────
264
+ # V4: ALWAYS use buffalo_l (SCRFD-10GF + ArcFace-R100)
265
+ # even on CPU — accuracy matters more than speed here.
266
+ # det_size=1280 catches faces as small as ~10px in source.
267
  self.face_app = None
 
268
  if INSIGHTFACE_AVAILABLE:
269
  try:
270
+ print("📦 Loading InsightFace buffalo_l (SCRFD-10GF + ArcFace-R100)...")
271
+ self.face_app = FaceAnalysis(
272
+ name="buffalo_l",
273
+ providers=(
274
+ ["CUDAExecutionProvider", "CPUExecutionProvider"]
275
+ if self.device == "cuda"
276
+ else ["CPUExecutionProvider"]
277
+ ),
278
+ )
279
  self.face_app.prepare(
280
  ctx_id=0 if self.device == "cuda" else -1,
281
+ det_size=DET_SIZE_PRIMARY, # 1280×1280 — key for small faces
282
  )
283
+ # Warmup
284
+ test_img = np.zeros((112, 112, 3), dtype=np.uint8)
285
+ self.face_app.get(test_img)
286
+ print("✅ InsightFace buffalo_l loaded — SCRFD+ArcFace face lane ACTIVE")
287
+ print(f" det_size={DET_SIZE_PRIMARY} | quality_gate={FACE_QUALITY_GATE}")
288
  except Exception as e:
 
289
  print(f"❌ InsightFace init FAILED: {e}")
290
  print(traceback.format_exc())
291
  self.face_app = None
292
  else:
293
+ print("❌ InsightFace NOT installed")
294
+
295
+ # ── AdaFace IR-50 (CVPR 2022) — quality-adaptive fusion ───
296
+ # Fused with ArcFace → 1024-D face vector
297
+ # Weights: adaface_ir50_webface4m.ckpt from HuggingFace
298
+ self.adaface_model = None
299
+ self._load_adaface()
300
+
301
+ # Thread safety for ONNX
302
+ self._face_lock = threading.Lock()
303
+ self._cache = {}
304
+ self._cache_maxsize = 128
305
+ print("✅ All models ready!")
306
+ print(f" Face vector dim : {FUSED_FACE_DIM if self.adaface_model else FACE_DIM}")
307
+ print(f" Object vector dim: 1536")
308
+
309
+ def _load_adaface(self):
310
+ """Download and load AdaFace IR-50 WebFace4M weights."""
311
+ if not ADAFACE_WEIGHTS_AVAILABLE:
312
+ print("⚠️ AdaFace skipped — huggingface_hub not installed")
313
+ return
314
+ try:
315
+ print("📦 Loading AdaFace IR-50 (WebFace4M)...")
316
+ # Weights hosted on HuggingFace — ~170MB download on first run
317
+ ckpt_path = hf_hub_download(
318
+ repo_id = "minchul/adaface_ir50_webface4m",
319
+ filename = "adaface_ir50_webface4m.ckpt",
320
+ )
321
+ model = AdaFaceIR50()
322
+ state = torch.load(ckpt_path, map_location="cpu")
323
+ # Checkpoint may be wrapped in {"state_dict": ...}
324
+ if "state_dict" in state:
325
+ state = state["state_dict"]
326
+ # Strip any "model." prefix that some checkpoints add
327
+ state = {k.replace("model.", ""): v for k, v in state.items()}
328
+ # Only load keys that exist in our model
329
+ model_keys = set(model.state_dict().keys())
330
+ filtered = {k: v for k, v in state.items() if k in model_keys}
331
+ missing, _ = model.load_state_dict(filtered, strict=False)
332
+ if missing:
333
+ print(f" AdaFace: {len(missing)} missing keys (expected for head layers)")
334
+ model = model.to(self.device).eval()
335
+ if self.device == "cuda":
336
+ model = model.half()
337
+ self.adaface_model = model
338
+ print("✅ AdaFace IR-50 loaded — 1024-D fused face vectors ACTIVE")
339
+ except Exception as e:
340
+ print(f"⚠️ AdaFace load failed: {e} — falling back to ArcFace-only (512-D)")
341
+ print(f" Detail: {traceback.format_exc()[-400:]}")
342
+ self.adaface_model = None
343
 
344
+ # ── Object Lane: batched SigLIP + DINOv2 embedding ───────────
345
  def _embed_crops_batch(self, crops: list) -> list:
346
+ """Embed a list of PIL images → list of 1536-D numpy arrays."""
347
  if not crops:
348
  return []
349
  with torch.no_grad():
350
+ # SigLIP
351
  sig_in = self.siglip_processor(images=crops, return_tensors="pt", padding=True)
352
  sig_in = {k: v.to(self.device) for k, v in sig_in.items()}
353
  if self.device == "cuda":
 
358
  elif isinstance(sig_out, tuple): sig_out = sig_out[0]
359
  sig_vecs = F.normalize(sig_out.float(), p=2, dim=1).cpu()
360
 
361
+ # DINOv2
362
+ dino_in = self.dinov2_processor(images=crops, return_tensors="pt")
363
+ dino_in = {k: v.to(self.device) for k, v in dino_in.items()}
364
  if self.device == "cuda":
365
  dino_in = {k: v.half() if v.dtype == torch.float32 else v
366
  for k, v in dino_in.items()}
 
371
  fused = F.normalize(torch.cat([sig_vecs, dino_vecs], dim=1), p=2, dim=1)
372
  return [fused[i].numpy() for i in range(len(crops))]
373
 
374
+ # ── AdaFace embedding for a single face crop ─────────────────
375
+ def _adaface_embed(self, face_arr_chw: np.ndarray) -> np.ndarray:
376
+ """
377
+ Run AdaFace IR-50 on a preprocessed (3,112,112) float32 array.
378
+ Returns 512-D L2-normalised numpy embedding.
379
+ """
380
+ if self.adaface_model is None or face_arr_chw is None:
381
+ return None
382
+ try:
383
+ t = torch.from_numpy(face_arr_chw).unsqueeze(0) # (1,3,112,112)
384
+ t = t.to(self.device)
385
+ if self.device == "cuda":
386
+ t = t.half()
387
+ with torch.no_grad():
388
+ emb = self.adaface_model(t) # (1,512)
389
+ return emb[0].float().cpu().numpy()
390
+ except Exception as e:
391
+ print(f"⚠️ AdaFace inference error: {e}")
392
+ return None
393
+
394
+ # ── V4 Face detection + dual encoding ────────────────────────
395
    def _detect_and_encode_faces(self, img_np: np.ndarray) -> list:
        """
        Detect ALL faces using InsightFace SCRFD-10GF at 1280px.
        For each face:
          - ArcFace-R100 embedding (512-D, from InsightFace)
          - AdaFace IR-50 embedding (512-D, fused quality-adaptive)
          - Concatenate + L2-normalise → 1024-D final vector
          - Quality gate: det_score ≥ 0.60, face width ≥ 40px
          - Base64 thumbnail stored for UI

        Returns list of dicts with keys:
          type, vector (1024-D or 512-D), face_idx, bbox,
          face_crop, det_score, face_quality, face_width_px
        """
        if self.face_app is None:
            print("⚠️ face_app is None — InsightFace not loaded")
            return []

        try:
            # InsightFace expects BGR
            # NOTE(review): float inputs are assumed to be scaled to [0, 1]
            # before the ×255 conversion — TODO confirm all callers.
            if img_np.dtype != np.uint8:
                img_np = (img_np * 255).astype(np.uint8)
            # NOTE(review): assumes img_np is RGB HWC with 3 dims; the
            # shape[2] access would raise on a 2-D grayscale array.
            bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()

            print(f"🔍 SCRFD detection on {bgr.shape[1]}×{bgr.shape[0]} image...")
            # Lock: the ONNX session is shared and not assumed thread-safe.
            with self._face_lock:
                faces = self.face_app.get(bgr)
            print(f"   Raw detections: {len(faces)}")

            results = []
            accepted = 0  # count of faces that passed all quality gates

            for idx, face in enumerate(faces):
                if accepted >= MAX_FACES_PER_IMAGE:
                    break

                # ── Bounding box ──────────────────────────────────
                # Clamp the raw detector box to the image bounds.
                bbox_raw = face.bbox.astype(int)
                x1, y1, x2, y2 = bbox_raw
                x1 = max(0, x1); y1 = max(0, y1)
                x2 = min(bgr.shape[1], x2); y2 = min(bgr.shape[0], y2)
                w, h = x2 - x1, y2 - y1
                if w <= 0 or h <= 0:
                    continue

                # ── Quality gate 1: minimum size ──────────────────
                if w < MIN_FACE_SIZE or h < MIN_FACE_SIZE:
                    print(f"   Face {idx}: SKIP — too small ({w}×{h}px)")
                    continue

                # ── Quality gate 2: detection confidence ──────────
                # hasattr guard: defaults to 1.0 if the model variant
                # exposes no det_score attribute.
                det_score = float(face.det_score) if hasattr(face, "det_score") else 1.0
                if det_score < FACE_QUALITY_GATE:
                    print(f"   Face {idx}: SKIP — low det_score ({det_score:.3f})")
                    continue

                # ── ArcFace embedding (from InsightFace) ──────────
                if face.embedding is None:
                    continue
                arcface_vec = face.embedding.astype(np.float32)
                n = np.linalg.norm(arcface_vec)
                if n > 0:
                    arcface_vec = arcface_vec / n

                # ── AdaFace embedding (quality-adaptive) ──────────
                # Returns None when the AdaFace model is unavailable or
                # the crop is degenerate — handled by the fallback below.
                face_chw = _face_crop_for_adaface(bgr, x1, y1, x2, y2)
                adaface_vec = self._adaface_embed(face_chw)

                # ── Fuse: ArcFace + AdaFace → 1024-D ─────────────
                if adaface_vec is not None:
                    fused_raw = np.concatenate([arcface_vec, adaface_vec])
                    n2 = np.linalg.norm(fused_raw)
                    final_vec = (fused_raw / n2) if n2 > 0 else fused_raw
                    vec_dim = FUSED_FACE_DIM
                else:
                    # AdaFace unavailable — fall back to ArcFace only
                    final_vec = arcface_vec
                    vec_dim = FACE_DIM

                # ── Face crop thumbnail for UI ─────────────────────
                face_crop_b64 = _crop_to_b64(bgr, x1, y1, x2, y2)

                results.append({
                    "type": "face",
                    "vector": final_vec,
                    "vec_dim": vec_dim,
                    # face_idx numbers ACCEPTED faces, not raw detections
                    "face_idx": accepted,
                    "bbox": [int(x1), int(y1), int(w), int(h)],  # x, y, w, h
                    "face_crop": face_crop_b64,
                    "det_score": det_score,
                    "face_quality": det_score,  # alias for metadata
                    "face_width_px": int(w),
                })
                accepted += 1
                print(f"   Face {idx}: ACCEPTED — {w}×{h}px | "
                      f"det={det_score:.3f} | dim={vec_dim}")

            print(f"👤 {accepted} face(s) passed quality gate")
            return results

        except Exception as e:
            # Best-effort lane: detection failure must not kill the
            # object lane, so swallow and return empty.
            print(f"🟠 InsightFace error: {e}")
            print(traceback.format_exc()[-600:])
            return []
499
 
500
  # ── Main process_image ────────────────────────────────────────
501
  def process_image(
502
  self,
503
  image_path: str,
504
+ is_query: bool = False,
505
  detect_faces: bool = True,
506
  ) -> list:
507
  """
508
+ Full pipeline for one image.
509
+
510
+ Returns list of vector dicts:
511
+ Face: {type, vector (1024-D), face_idx, bbox, face_crop,
512
+ det_score, face_quality, face_width_px}
513
+ Object: {type, vector (1536-D)}
514
+
515
+ V4 changes vs V3:
516
+ - SCRFD at 1280px (not 640) — catches small/group faces
517
+ - buffalo_l always (not buffalo_sc on CPU)
518
+ - ArcFace + AdaFace fused 1024-D vectors
519
+ - Quality gate: det_score ≥ 0.60, width ≥ 40px
520
+ - Multi-scale: detect at 1280, retry at 640 if 0 faces found
521
  """
522
  cache_key = f"{_img_hash(image_path)}_{detect_faces}_{is_query}"
523
  if cache_key in self._cache:
524
+ print("⚡ Cache hit")
525
  return self._cache[cache_key]
526
 
527
  extracted = []
528
  original_pil = Image.open(image_path).convert("RGB")
529
+ img_np = np.array(original_pil) # RGB uint8
530
  faces_found = False
531
 
532
+ # ════════════════════════════════════════════════════════
533
+ # FACE LANE
534
+ # V4: Run at full resolution (up to 1280px) to catch small
535
+ # faces in group photos. If 0 faces detected, retry at
536
+ # the original resolution (multi-scale fallback).
537
+ # ════════════════════════════════════════════════════════
538
+ if detect_faces and self.face_app is not None:
539
+ # Scale 1: resize longest edge to 1280 for detection
540
+ detect_pil_1280 = _resize_pil(original_pil, 1280)
541
+ detect_np_1280 = np.array(detect_pil_1280)
542
+ face_results = self._detect_and_encode_faces(detect_np_1280)
543
+
544
+ # Scale 2: if nothing found, try original resolution
545
+ # (sometimes resizing DOWN helps when image is already small)
546
+ if not face_results and max(original_pil.size) < 1280:
547
+ print("🔄 Multi-scale fallback: retrying at original resolution")
548
+ face_results = self._detect_and_encode_faces(img_np)
549
 
550
  if face_results:
551
  faces_found = True
552
+ # Scale bboxes back to original-image coordinates
553
+ sx = original_pil.width / detect_pil_1280.width
554
+ sy = original_pil.height / detect_pil_1280.height
555
  for fr in face_results:
556
+ if sx != 1.0 or sy != 1.0:
557
  bx, by, bw, bh = fr["bbox"]
558
  fr["bbox"] = [
559
+ int(bx * sx), int(by * sy),
560
+ int(bw * sx), int(bh * sy),
561
  ]
562
  extracted.append(fr)
563
 
564
+ # ════════════════════════════════════════════════════════
565
+ # OBJECT LANE
566
+ # Always runs even when faces are found.
567
+ # PERSON-class YOLO crops are skipped when faces active
568
+ # to avoid double-counting people.
569
+ # ════════════════════════════════════════════════════════
570
+ crops_pil = [_resize_pil(original_pil, MAX_IMAGE_SIZE)] # full image
571
  yolo_results = self.yolo(image_path, conf=0.5, verbose=False)
572
 
573
  for r in yolo_results:
574
  if r.masks is not None:
575
  for seg_idx, mask_xy in enumerate(r.masks.xy):
576
  cls_id = int(r.boxes.cls[seg_idx].item())
 
577
  if faces_found and cls_id == YOLO_PERSON_CLASS_ID:
 
578
  continue
579
  polygon = np.array(mask_xy, dtype=np.int32)
580
  if len(polygon) < 3:
 
599
  if len(crops_pil) >= MAX_CROPS + 1:
600
  break
601
 
602
+ crops = [_resize_pil(c, MAX_IMAGE_SIZE) for c in crops_pil]
603
+ print(f"🧠 Embedding {len(crops)} object crop(s)...")
604
  obj_vecs = self._embed_crops_batch(crops)
605
  for vec in obj_vecs:
606
  extracted.append({"type": "object", "vector": vec})
607
 
608
+ # Cache
609
  if len(self._cache) >= self._cache_maxsize:
610
+ del self._cache[next(iter(self._cache))]
 
611
  self._cache[cache_key] = extracted
 
612
  return extracted
613
 
614
  async def process_image_async(
615
  self,
616
+ image_path: str,
617
+ is_query: bool = False,
618
  detect_faces: bool = True,
619
  ) -> list:
620
  loop = asyncio.get_event_loop()