Spaces:
Sleeping
Sleeping
refactor: new search and retrival statergy
Browse files- main.py +193 -110
- src/models.py +410 -140
main.py
CHANGED
|
@@ -48,6 +48,20 @@ IDX_FACES = "enterprise-faces"
|
|
| 48 |
MAX_FILES_PER_UPLOAD = 20 # cap to prevent memory corruption on large batches
|
| 49 |
IDX_OBJECTS = "enterprise-objects"
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
# ════════════════════════════════════════════════════════════════
|
| 52 |
# SUPABASE LOGGING — async, fire-and-forget, never crashes API
|
| 53 |
# HF Space Secrets needed:
|
|
@@ -244,11 +258,13 @@ async def verify_keys(
|
|
| 244 |
existing = {idx.name for idx in await asyncio.to_thread(pc.list_indexes)}
|
| 245 |
tasks = []
|
| 246 |
if IDX_OBJECTS not in existing:
|
| 247 |
-
tasks.append(asyncio.to_thread(pc.create_index, name=IDX_OBJECTS,
|
|
|
|
| 248 |
metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")))
|
| 249 |
indexes_created.append(IDX_OBJECTS)
|
| 250 |
if IDX_FACES not in existing:
|
| 251 |
-
tasks.append(asyncio.to_thread(pc.create_index, name=IDX_FACES,
|
|
|
|
| 252 |
metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")))
|
| 253 |
indexes_created.append(IDX_FACES)
|
| 254 |
if tasks: await asyncio.gather(*tasks)
|
|
@@ -334,25 +350,25 @@ async def upload_new_images(
|
|
| 334 |
for v in vectors:
|
| 335 |
vec_list = v["vector"].tolist() if hasattr(v["vector"], "tolist") else v["vector"]
|
| 336 |
if v["type"] == "face":
|
| 337 |
-
# ── FACE STORE: ArcFace
|
| 338 |
-
#
|
| 339 |
-
# retrieve the full image after a face match
|
| 340 |
face_upserts.append({
|
| 341 |
"id": str(uuid.uuid4()),
|
| 342 |
"values": vec_list,
|
| 343 |
"metadata": {
|
| 344 |
-
"image_url":
|
| 345 |
-
"url":
|
| 346 |
-
"folder":
|
| 347 |
-
"face_idx":
|
| 348 |
-
"bbox":
|
| 349 |
-
"face_crop":
|
| 350 |
-
"det_score":
|
|
|
|
|
|
|
| 351 |
}
|
| 352 |
})
|
| 353 |
else:
|
| 354 |
# ── OBJECT STORE: SigLIP+DINOv2 1536-D fused embedding
|
| 355 |
-
# Always stores full image — includes all crops + full image
|
| 356 |
object_upserts.append({
|
| 357 |
"id": str(uuid.uuid4()),
|
| 358 |
"values": vec_list,
|
|
@@ -450,59 +466,75 @@ async def search_database(
|
|
| 450 |
idx_obj = pc.Index(IDX_OBJECTS)
|
| 451 |
idx_face = pc.Index(IDX_FACES)
|
| 452 |
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 456 |
try:
|
| 457 |
-
res = await asyncio.to_thread(
|
|
|
|
| 458 |
except Exception as e:
|
| 459 |
if "404" in str(e):
|
| 460 |
-
raise HTTPException(404, "Pinecone Index not found.
|
| 461 |
raise e
|
| 462 |
out = []
|
| 463 |
for match in res.get("matches", []):
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
ui_score = score
|
| 472 |
-
out.append({"url": match["metadata"].get("url") or match["metadata"].get("image_url", ""),
|
| 473 |
-
"score": round(ui_score, 4),
|
| 474 |
-
"caption": "👤 Verified Identity" if is_face else match["metadata"].get("folder", "🎯 Object Match")})
|
| 475 |
return out
|
| 476 |
|
| 477 |
-
# ── V3: separate face vectors from object vectors ────────
|
| 478 |
-
face_vectors = [v for v in vectors if v["type"] == "face"]
|
| 479 |
-
object_vectors = [v for v in vectors if v["type"] == "object"]
|
| 480 |
-
|
| 481 |
if detect_faces and face_vectors:
|
| 482 |
-
# ════════════════════════════════════════════════════
|
| 483 |
-
# FACE MODE —
|
| 484 |
-
#
|
| 485 |
-
# Step 1: Query enterprise-FACES (512-D ArcFace)
|
| 486 |
-
# → find which images contain a matching face
|
| 487 |
-
# → get image_urls of those matched images
|
| 488 |
#
|
| 489 |
-
#
|
| 490 |
-
#
|
| 491 |
-
#
|
| 492 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
#
|
| 494 |
-
#
|
| 495 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
|
| 497 |
-
async def _query_face_group(face_vec: dict) -> dict:
|
| 498 |
-
vec_list = face_vec["vector"].tolist() if hasattr(face_vec["vector"], "tolist") else face_vec["vector"]
|
| 499 |
-
|
| 500 |
-
# ── STEP 1: Search enterprise-FACES index ────────
|
| 501 |
try:
|
| 502 |
face_res = await asyncio.to_thread(
|
| 503 |
idx_face.query,
|
| 504 |
vector=vec_list,
|
| 505 |
-
top_k=
|
| 506 |
include_metadata=True,
|
| 507 |
)
|
| 508 |
except Exception as e:
|
|
@@ -510,86 +542,134 @@ async def search_database(
|
|
| 510 |
raise HTTPException(404, "Pinecone index not found. Go to Settings → Verify & Save.")
|
| 511 |
raise e
|
| 512 |
|
| 513 |
-
# Collect
|
| 514 |
-
#
|
| 515 |
-
|
|
|
|
| 516 |
for match in face_res.get("matches", []):
|
| 517 |
-
|
| 518 |
-
if
|
| 519 |
continue
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
)
|
| 524 |
-
if not image_url_match:
|
| 525 |
continue
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
"
|
| 530 |
-
"
|
| 531 |
-
"
|
|
|
|
| 532 |
}
|
| 533 |
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
}
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
# This gives us the complete original image for display
|
| 545 |
-
matched_urls = list(face_matched.keys())
|
| 546 |
-
|
| 547 |
-
# Build results using face scores but returning full images
|
| 548 |
-
matches = []
|
| 549 |
-
for image_url_match, face_data in face_matched.items():
|
| 550 |
-
raw_score = face_data["raw_score"]
|
| 551 |
-
|
| 552 |
-
# Remap ArcFace cosine (0.35–1.0) → UI percentage (75%–99%)
|
| 553 |
-
ui_score = 0.75 + ((raw_score - 0.35) / (1.0 - 0.35)) * 0.24
|
| 554 |
-
ui_score = min(0.99, ui_score)
|
| 555 |
-
|
| 556 |
-
matches.append({
|
| 557 |
-
"url": image_url_match, # full original image URL
|
| 558 |
-
"score": round(ui_score, 4),
|
| 559 |
-
"raw_score": round(raw_score, 4),
|
| 560 |
-
"face_crop": face_data["face_crop"], # matched face thumbnail
|
| 561 |
-
"folder": face_data["folder"],
|
| 562 |
-
"caption": "👤 Verified Identity",
|
| 563 |
-
})
|
| 564 |
|
| 565 |
return {
|
| 566 |
"query_face_idx": face_vec.get("face_idx", 0),
|
| 567 |
"query_face_crop": face_vec.get("face_crop", ""),
|
| 568 |
-
"det_score":
|
| 569 |
-
"
|
|
|
|
|
|
|
| 570 |
}
|
| 571 |
|
| 572 |
-
|
| 573 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
|
| 575 |
duration_ms = round((time.perf_counter() - start) * 1000)
|
| 576 |
-
total_matches =
|
| 577 |
log("INFO", "search.complete",
|
| 578 |
user_id=user_id or "anonymous", ip=ip, mode=mode,
|
| 579 |
lanes=["face"], detect_faces=detect_faces,
|
| 580 |
-
face_groups=len(face_groups),
|
| 581 |
-
|
|
|
|
| 582 |
duration_ms=duration_ms)
|
| 583 |
|
| 584 |
return {
|
| 585 |
-
"mode":
|
| 586 |
-
"face_groups":
|
| 587 |
-
"results":
|
| 588 |
}
|
| 589 |
|
| 590 |
else:
|
| 591 |
-
#
|
| 592 |
-
|
|
|
|
|
|
|
|
|
|
| 593 |
all_results = [r for sub in nested for r in sub]
|
| 594 |
seen = {}
|
| 595 |
for r in all_results:
|
|
@@ -602,7 +682,8 @@ async def search_database(
|
|
| 602 |
log("INFO", "search.complete",
|
| 603 |
user_id=user_id or "anonymous", ip=ip, mode=mode,
|
| 604 |
lanes=lanes_used, detect_faces=detect_faces,
|
| 605 |
-
results_count=len(final),
|
|
|
|
| 606 |
duration_ms=duration_ms)
|
| 607 |
|
| 608 |
return {"mode": "object", "results": final, "face_groups": []}
|
|
@@ -869,9 +950,11 @@ async def reset_database(
|
|
| 869 |
if tasks: await asyncio.gather(*tasks)
|
| 870 |
await asyncio.sleep(3) # wait for Pinecone to fully delete
|
| 871 |
await asyncio.gather(
|
| 872 |
-
asyncio.to_thread(pc.create_index, name=IDX_OBJECTS,
|
|
|
|
| 873 |
spec=ServerlessSpec(cloud="aws", region="us-east-1")),
|
| 874 |
-
asyncio.to_thread(pc.create_index, name=IDX_FACES,
|
|
|
|
| 875 |
spec=ServerlessSpec(cloud="aws", region="us-east-1")),
|
| 876 |
)
|
| 877 |
except Exception as e:
|
|
|
|
| 48 |
MAX_FILES_PER_UPLOAD = 20 # cap to prevent memory corruption on large batches
|
| 49 |
IDX_OBJECTS = "enterprise-objects"
|
| 50 |
|
| 51 |
+
# ── V4 index dimensions ───────────────────────────────────────────
|
| 52 |
+
# enterprise-faces : 1024-D (ArcFace 512 + AdaFace 512, fused)
|
| 53 |
+
# enterprise-objects: 1536-D (SigLIP 768 + DINOv2 768, fused)
|
| 54 |
+
# ⚠️ If upgrading from V3 (512-D faces), you MUST reset the
|
| 55 |
+
# enterprise-faces index via Settings → Danger Zone → Reset DB
|
| 56 |
+
IDX_FACES_DIM = int(os.getenv("IDX_FACES_DIM", "1024"))
|
| 57 |
+
IDX_OBJECTS_DIM = int(os.getenv("IDX_OBJECTS_DIM", "1536"))
|
| 58 |
+
|
| 59 |
+
# V4 face search thresholds
|
| 60 |
+
# Cosine similarity thresholds for the fused 1024-D ArcFace+AdaFace space
|
| 61 |
+
FACE_THRESHOLD_HIGH = 0.40 # high-quality faces (det_score ≥ 0.85)
|
| 62 |
+
FACE_THRESHOLD_LOW = 0.32 # lower-quality faces (det_score < 0.85)
|
| 63 |
+
FACE_TOP_K_FETCH = 50 # fetch more candidates, filter after merge
|
| 64 |
+
|
| 65 |
# ════════════════════════════════════════════════════════════════
|
| 66 |
# SUPABASE LOGGING — async, fire-and-forget, never crashes API
|
| 67 |
# HF Space Secrets needed:
|
|
|
|
| 258 |
existing = {idx.name for idx in await asyncio.to_thread(pc.list_indexes)}
|
| 259 |
tasks = []
|
| 260 |
if IDX_OBJECTS not in existing:
|
| 261 |
+
tasks.append(asyncio.to_thread(pc.create_index, name=IDX_OBJECTS,
|
| 262 |
+
dimension=IDX_OBJECTS_DIM, # 1536-D SigLIP+DINOv2
|
| 263 |
metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")))
|
| 264 |
indexes_created.append(IDX_OBJECTS)
|
| 265 |
if IDX_FACES not in existing:
|
| 266 |
+
tasks.append(asyncio.to_thread(pc.create_index, name=IDX_FACES,
|
| 267 |
+
dimension=IDX_FACES_DIM, # 1024-D ArcFace+AdaFace (V4)
|
| 268 |
metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")))
|
| 269 |
indexes_created.append(IDX_FACES)
|
| 270 |
if tasks: await asyncio.gather(*tasks)
|
|
|
|
| 350 |
for v in vectors:
|
| 351 |
vec_list = v["vector"].tolist() if hasattr(v["vector"], "tolist") else v["vector"]
|
| 352 |
if v["type"] == "face":
|
| 353 |
+
# ── FACE STORE: ArcFace+AdaFace 1024-D fused embedding
|
| 354 |
+
# V4: includes face_quality + face_width_px for retrieval scoring
|
|
|
|
| 355 |
face_upserts.append({
|
| 356 |
"id": str(uuid.uuid4()),
|
| 357 |
"values": vec_list,
|
| 358 |
"metadata": {
|
| 359 |
+
"image_url": image_url,
|
| 360 |
+
"url": image_url,
|
| 361 |
+
"folder": folder,
|
| 362 |
+
"face_idx": v.get("face_idx", 0),
|
| 363 |
+
"bbox": str(v.get("bbox", [])),
|
| 364 |
+
"face_crop": v.get("face_crop", ""),
|
| 365 |
+
"det_score": v.get("det_score", 1.0),
|
| 366 |
+
"face_quality": v.get("face_quality", v.get("det_score", 1.0)),
|
| 367 |
+
"face_width_px": v.get("face_width_px", 0),
|
| 368 |
}
|
| 369 |
})
|
| 370 |
else:
|
| 371 |
# ── OBJECT STORE: SigLIP+DINOv2 1536-D fused embedding
|
|
|
|
| 372 |
object_upserts.append({
|
| 373 |
"id": str(uuid.uuid4()),
|
| 374 |
"values": vec_list,
|
|
|
|
| 466 |
idx_obj = pc.Index(IDX_OBJECTS)
|
| 467 |
idx_face = pc.Index(IDX_FACES)
|
| 468 |
|
| 469 |
+
# ── V4: split vectors by type ────────────────────────────
|
| 470 |
+
face_vectors = [v for v in vectors if v["type"] == "face"]
|
| 471 |
+
object_vectors = [v for v in vectors if v["type"] == "object"]
|
| 472 |
+
|
| 473 |
+
# ════════════════════════════════════════════════════════
|
| 474 |
+
# OBJECT MODE helper
|
| 475 |
+
# Used when no faces detected or face search disabled.
|
| 476 |
+
# ════════════════════════════════════════════════════════
|
| 477 |
+
async def _query_object_one(vec_dict: dict):
|
| 478 |
+
vec_list = (vec_dict["vector"].tolist()
|
| 479 |
+
if hasattr(vec_dict["vector"], "tolist")
|
| 480 |
+
else vec_dict["vector"])
|
| 481 |
try:
|
| 482 |
+
res = await asyncio.to_thread(
|
| 483 |
+
idx_obj.query, vector=vec_list, top_k=10, include_metadata=True)
|
| 484 |
except Exception as e:
|
| 485 |
if "404" in str(e):
|
| 486 |
+
raise HTTPException(404, "Pinecone Index not found. Go to Settings → Verify & Save.")
|
| 487 |
raise e
|
| 488 |
out = []
|
| 489 |
for match in res.get("matches", []):
|
| 490 |
+
if match["score"] < 0.45:
|
| 491 |
+
continue
|
| 492 |
+
out.append({
|
| 493 |
+
"url": match["metadata"].get("url") or match["metadata"].get("image_url", ""),
|
| 494 |
+
"score": round(match["score"], 4),
|
| 495 |
+
"caption": match["metadata"].get("folder", "🎯 Visual Match"),
|
| 496 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
return out
|
| 498 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 499 |
if detect_faces and face_vectors:
|
| 500 |
+
# ════════════════════════════════════════════════════
|
| 501 |
+
# V4 FACE MODE — Multi-face merge retrieval
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
#
|
| 503 |
+
# For a group photo with N detected faces:
|
| 504 |
+
# 1. Query enterprise-faces for EACH face (top_k=50)
|
| 505 |
+
# 2. Build a global image_url → match_data map
|
| 506 |
+
# • An image is included if ANY face matches
|
| 507 |
+
# • Score = highest matching face score for that image
|
| 508 |
+
# • Track WHICH face indices matched each image
|
| 509 |
+
# 3. Group results PER query face (for UI display)
|
| 510 |
+
# 4. Also build a "cross-face" flat list:
|
| 511 |
+
# images that matched multiple faces rank higher
|
| 512 |
#
|
| 513 |
+
# Threshold logic:
|
| 514 |
+
# High-quality face (det_score ≥ 0.85) → threshold 0.40
|
| 515 |
+
# Lower-quality face → threshold 0.32
|
| 516 |
+
# (Fused 1024-D space has different cosine distribution
|
| 517 |
+
# than raw ArcFace 512-D — thresholds adjusted accordingly)
|
| 518 |
+
# ═══════════════��════════════════════════════════════
|
| 519 |
+
|
| 520 |
+
async def _query_single_face(face_vec: dict) -> dict:
|
| 521 |
+
"""
|
| 522 |
+
Query enterprise-faces for one detected face.
|
| 523 |
+
Returns per-face result group for UI + raw match map.
|
| 524 |
+
"""
|
| 525 |
+
vec_list = (face_vec["vector"].tolist()
|
| 526 |
+
if hasattr(face_vec["vector"], "tolist")
|
| 527 |
+
else face_vec["vector"])
|
| 528 |
+
|
| 529 |
+
# Adaptive threshold: high-quality → stricter
|
| 530 |
+
det_score = face_vec.get("det_score", 1.0)
|
| 531 |
+
threshold = FACE_THRESHOLD_HIGH if det_score >= 0.85 else FACE_THRESHOLD_LOW
|
| 532 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 533 |
try:
|
| 534 |
face_res = await asyncio.to_thread(
|
| 535 |
idx_face.query,
|
| 536 |
vector=vec_list,
|
| 537 |
+
top_k=FACE_TOP_K_FETCH,
|
| 538 |
include_metadata=True,
|
| 539 |
)
|
| 540 |
except Exception as e:
|
|
|
|
| 542 |
raise HTTPException(404, "Pinecone index not found. Go to Settings → Verify & Save.")
|
| 543 |
raise e
|
| 544 |
|
| 545 |
+
# Collect matches — keep BEST score per image_url
|
| 546 |
+
# (multiple face vectors stored per image during upload,
|
| 547 |
+
# we only want the best matching one per image)
|
| 548 |
+
image_map = {} # image_url → best match data
|
| 549 |
for match in face_res.get("matches", []):
|
| 550 |
+
raw = match["score"]
|
| 551 |
+
if raw < threshold:
|
| 552 |
continue
|
| 553 |
+
url = (match["metadata"].get("url") or
|
| 554 |
+
match["metadata"].get("image_url", ""))
|
| 555 |
+
if not url:
|
|
|
|
|
|
|
| 556 |
continue
|
| 557 |
+
if url not in image_map or raw > image_map[url]["raw_score"]:
|
| 558 |
+
image_map[url] = {
|
| 559 |
+
"raw_score": raw,
|
| 560 |
+
"face_crop": match["metadata"].get("face_crop", ""),
|
| 561 |
+
"folder": match["metadata"].get("folder", ""),
|
| 562 |
+
"face_quality": match["metadata"].get("face_quality", 1.0),
|
| 563 |
+
"face_width_px": match["metadata"].get("face_width_px", 0),
|
| 564 |
}
|
| 565 |
|
| 566 |
+
# Remap raw cosine → UI score (75%–99%)
|
| 567 |
+
# Range is now 0.32–1.0 (wider than old 0.35–1.0)
|
| 568 |
+
def _ui_score(raw: float) -> float:
|
| 569 |
+
lo, hi = FACE_THRESHOLD_LOW, 1.0
|
| 570 |
+
return round(min(0.99, 0.75 + ((raw - lo) / (hi - lo)) * 0.24), 4)
|
| 571 |
+
|
| 572 |
+
matches = [
|
| 573 |
+
{
|
| 574 |
+
"url": url,
|
| 575 |
+
"score": _ui_score(d["raw_score"]),
|
| 576 |
+
"raw_score": round(d["raw_score"], 4),
|
| 577 |
+
"face_crop": d["face_crop"],
|
| 578 |
+
"folder": d["folder"],
|
| 579 |
+
"caption": "👤 Verified Identity",
|
| 580 |
}
|
| 581 |
+
for url, d in image_map.items()
|
| 582 |
+
]
|
| 583 |
+
matches = sorted(matches, key=lambda x: x["score"], reverse=True)[:15]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
|
| 585 |
return {
|
| 586 |
"query_face_idx": face_vec.get("face_idx", 0),
|
| 587 |
"query_face_crop": face_vec.get("face_crop", ""),
|
| 588 |
+
"det_score": det_score,
|
| 589 |
+
"face_width_px": face_vec.get("face_width_px", 0),
|
| 590 |
+
"matches": matches,
|
| 591 |
+
"_image_map": image_map, # used for cross-face merge below
|
| 592 |
}
|
| 593 |
|
| 594 |
+
# Query all faces in parallel
|
| 595 |
+
raw_groups = await asyncio.gather(
|
| 596 |
+
*[_query_single_face(fv) for fv in face_vectors])
|
| 597 |
+
|
| 598 |
+
# ── Cross-face merge ────────────────────────────────
|
| 599 |
+
# Build a global image → {best_score, matched_face_indices}
|
| 600 |
+
# An image appearing for multiple faces gets a boost:
|
| 601 |
+
# final_score = best_face_score * (1 + 0.05 * extra_face_count)
|
| 602 |
+
# This makes images with multiple searched people rank higher.
|
| 603 |
+
global_image_map = {} # url → {score, matched_faces, face_crop, folder}
|
| 604 |
+
for gi, group in enumerate(raw_groups):
|
| 605 |
+
for url, d in group["_image_map"].items():
|
| 606 |
+
raw = d["raw_score"]
|
| 607 |
+
if url not in global_image_map:
|
| 608 |
+
global_image_map[url] = {
|
| 609 |
+
"raw_score": raw,
|
| 610 |
+
"face_crop": d["face_crop"],
|
| 611 |
+
"folder": d["folder"],
|
| 612 |
+
"matched_faces": [gi],
|
| 613 |
+
}
|
| 614 |
+
else:
|
| 615 |
+
existing = global_image_map[url]
|
| 616 |
+
existing["matched_faces"].append(gi)
|
| 617 |
+
if raw > existing["raw_score"]:
|
| 618 |
+
existing["raw_score"] = raw
|
| 619 |
+
existing["face_crop"] = d["face_crop"]
|
| 620 |
+
|
| 621 |
+
# Apply multi-face boost and build flat merged list
|
| 622 |
+
def _boosted_ui_score(raw: float, n_faces: int) -> float:
|
| 623 |
+
lo = FACE_THRESHOLD_LOW
|
| 624 |
+
base = 0.75 + ((raw - lo) / (1.0 - lo)) * 0.24
|
| 625 |
+
boosted = base * (1.0 + 0.05 * (n_faces - 1))
|
| 626 |
+
return round(min(0.99, boosted), 4)
|
| 627 |
+
|
| 628 |
+
merged_results = []
|
| 629 |
+
for url, d in global_image_map.items():
|
| 630 |
+
n = len(d["matched_faces"])
|
| 631 |
+
merged_results.append({
|
| 632 |
+
"url": url,
|
| 633 |
+
"score": _boosted_ui_score(d["raw_score"], n),
|
| 634 |
+
"raw_score": round(d["raw_score"], 4),
|
| 635 |
+
"face_crop": d["face_crop"],
|
| 636 |
+
"folder": d["folder"],
|
| 637 |
+
"matched_faces": d["matched_faces"],
|
| 638 |
+
"caption": (f"👥 {n} faces matched" if n > 1
|
| 639 |
+
else "👤 Verified Identity"),
|
| 640 |
+
})
|
| 641 |
+
merged_results = sorted(
|
| 642 |
+
merged_results, key=lambda x: x["score"], reverse=True)[:20]
|
| 643 |
+
|
| 644 |
+
# Clean per-group results (remove internal _image_map)
|
| 645 |
+
face_groups = []
|
| 646 |
+
for g in raw_groups:
|
| 647 |
+
clean = {k: v for k, v in g.items() if k != "_image_map"}
|
| 648 |
+
if clean["matches"]:
|
| 649 |
+
face_groups.append(clean)
|
| 650 |
|
| 651 |
duration_ms = round((time.perf_counter() - start) * 1000)
|
| 652 |
+
total_matches = len(merged_results)
|
| 653 |
log("INFO", "search.complete",
|
| 654 |
user_id=user_id or "anonymous", ip=ip, mode=mode,
|
| 655 |
lanes=["face"], detect_faces=detect_faces,
|
| 656 |
+
face_groups=len(face_groups),
|
| 657 |
+
merged_results=total_matches,
|
| 658 |
+
top_score=merged_results[0]["score"] if merged_results else 0,
|
| 659 |
duration_ms=duration_ms)
|
| 660 |
|
| 661 |
return {
|
| 662 |
+
"mode": "face",
|
| 663 |
+
"face_groups": face_groups, # per-face results for UI tabs
|
| 664 |
+
"results": merged_results, # V4: flat merged cross-face list
|
| 665 |
}
|
| 666 |
|
| 667 |
else:
|
| 668 |
+
# ════════════════════════════════════════════════════
|
| 669 |
+
# OBJECT MODE — flat ranked results from object index
|
| 670 |
+
# ════════════════════════════════════════════════════
|
| 671 |
+
nested = await asyncio.gather(
|
| 672 |
+
*[_query_object_one(v) for v in vectors])
|
| 673 |
all_results = [r for sub in nested for r in sub]
|
| 674 |
seen = {}
|
| 675 |
for r in all_results:
|
|
|
|
| 682 |
log("INFO", "search.complete",
|
| 683 |
user_id=user_id or "anonymous", ip=ip, mode=mode,
|
| 684 |
lanes=lanes_used, detect_faces=detect_faces,
|
| 685 |
+
results_count=len(final),
|
| 686 |
+
top_score=final[0]["score"] if final else 0,
|
| 687 |
duration_ms=duration_ms)
|
| 688 |
|
| 689 |
return {"mode": "object", "results": final, "face_groups": []}
|
|
|
|
| 950 |
if tasks: await asyncio.gather(*tasks)
|
| 951 |
await asyncio.sleep(3) # wait for Pinecone to fully delete
|
| 952 |
await asyncio.gather(
|
| 953 |
+
asyncio.to_thread(pc.create_index, name=IDX_OBJECTS,
|
| 954 |
+
dimension=IDX_OBJECTS_DIM, metric="cosine", # 1536-D
|
| 955 |
spec=ServerlessSpec(cloud="aws", region="us-east-1")),
|
| 956 |
+
asyncio.to_thread(pc.create_index, name=IDX_FACES,
|
| 957 |
+
dimension=IDX_FACES_DIM, metric="cosine", # 1024-D V4
|
| 958 |
spec=ServerlessSpec(cloud="aws", region="us-east-1")),
|
| 959 |
)
|
| 960 |
except Exception as e:
|
src/models.py
CHANGED
|
@@ -1,11 +1,15 @@
|
|
| 1 |
-
# src/models.py — Enterprise Lens
|
| 2 |
# ════════════════════════════════════════════════════════════════════
|
| 3 |
-
# Face Lane : InsightFace
|
| 4 |
-
#
|
| 5 |
-
# •
|
| 6 |
-
# •
|
| 7 |
-
# •
|
| 8 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
# ════════════════════════════════════════════════════════════════════
|
| 10 |
|
| 11 |
import os
|
|
@@ -16,11 +20,13 @@ import base64
|
|
| 16 |
import functools
|
| 17 |
import hashlib
|
| 18 |
import io
|
|
|
|
|
|
|
| 19 |
|
| 20 |
import cv2
|
| 21 |
import numpy as np
|
| 22 |
-
import threading
|
| 23 |
import torch
|
|
|
|
| 24 |
import torch.nn.functional as F
|
| 25 |
from PIL import Image
|
| 26 |
from transformers import AutoImageProcessor, AutoModel, AutoProcessor
|
|
@@ -34,16 +40,128 @@ try:
|
|
| 34 |
except ImportError:
|
| 35 |
INSIGHTFACE_AVAILABLE = False
|
| 36 |
print("⚠️ insightface not installed — face lane disabled")
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
# ──
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def _resize_pil(img: Image.Image, max_side: int = MAX_IMAGE_SIZE) -> Image.Image:
|
| 49 |
w, h = img.size
|
|
@@ -60,42 +178,76 @@ def _img_hash(image_path: str) -> str:
|
|
| 60 |
return h.hexdigest()
|
| 61 |
|
| 62 |
|
| 63 |
-
def _crop_to_b64(
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
| 76 |
return ""
|
| 77 |
-
#
|
| 78 |
-
|
| 79 |
-
face_pil = face_pil.resize((thumb_size, thumb_size), Image.LANCZOS)
|
| 80 |
buf = io.BytesIO()
|
| 81 |
-
|
| 82 |
return base64.b64encode(buf.getvalue()).decode()
|
| 83 |
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
class AIModelManager:
|
| 86 |
def __init__(self):
|
| 87 |
self.device = (
|
| 88 |
"cuda" if torch.cuda.is_available()
|
| 89 |
else ("mps" if torch.backends.mps.is_available() else "cpu")
|
| 90 |
)
|
| 91 |
-
print(f"Loading models onto: {self.device.upper()}...")
|
| 92 |
|
| 93 |
# ── Object Lane: SigLIP + DINOv2 (unchanged) ─────────────
|
|
|
|
| 94 |
self.siglip_processor = AutoProcessor.from_pretrained(
|
| 95 |
"google/siglip-base-patch16-224", use_fast=True)
|
| 96 |
self.siglip_model = AutoModel.from_pretrained(
|
| 97 |
"google/siglip-base-patch16-224").to(self.device).eval()
|
| 98 |
|
|
|
|
| 99 |
self.dinov2_processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
|
| 100 |
self.dinov2_model = AutoModel.from_pretrained(
|
| 101 |
"facebook/dinov2-base").to(self.device).eval()
|
|
@@ -105,47 +257,97 @@ class AIModelManager:
|
|
| 105 |
self.dinov2_model = self.dinov2_model.half()
|
| 106 |
|
| 107 |
# ── YOLO for object segmentation ─────────────────────────
|
|
|
|
| 108 |
self.yolo = YOLO("yolo11n-seg.pt")
|
| 109 |
|
| 110 |
-
# ── Face Lane: InsightFace
|
|
|
|
|
|
|
|
|
|
| 111 |
self.face_app = None
|
| 112 |
-
print(f"🔍 INSIGHTFACE_AVAILABLE = {INSIGHTFACE_AVAILABLE}")
|
| 113 |
if INSIGHTFACE_AVAILABLE:
|
| 114 |
try:
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
self.face_app.prepare(
|
| 121 |
ctx_id=0 if self.device == "cuda" else -1,
|
| 122 |
-
det_size=
|
| 123 |
)
|
| 124 |
-
#
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
print(f"
|
| 129 |
except Exception as e:
|
| 130 |
-
import traceback
|
| 131 |
print(f"❌ InsightFace init FAILED: {e}")
|
| 132 |
print(traceback.format_exc())
|
| 133 |
self.face_app = None
|
| 134 |
else:
|
| 135 |
-
print("❌ InsightFace NOT installed
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
#
|
| 140 |
-
|
| 141 |
-
self.
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
-
# ── Object Lane batched embedding ───────────
|
| 145 |
def _embed_crops_batch(self, crops: list) -> list:
|
|
|
|
| 146 |
if not crops:
|
| 147 |
return []
|
| 148 |
with torch.no_grad():
|
|
|
|
| 149 |
sig_in = self.siglip_processor(images=crops, return_tensors="pt", padding=True)
|
| 150 |
sig_in = {k: v.to(self.device) for k, v in sig_in.items()}
|
| 151 |
if self.device == "cuda":
|
|
@@ -156,8 +358,9 @@ class AIModelManager:
|
|
| 156 |
elif isinstance(sig_out, tuple): sig_out = sig_out[0]
|
| 157 |
sig_vecs = F.normalize(sig_out.float(), p=2, dim=1).cpu()
|
| 158 |
|
| 159 |
-
|
| 160 |
-
dino_in
|
|
|
|
| 161 |
if self.device == "cuda":
|
| 162 |
dino_in = {k: v.half() if v.dtype == torch.float32 else v
|
| 163 |
for k, v in dino_in.items()}
|
|
@@ -168,141 +371,210 @@ class AIModelManager:
|
|
| 168 |
fused = F.normalize(torch.cat([sig_vecs, dino_vecs], dim=1), p=2, dim=1)
|
| 169 |
return [fused[i].numpy() for i in range(len(crops))]
|
| 170 |
|
| 171 |
-
# ──
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
def _detect_and_encode_faces(self, img_np: np.ndarray) -> list:
|
| 173 |
"""
|
| 174 |
-
Detect ALL faces
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
| 184 |
"""
|
| 185 |
if self.face_app is None:
|
| 186 |
-
print("⚠️ face_app is None — InsightFace not loaded
|
| 187 |
return []
|
| 188 |
|
| 189 |
try:
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
else:
|
| 195 |
-
bgr = img_np.copy()
|
| 196 |
|
|
|
|
| 197 |
with self._face_lock:
|
| 198 |
faces = self.face_app.get(bgr)
|
| 199 |
-
print(f"
|
| 200 |
-
|
|
|
|
|
|
|
| 201 |
|
| 202 |
for idx, face in enumerate(faces):
|
| 203 |
-
if
|
| 204 |
break
|
| 205 |
|
| 206 |
-
#
|
| 207 |
-
|
| 208 |
-
x1, y1, x2, y2 =
|
|
|
|
|
|
|
| 209 |
w, h = x2 - x1, y2 - y1
|
|
|
|
|
|
|
| 210 |
|
| 211 |
-
#
|
| 212 |
if w < MIN_FACE_SIZE or h < MIN_FACE_SIZE:
|
|
|
|
| 213 |
continue
|
| 214 |
|
| 215 |
-
#
|
| 216 |
-
|
|
|
|
|
|
|
| 217 |
continue
|
| 218 |
-
vec = face.embedding.astype(np.float32)
|
| 219 |
-
# Re-normalise just to be safe
|
| 220 |
-
norm = np.linalg.norm(vec)
|
| 221 |
-
if norm > 0:
|
| 222 |
-
vec = vec / norm
|
| 223 |
|
| 224 |
-
#
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
results.append({
|
| 229 |
-
"type":
|
| 230 |
-
"vector":
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
|
|
|
|
|
|
|
|
|
| 235 |
})
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
-
print(f"👤
|
| 238 |
return results
|
| 239 |
|
| 240 |
except Exception as e:
|
| 241 |
-
print(f"🟠 InsightFace error: {e}
|
|
|
|
| 242 |
return []
|
| 243 |
|
| 244 |
# ── Main process_image ────────────────────────────────────────
|
| 245 |
def process_image(
|
| 246 |
self,
|
| 247 |
image_path: str,
|
| 248 |
-
is_query:
|
| 249 |
detect_faces: bool = True,
|
| 250 |
) -> list:
|
| 251 |
"""
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
"""
|
| 261 |
cache_key = f"{_img_hash(image_path)}_{detect_faces}_{is_query}"
|
| 262 |
if cache_key in self._cache:
|
| 263 |
-
print("⚡ Cache hit
|
| 264 |
return self._cache[cache_key]
|
| 265 |
|
| 266 |
extracted = []
|
| 267 |
original_pil = Image.open(image_path).convert("RGB")
|
| 268 |
-
img_np = np.array(original_pil)
|
| 269 |
faces_found = False
|
| 270 |
|
| 271 |
-
#
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
if face_results:
|
| 280 |
faces_found = True
|
| 281 |
-
# Scale
|
| 282 |
-
|
| 283 |
-
|
| 284 |
for fr in face_results:
|
| 285 |
-
if
|
| 286 |
bx, by, bw, bh = fr["bbox"]
|
| 287 |
fr["bbox"] = [
|
| 288 |
-
int(bx *
|
| 289 |
-
int(bw *
|
| 290 |
]
|
| 291 |
extracted.append(fr)
|
| 292 |
|
| 293 |
-
#
|
| 294 |
-
#
|
| 295 |
-
#
|
| 296 |
-
|
|
|
|
|
|
|
|
|
|
| 297 |
yolo_results = self.yolo(image_path, conf=0.5, verbose=False)
|
| 298 |
|
| 299 |
for r in yolo_results:
|
| 300 |
if r.masks is not None:
|
| 301 |
for seg_idx, mask_xy in enumerate(r.masks.xy):
|
| 302 |
cls_id = int(r.boxes.cls[seg_idx].item())
|
| 303 |
-
# Skip person crops if face lane already handled them
|
| 304 |
if faces_found and cls_id == YOLO_PERSON_CLASS_ID:
|
| 305 |
-
print("🔵 PERSON crop skipped — face lane active")
|
| 306 |
continue
|
| 307 |
polygon = np.array(mask_xy, dtype=np.int32)
|
| 308 |
if len(polygon) < 3:
|
|
@@ -327,24 +599,22 @@ class AIModelManager:
|
|
| 327 |
if len(crops_pil) >= MAX_CROPS + 1:
|
| 328 |
break
|
| 329 |
|
| 330 |
-
crops
|
| 331 |
-
print(f"🧠 Embedding {len(crops)} object crop(s)
|
| 332 |
obj_vecs = self._embed_crops_batch(crops)
|
| 333 |
for vec in obj_vecs:
|
| 334 |
extracted.append({"type": "object", "vector": vec})
|
| 335 |
|
| 336 |
-
# Cache
|
| 337 |
if len(self._cache) >= self._cache_maxsize:
|
| 338 |
-
|
| 339 |
-
del self._cache[oldest]
|
| 340 |
self._cache[cache_key] = extracted
|
| 341 |
-
|
| 342 |
return extracted
|
| 343 |
|
| 344 |
async def process_image_async(
|
| 345 |
self,
|
| 346 |
-
image_path:
|
| 347 |
-
is_query:
|
| 348 |
detect_faces: bool = True,
|
| 349 |
) -> list:
|
| 350 |
loop = asyncio.get_event_loop()
|
|
|
|
| 1 |
+
# src/models.py — Enterprise Lens V4
|
| 2 |
# ════════════════════════════════════════════════════════════════════
|
| 3 |
+
# Face Lane : InsightFace SCRFD-10GF + ArcFace-R100 (buffalo_l)
|
| 4 |
+
# + AdaFace IR-50 (WebFace4M) fused → 1024-D vector
|
| 5 |
+
# • det_size=(1280,1280) — catches small/group faces
|
| 6 |
+
# • Quality gate: det_score ≥ 0.60, face_px ≥ 40
|
| 7 |
+
# • Multi-scale: runs detection at 2 scales, merges
|
| 8 |
+
# • Stores one 1024-D vector PER face
|
| 9 |
+
# • Each vector carries base64 face-crop thumbnail
|
| 10 |
+
# • face_quality_score + face_width_px in metadata
|
| 11 |
+
#
|
| 12 |
+
# Object Lane: SigLIP + DINOv2 fused 1536-D (unchanged from V3)
|
| 13 |
# ════════════════════════════════════════════════════════════════════
|
| 14 |
|
| 15 |
import os
|
|
|
|
| 20 |
import functools
|
| 21 |
import hashlib
|
| 22 |
import io
|
| 23 |
+
import threading
|
| 24 |
+
import traceback
|
| 25 |
|
| 26 |
import cv2
|
| 27 |
import numpy as np
|
|
|
|
| 28 |
import torch
|
| 29 |
+
import torch.nn as nn
|
| 30 |
import torch.nn.functional as F
|
| 31 |
from PIL import Image
|
| 32 |
from transformers import AutoImageProcessor, AutoModel, AutoProcessor
|
|
|
|
| 40 |
except ImportError:
|
| 41 |
INSIGHTFACE_AVAILABLE = False
|
| 42 |
print("⚠️ insightface not installed — face lane disabled")
|
| 43 |
+
print(" Run: pip install insightface onnxruntime-silicon (mac)")
|
| 44 |
+
print(" pip install insightface onnxruntime (linux/win)")
|
| 45 |
|
| 46 |
+
# ── AdaFace ──────────────────────────────────────────────────────
# AdaFace IR-50 backbone (CVPR 2022) — quality-adaptive margin loss
# Much more robust than ArcFace on low-quality / occluded faces
# Weights auto-downloaded from HuggingFace on first run
try:
    from huggingface_hub import hf_hub_download
    ADAFACE_WEIGHTS_AVAILABLE = True
except ImportError:
    # Optional dependency: without huggingface_hub the face lane still
    # works, but falls back to ArcFace-only vectors (see _load_adaface).
    ADAFACE_WEIGHTS_AVAILABLE = False
    print("⚠️ huggingface_hub not installed — AdaFace fusion disabled")
|
| 56 |
|
| 57 |
+
# ── Constants ─────────────────────────────────────────────────────
YOLO_PERSON_CLASS_ID = 0           # COCO class id for "person"
MIN_FACE_SIZE = 40                 # V4: stricter — tiny faces embed poorly
MAX_FACES_PER_IMAGE = 12           # slightly higher cap for group photos
MAX_CROPS = 6                      # max YOLO object crops per image
MAX_IMAGE_SIZE = 640               # object lane longest edge (px)
DET_SIZE_PRIMARY = (1280, 1280)    # V4: 1280 for small-face detection
DET_SIZE_SECONDARY = (640, 640)    # fallback / 2nd scale
FACE_CROP_THUMB_SIZE = 112         # face thumbnail edge for Pinecone metadata
FACE_CROP_QUALITY = 80             # JPEG quality for thumbnails
FACE_QUALITY_GATE = 0.60           # minimum det_score to accept a face
FACE_DIM = 512                     # ArcFace embedding dimension
ADAFACE_DIM = 512                  # AdaFace embedding dimension
FUSED_FACE_DIM = 1024              # ArcFace + AdaFace concatenated
# NOTE(review): when AdaFace fails to load, face vectors fall back to
# 512-D (FACE_DIM). Confirm the Pinecone face index dimension matches the
# vectors actually produced — mixed-dimension upserts will be rejected.
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# ════════════════════════════════════════════════════════════════
|
| 74 |
+
# AdaFace IR-50 Backbone
|
| 75 |
+
# Lightweight reimplementation of the IR-50 network head used
|
| 76 |
+
# to load pretrained AdaFace weights (WebFace4M checkpoint).
|
| 77 |
+
# Only the feature-extraction layers are used — no classifier.
|
| 78 |
+
# ════════════════════════════════════════════════════════════════
|
| 79 |
+
|
| 80 |
+
def _conv_bn(inp, oup, k, s, p, groups=1):
|
| 81 |
+
return nn.Sequential(
|
| 82 |
+
nn.Conv2d(inp, oup, k, s, p, groups=groups, bias=False),
|
| 83 |
+
nn.BatchNorm2d(oup),
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
class _IBasicBlock(nn.Module):
|
| 87 |
+
"""Basic residual block used in IR-50."""
|
| 88 |
+
expansion = 1
|
| 89 |
+
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
| 90 |
+
super().__init__()
|
| 91 |
+
self.bn1 = nn.BatchNorm2d(inplanes)
|
| 92 |
+
self.conv1 = nn.Conv2d(inplanes, planes, 3, 1, 1, bias=False)
|
| 93 |
+
self.bn2 = nn.BatchNorm2d(planes)
|
| 94 |
+
self.prelu = nn.PReLU(planes)
|
| 95 |
+
self.conv2 = nn.Conv2d(planes, planes, 3, stride, 1, bias=False)
|
| 96 |
+
self.bn3 = nn.BatchNorm2d(planes)
|
| 97 |
+
self.downsample = downsample
|
| 98 |
+
self.stride = stride
|
| 99 |
+
|
| 100 |
+
def forward(self, x):
|
| 101 |
+
identity = x
|
| 102 |
+
out = self.bn1(x)
|
| 103 |
+
out = self.conv1(out)
|
| 104 |
+
out = self.bn2(out)
|
| 105 |
+
out = self.prelu(out)
|
| 106 |
+
out = self.conv2(out)
|
| 107 |
+
out = self.bn3(out)
|
| 108 |
+
if self.downsample is not None:
|
| 109 |
+
identity = self.downsample(x)
|
| 110 |
+
out += identity
|
| 111 |
+
return out
|
| 112 |
+
|
| 113 |
+
class AdaFaceIR50(nn.Module):
    """IR-50 backbone used by AdaFace.

    Maps a normalised face crop to a 512-D unit-norm embedding.
    Attribute names mirror the official checkpoint layout so pretrained
    WebFace4M weights load directly (see _load_adaface).

    Input : (N, 3, 112, 112), values normalised with mean 0.5 / std 0.5
    Output: (N, 512), L2-normalised rows
    """

    def __init__(self):
        super().__init__()
        self.input_layer = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.PReLU(64),
        )
        self.layer1 = self._make_layer(64, 64, 3, stride=2)
        self.layer2 = self._make_layer(64, 128, 4, stride=2)
        self.layer3 = self._make_layer(128, 256, 14, stride=2)
        self.layer4 = self._make_layer(256, 512, 3, stride=2)
        self.bn2 = nn.BatchNorm2d(512)
        self.dropout = nn.Dropout(p=0.4)
        self.fc = nn.Linear(512 * 7 * 7, 512)
        self.features = nn.BatchNorm1d(512)

    def _make_layer(self, inplanes, planes, blocks, stride=1):
        """Stack `blocks` residual blocks; only the first may downsample."""
        shortcut = None
        if stride != 1 or inplanes != planes:
            shortcut = nn.Sequential(
                nn.Conv2d(inplanes, planes, 1, stride, bias=False),
                nn.BatchNorm2d(planes),
            )
        stages = [_IBasicBlock(inplanes, planes, stride, shortcut)]
        stages.extend(_IBasicBlock(planes, planes) for _ in range(blocks - 1))
        return nn.Sequential(*stages)

    def forward(self, x):
        # Convolutional trunk
        for stage in (self.input_layer, self.layer1, self.layer2,
                      self.layer3, self.layer4):
            x = stage(x)
        # Embedding head: BN → dropout → flatten → FC → BN → L2-normalise
        x = self.features(self.fc(self.dropout(self.bn2(x)).flatten(1)))
        return F.normalize(x, p=2, dim=1)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
# ════════════════════════════════════════════════════════════════
|
| 163 |
+
# Utility functions
|
| 164 |
+
# ════════════════════════════════════════════════════════════════
|
| 165 |
|
| 166 |
def _resize_pil(img: Image.Image, max_side: int = MAX_IMAGE_SIZE) -> Image.Image:
|
| 167 |
w, h = img.size
|
|
|
|
| 178 |
return h.hexdigest()
|
| 179 |
|
| 180 |
|
| 181 |
+
def _crop_to_b64(
    img_bgr: np.ndarray,
    x1: int, y1: int, x2: int, y2: int,
    thumb_size: int = FACE_CROP_THUMB_SIZE,
) -> str:
    """Crop a face region from a BGR image and return a base64 JPEG thumbnail.

    The box is expanded by 20% on each side (clamped to the image bounds)
    for visual context, resized to thumb_size×thumb_size, and encoded as a
    JPEG at FACE_CROP_QUALITY.

    Returns:
        base64-encoded JPEG bytes, or "" when the clamped crop is empty.
    """
    H, W = img_bgr.shape[:2]
    w, h = x2 - x1, y2 - y1
    pad_x = int(w * 0.20)
    pad_y = int(h * 0.20)
    cx1 = max(0, x1 - pad_x)
    cy1 = max(0, y1 - pad_y)
    cx2 = min(W, x2 + pad_x)
    cy2 = min(H, y2 + pad_y)
    crop = img_bgr[cy1:cy2, cx1:cx2]
    if crop.size == 0:
        return ""
    # .copy() forces a C-contiguous array: Image.fromarray can reject the
    # negative-stride view produced by [:, :, ::-1] on some Pillow versions.
    # (Matches the explicit copy already done in _face_crop_for_adaface.)
    pil = Image.fromarray(crop[:, :, ::-1].copy())  # BGR → RGB
    pil = pil.resize((thumb_size, thumb_size), Image.LANCZOS)
    buf = io.BytesIO()
    pil.save(buf, format="JPEG", quality=FACE_CROP_QUALITY)
    return base64.b64encode(buf.getvalue()).decode()
|
| 203 |
|
| 204 |
|
| 205 |
+
def _face_crop_for_adaface(
    img_bgr: np.ndarray,
    x1: int, y1: int, x2: int, y2: int,
) -> "np.ndarray | None":
    """
    Crop and normalise a face region for AdaFace IR-50 input.

    The box is expanded by 10% on each side (clamped to the image bounds),
    resized to 112×112, and scaled to [-1, 1] (mean 0.5 / std 0.5) — the
    normalisation AdaFaceIR50 expects.

    Returns:
        float32 array of shape (3, 112, 112), or None when the clamped
        crop is empty.  (Annotation fixed: the original claimed a bare
        np.ndarray but returns None on empty crops.)
    """
    H, W = img_bgr.shape[:2]
    w, h = x2 - x1, y2 - y1
    pad_x = int(w * 0.10)
    pad_y = int(h * 0.10)
    cx1 = max(0, x1 - pad_x)
    cy1 = max(0, y1 - pad_y)
    cx2 = min(W, x2 + pad_x)
    cy2 = min(H, y2 + pad_y)
    crop = img_bgr[cy1:cy2, cx1:cx2]
    if crop.size == 0:
        return None
    rgb = crop[:, :, ::-1].copy()  # BGR → RGB (copy → C-contiguous for PIL)
    pil = Image.fromarray(rgb).resize((112, 112), Image.LANCZOS)
    arr = np.array(pil, dtype=np.float32) / 255.0
    arr = (arr - 0.5) / 0.5  # normalise to [-1, 1]
    return arr.transpose(2, 0, 1)  # HWC → CHW
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
# ════════════════════════════════════════════════════════════════
|
| 232 |
+
# AIModelManager — V4
|
| 233 |
+
# ════════════════════════════════════════════════════════════════
|
| 234 |
+
|
| 235 |
class AIModelManager:
|
| 236 |
def __init__(self):
|
| 237 |
self.device = (
|
| 238 |
"cuda" if torch.cuda.is_available()
|
| 239 |
else ("mps" if torch.backends.mps.is_available() else "cpu")
|
| 240 |
)
|
| 241 |
+
print(f"🚀 Loading models onto: {self.device.upper()}...")
|
| 242 |
|
| 243 |
# ── Object Lane: SigLIP + DINOv2 (unchanged) ─────────────
|
| 244 |
+
print("📦 Loading SigLIP...")
|
| 245 |
self.siglip_processor = AutoProcessor.from_pretrained(
|
| 246 |
"google/siglip-base-patch16-224", use_fast=True)
|
| 247 |
self.siglip_model = AutoModel.from_pretrained(
|
| 248 |
"google/siglip-base-patch16-224").to(self.device).eval()
|
| 249 |
|
| 250 |
+
print("📦 Loading DINOv2...")
|
| 251 |
self.dinov2_processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
|
| 252 |
self.dinov2_model = AutoModel.from_pretrained(
|
| 253 |
"facebook/dinov2-base").to(self.device).eval()
|
|
|
|
| 257 |
self.dinov2_model = self.dinov2_model.half()
|
| 258 |
|
| 259 |
# ── YOLO for object segmentation ─────────────────────────
|
| 260 |
+
print("📦 Loading YOLO11n-seg...")
|
| 261 |
self.yolo = YOLO("yolo11n-seg.pt")
|
| 262 |
|
| 263 |
+
# ── Face Lane: InsightFace SCRFD + ArcFace-R100 ───────────
|
| 264 |
+
# V4: ALWAYS use buffalo_l (SCRFD-10GF + ArcFace-R100)
|
| 265 |
+
# even on CPU — accuracy matters more than speed here.
|
| 266 |
+
# det_size=1280 catches faces as small as ~10px in source.
|
| 267 |
self.face_app = None
|
|
|
|
| 268 |
if INSIGHTFACE_AVAILABLE:
|
| 269 |
try:
|
| 270 |
+
print("📦 Loading InsightFace buffalo_l (SCRFD-10GF + ArcFace-R100)...")
|
| 271 |
+
self.face_app = FaceAnalysis(
|
| 272 |
+
name="buffalo_l",
|
| 273 |
+
providers=(
|
| 274 |
+
["CUDAExecutionProvider", "CPUExecutionProvider"]
|
| 275 |
+
if self.device == "cuda"
|
| 276 |
+
else ["CPUExecutionProvider"]
|
| 277 |
+
),
|
| 278 |
+
)
|
| 279 |
self.face_app.prepare(
|
| 280 |
ctx_id=0 if self.device == "cuda" else -1,
|
| 281 |
+
det_size=DET_SIZE_PRIMARY, # 1280×1280 — key for small faces
|
| 282 |
)
|
| 283 |
+
# Warmup
|
| 284 |
+
test_img = np.zeros((112, 112, 3), dtype=np.uint8)
|
| 285 |
+
self.face_app.get(test_img)
|
| 286 |
+
print("✅ InsightFace buffalo_l loaded — SCRFD+ArcFace face lane ACTIVE")
|
| 287 |
+
print(f" det_size={DET_SIZE_PRIMARY} | quality_gate={FACE_QUALITY_GATE}")
|
| 288 |
except Exception as e:
|
|
|
|
| 289 |
print(f"❌ InsightFace init FAILED: {e}")
|
| 290 |
print(traceback.format_exc())
|
| 291 |
self.face_app = None
|
| 292 |
else:
|
| 293 |
+
print("❌ InsightFace NOT installed")
|
| 294 |
+
|
| 295 |
+
# ── AdaFace IR-50 (CVPR 2022) — quality-adaptive fusion ───
|
| 296 |
+
# Fused with ArcFace → 1024-D face vector
|
| 297 |
+
# Weights: adaface_ir50_webface4m.ckpt from HuggingFace
|
| 298 |
+
self.adaface_model = None
|
| 299 |
+
self._load_adaface()
|
| 300 |
+
|
| 301 |
+
# Thread safety for ONNX
|
| 302 |
+
self._face_lock = threading.Lock()
|
| 303 |
+
self._cache = {}
|
| 304 |
+
self._cache_maxsize = 128
|
| 305 |
+
print("✅ All models ready!")
|
| 306 |
+
print(f" Face vector dim : {FUSED_FACE_DIM if self.adaface_model else FACE_DIM}")
|
| 307 |
+
print(f" Object vector dim: 1536")
|
| 308 |
+
|
| 309 |
+
def _load_adaface(self):
|
| 310 |
+
"""Download and load AdaFace IR-50 WebFace4M weights."""
|
| 311 |
+
if not ADAFACE_WEIGHTS_AVAILABLE:
|
| 312 |
+
print("⚠️ AdaFace skipped — huggingface_hub not installed")
|
| 313 |
+
return
|
| 314 |
+
try:
|
| 315 |
+
print("📦 Loading AdaFace IR-50 (WebFace4M)...")
|
| 316 |
+
# Weights hosted on HuggingFace — ~170MB download on first run
|
| 317 |
+
ckpt_path = hf_hub_download(
|
| 318 |
+
repo_id = "minchul/adaface_ir50_webface4m",
|
| 319 |
+
filename = "adaface_ir50_webface4m.ckpt",
|
| 320 |
+
)
|
| 321 |
+
model = AdaFaceIR50()
|
| 322 |
+
state = torch.load(ckpt_path, map_location="cpu")
|
| 323 |
+
# Checkpoint may be wrapped in {"state_dict": ...}
|
| 324 |
+
if "state_dict" in state:
|
| 325 |
+
state = state["state_dict"]
|
| 326 |
+
# Strip any "model." prefix that some checkpoints add
|
| 327 |
+
state = {k.replace("model.", ""): v for k, v in state.items()}
|
| 328 |
+
# Only load keys that exist in our model
|
| 329 |
+
model_keys = set(model.state_dict().keys())
|
| 330 |
+
filtered = {k: v for k, v in state.items() if k in model_keys}
|
| 331 |
+
missing, _ = model.load_state_dict(filtered, strict=False)
|
| 332 |
+
if missing:
|
| 333 |
+
print(f" AdaFace: {len(missing)} missing keys (expected for head layers)")
|
| 334 |
+
model = model.to(self.device).eval()
|
| 335 |
+
if self.device == "cuda":
|
| 336 |
+
model = model.half()
|
| 337 |
+
self.adaface_model = model
|
| 338 |
+
print("✅ AdaFace IR-50 loaded — 1024-D fused face vectors ACTIVE")
|
| 339 |
+
except Exception as e:
|
| 340 |
+
print(f"⚠️ AdaFace load failed: {e} — falling back to ArcFace-only (512-D)")
|
| 341 |
+
print(f" Detail: {traceback.format_exc()[-400:]}")
|
| 342 |
+
self.adaface_model = None
|
| 343 |
|
| 344 |
+
# ── Object Lane: batched SigLIP + DINOv2 embedding ───────────
|
| 345 |
def _embed_crops_batch(self, crops: list) -> list:
|
| 346 |
+
"""Embed a list of PIL images → list of 1536-D numpy arrays."""
|
| 347 |
if not crops:
|
| 348 |
return []
|
| 349 |
with torch.no_grad():
|
| 350 |
+
# SigLIP
|
| 351 |
sig_in = self.siglip_processor(images=crops, return_tensors="pt", padding=True)
|
| 352 |
sig_in = {k: v.to(self.device) for k, v in sig_in.items()}
|
| 353 |
if self.device == "cuda":
|
|
|
|
| 358 |
elif isinstance(sig_out, tuple): sig_out = sig_out[0]
|
| 359 |
sig_vecs = F.normalize(sig_out.float(), p=2, dim=1).cpu()
|
| 360 |
|
| 361 |
+
# DINOv2
|
| 362 |
+
dino_in = self.dinov2_processor(images=crops, return_tensors="pt")
|
| 363 |
+
dino_in = {k: v.to(self.device) for k, v in dino_in.items()}
|
| 364 |
if self.device == "cuda":
|
| 365 |
dino_in = {k: v.half() if v.dtype == torch.float32 else v
|
| 366 |
for k, v in dino_in.items()}
|
|
|
|
| 371 |
fused = F.normalize(torch.cat([sig_vecs, dino_vecs], dim=1), p=2, dim=1)
|
| 372 |
return [fused[i].numpy() for i in range(len(crops))]
|
| 373 |
|
| 374 |
+
# ── AdaFace embedding for a single face crop ─────────────────
|
| 375 |
+
def _adaface_embed(self, face_arr_chw: np.ndarray) -> np.ndarray:
|
| 376 |
+
"""
|
| 377 |
+
Run AdaFace IR-50 on a preprocessed (3,112,112) float32 array.
|
| 378 |
+
Returns 512-D L2-normalised numpy embedding.
|
| 379 |
+
"""
|
| 380 |
+
if self.adaface_model is None or face_arr_chw is None:
|
| 381 |
+
return None
|
| 382 |
+
try:
|
| 383 |
+
t = torch.from_numpy(face_arr_chw).unsqueeze(0) # (1,3,112,112)
|
| 384 |
+
t = t.to(self.device)
|
| 385 |
+
if self.device == "cuda":
|
| 386 |
+
t = t.half()
|
| 387 |
+
with torch.no_grad():
|
| 388 |
+
emb = self.adaface_model(t) # (1,512)
|
| 389 |
+
return emb[0].float().cpu().numpy()
|
| 390 |
+
except Exception as e:
|
| 391 |
+
print(f"⚠️ AdaFace inference error: {e}")
|
| 392 |
+
return None
|
| 393 |
+
|
| 394 |
+
# ── V4 Face detection + dual encoding ────────────────────────
|
| 395 |
    def _detect_and_encode_faces(self, img_np: np.ndarray) -> list:
        """
        Detect ALL faces using InsightFace SCRFD-10GF at 1280px.
        For each face:
          - ArcFace-R100 embedding (512-D, from InsightFace)
          - AdaFace IR-50 embedding (512-D, fused quality-adaptive)
          - Concatenate + L2-normalise → 1024-D final vector
          - Quality gate: det_score ≥ 0.60, face width ≥ 40px
          - Base64 thumbnail stored for UI

        Returns list of dicts with keys:
          type, vector (1024-D or 512-D), face_idx, bbox,
          face_crop, det_score, face_quality, face_width_px

        Best-effort: any exception from the face stack is caught and an
        empty list is returned, so the object lane can still run.
        """
        if self.face_app is None:
            print("⚠️ face_app is None — InsightFace not loaded")
            return []

        try:
            # InsightFace expects BGR
            # (assumes float inputs are in [0, 1] — TODO confirm callers)
            if img_np.dtype != np.uint8:
                img_np = (img_np * 255).astype(np.uint8)
            # NOTE(review): img_np.shape[2] raises IndexError on 2-D
            # (grayscale) arrays — callers currently pass RGB only.
            bgr = img_np[:, :, ::-1].copy() if img_np.shape[2] == 3 else img_np.copy()

            print(f"🔍 SCRFD detection on {bgr.shape[1]}×{bgr.shape[0]} image...")
            # Lock: the underlying ONNX session is shared across threads
            with self._face_lock:
                faces = self.face_app.get(bgr)
            print(f" Raw detections: {len(faces)}")

            results = []
            accepted = 0  # count of faces that passed all quality gates

            for idx, face in enumerate(faces):
                if accepted >= MAX_FACES_PER_IMAGE:
                    break

                # ── Bounding box ──────────────────────────────────
                # Clamp to image bounds; SCRFD can return slightly
                # out-of-frame boxes for edge faces.
                bbox_raw = face.bbox.astype(int)
                x1, y1, x2, y2 = bbox_raw
                x1 = max(0, x1); y1 = max(0, y1)
                x2 = min(bgr.shape[1], x2); y2 = min(bgr.shape[0], y2)
                w, h = x2 - x1, y2 - y1
                if w <= 0 or h <= 0:
                    continue

                # ── Quality gate 1: minimum size ──────────────────
                if w < MIN_FACE_SIZE or h < MIN_FACE_SIZE:
                    print(f" Face {idx}: SKIP — too small ({w}×{h}px)")
                    continue

                # ── Quality gate 2: detection confidence ──────────
                det_score = float(face.det_score) if hasattr(face, "det_score") else 1.0
                if det_score < FACE_QUALITY_GATE:
                    print(f" Face {idx}: SKIP — low det_score ({det_score:.3f})")
                    continue

                # ── ArcFace embedding (from InsightFace) ──────────
                if face.embedding is None:
                    continue
                # L2-normalise so cosine similarity is well-defined
                arcface_vec = face.embedding.astype(np.float32)
                n = np.linalg.norm(arcface_vec)
                if n > 0:
                    arcface_vec = arcface_vec / n

                # ── AdaFace embedding (quality-adaptive) ──────────
                # May return None (model unavailable / empty crop / error)
                face_chw = _face_crop_for_adaface(bgr, x1, y1, x2, y2)
                adaface_vec = self._adaface_embed(face_chw)

                # ── Fuse: ArcFace + AdaFace → 1024-D ─────────────
                if adaface_vec is not None:
                    fused_raw = np.concatenate([arcface_vec, adaface_vec])
                    n2 = np.linalg.norm(fused_raw)
                    final_vec = (fused_raw / n2) if n2 > 0 else fused_raw
                    vec_dim = FUSED_FACE_DIM
                else:
                    # AdaFace unavailable — fall back to ArcFace only.
                    # NOTE(review): this yields a 512-D vector — verify the
                    # Pinecone face index accepts that dimension.
                    final_vec = arcface_vec
                    vec_dim = FACE_DIM

                # ── Face crop thumbnail for UI ─────────────────────
                face_crop_b64 = _crop_to_b64(bgr, x1, y1, x2, y2)

                results.append({
                    "type": "face",
                    "vector": final_vec,
                    "vec_dim": vec_dim,
                    "face_idx": accepted,   # index among ACCEPTED faces
                    "bbox": [int(x1), int(y1), int(w), int(h)],  # x, y, w, h
                    "face_crop": face_crop_b64,
                    "det_score": det_score,
                    "face_quality": det_score,  # alias for metadata
                    "face_width_px": int(w),
                })
                accepted += 1
                print(f" Face {idx}: ACCEPTED — {w}×{h}px | "
                      f"det={det_score:.3f} | dim={vec_dim}")

            print(f"👤 {accepted} face(s) passed quality gate")
            return results

        except Exception as e:
            print(f"🟠 InsightFace error: {e}")
            print(traceback.format_exc()[-600:])
            return []
|
| 499 |
|
| 500 |
# ── Main process_image ────────────────────────────────────────
|
| 501 |
def process_image(
|
| 502 |
self,
|
| 503 |
image_path: str,
|
| 504 |
+
is_query: bool = False,
|
| 505 |
detect_faces: bool = True,
|
| 506 |
) -> list:
|
| 507 |
"""
|
| 508 |
+
Full pipeline for one image.
|
| 509 |
+
|
| 510 |
+
Returns list of vector dicts:
|
| 511 |
+
Face: {type, vector (1024-D), face_idx, bbox, face_crop,
|
| 512 |
+
det_score, face_quality, face_width_px}
|
| 513 |
+
Object: {type, vector (1536-D)}
|
| 514 |
+
|
| 515 |
+
V4 changes vs V3:
|
| 516 |
+
- SCRFD at 1280px (not 640) — catches small/group faces
|
| 517 |
+
- buffalo_l always (not buffalo_sc on CPU)
|
| 518 |
+
- ArcFace + AdaFace fused 1024-D vectors
|
| 519 |
+
- Quality gate: det_score ≥ 0.60, width ≥ 40px
|
| 520 |
+
- Multi-scale: detect at 1280, retry at 640 if 0 faces found
|
| 521 |
"""
|
| 522 |
cache_key = f"{_img_hash(image_path)}_{detect_faces}_{is_query}"
|
| 523 |
if cache_key in self._cache:
|
| 524 |
+
print("⚡ Cache hit")
|
| 525 |
return self._cache[cache_key]
|
| 526 |
|
| 527 |
extracted = []
|
| 528 |
original_pil = Image.open(image_path).convert("RGB")
|
| 529 |
+
img_np = np.array(original_pil) # RGB uint8
|
| 530 |
faces_found = False
|
| 531 |
|
| 532 |
+
# ════════════════════════════════════════════════════════
|
| 533 |
+
# FACE LANE
|
| 534 |
+
# V4: Run at full resolution (up to 1280px) to catch small
|
| 535 |
+
# faces in group photos. If 0 faces detected, retry at
|
| 536 |
+
# the original resolution (multi-scale fallback).
|
| 537 |
+
# ════════════════════════════════════════════════════════
|
| 538 |
+
if detect_faces and self.face_app is not None:
|
| 539 |
+
# Scale 1: resize longest edge to 1280 for detection
|
| 540 |
+
detect_pil_1280 = _resize_pil(original_pil, 1280)
|
| 541 |
+
detect_np_1280 = np.array(detect_pil_1280)
|
| 542 |
+
face_results = self._detect_and_encode_faces(detect_np_1280)
|
| 543 |
+
|
| 544 |
+
# Scale 2: if nothing found, try original resolution
|
| 545 |
+
# (sometimes resizing DOWN helps when image is already small)
|
| 546 |
+
if not face_results and max(original_pil.size) < 1280:
|
| 547 |
+
print("🔄 Multi-scale fallback: retrying at original resolution")
|
| 548 |
+
face_results = self._detect_and_encode_faces(img_np)
|
| 549 |
|
| 550 |
if face_results:
|
| 551 |
faces_found = True
|
| 552 |
+
# Scale bboxes back to original-image coordinates
|
| 553 |
+
sx = original_pil.width / detect_pil_1280.width
|
| 554 |
+
sy = original_pil.height / detect_pil_1280.height
|
| 555 |
for fr in face_results:
|
| 556 |
+
if sx != 1.0 or sy != 1.0:
|
| 557 |
bx, by, bw, bh = fr["bbox"]
|
| 558 |
fr["bbox"] = [
|
| 559 |
+
int(bx * sx), int(by * sy),
|
| 560 |
+
int(bw * sx), int(bh * sy),
|
| 561 |
]
|
| 562 |
extracted.append(fr)
|
| 563 |
|
| 564 |
+
# ════════════════════════════════════════════════════════
|
| 565 |
+
# OBJECT LANE
|
| 566 |
+
# Always runs — even when faces are found.
|
| 567 |
+
# PERSON-class YOLO crops are skipped when faces active
|
| 568 |
+
# to avoid double-counting people.
|
| 569 |
+
# ════════════════════════════════════════════════════════
|
| 570 |
+
crops_pil = [_resize_pil(original_pil, MAX_IMAGE_SIZE)] # full image
|
| 571 |
yolo_results = self.yolo(image_path, conf=0.5, verbose=False)
|
| 572 |
|
| 573 |
for r in yolo_results:
|
| 574 |
if r.masks is not None:
|
| 575 |
for seg_idx, mask_xy in enumerate(r.masks.xy):
|
| 576 |
cls_id = int(r.boxes.cls[seg_idx].item())
|
|
|
|
| 577 |
if faces_found and cls_id == YOLO_PERSON_CLASS_ID:
|
|
|
|
| 578 |
continue
|
| 579 |
polygon = np.array(mask_xy, dtype=np.int32)
|
| 580 |
if len(polygon) < 3:
|
|
|
|
| 599 |
if len(crops_pil) >= MAX_CROPS + 1:
|
| 600 |
break
|
| 601 |
|
| 602 |
+
crops = [_resize_pil(c, MAX_IMAGE_SIZE) for c in crops_pil]
|
| 603 |
+
print(f"🧠 Embedding {len(crops)} object crop(s)...")
|
| 604 |
obj_vecs = self._embed_crops_batch(crops)
|
| 605 |
for vec in obj_vecs:
|
| 606 |
extracted.append({"type": "object", "vector": vec})
|
| 607 |
|
| 608 |
+
# Cache
|
| 609 |
if len(self._cache) >= self._cache_maxsize:
|
| 610 |
+
del self._cache[next(iter(self._cache))]
|
|
|
|
| 611 |
self._cache[cache_key] = extracted
|
|
|
|
| 612 |
return extracted
|
| 613 |
|
| 614 |
async def process_image_async(
|
| 615 |
self,
|
| 616 |
+
image_path: str,
|
| 617 |
+
is_query: bool = False,
|
| 618 |
detect_faces: bool = True,
|
| 619 |
) -> list:
|
| 620 |
loop = asyncio.get_event_loop()
|