Spaces:
Sleeping
Sleeping
# src/cloud_db.py — Enterprise Lens V4
# ════════════════════════════════════════════════════════════════
# NOTE: In the production FastAPI app (main.py), ALL Pinecone and
# Cloudinary operations are performed directly — this class is NOT
# called by main.py. It exists as a standalone utility / SDK wrapper
# for scripts, notebooks, or future use outside the API.
#
# If you use this class, ensure your Pinecone indexes match V4 dims:
#   enterprise-faces   → 1024-D (ArcFace-512 + AdaFace-512, fused)
#   enterprise-objects → 1536-D (SigLIP-768 + DINOv2-768, fused)
# ════════════════════════════════════════════════════════════════
| import os | |
| import uuid | |
| import cloudinary | |
| import cloudinary.uploader | |
| from pinecone import Pinecone, ServerlessSpec | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
# ── V4 index constants — keep identical to main.py and models.py ──
IDX_FACES = "enterprise-faces"
IDX_OBJECTS = "enterprise-objects"

# Each index stores two fused embeddings, so its width is the sum of both.
IDX_FACES_DIM = 512 + 512      # ArcFace(512) + AdaFace(512) -> always 1024
IDX_OBJECTS_DIM = 768 + 768    # SigLIP(768) + DINOv2(768)   -> always 1536

# ── V4 similarity cut-offs (cosine score in the fused vector space) ──
# The face thresholds MUST stay in sync with main.py FACE_THRESHOLD_* constants.
FACE_THRESHOLD_HIGH = 0.40     # query face with det_score >= 0.85
FACE_THRESHOLD_LOW = 0.32      # query face with det_score <  0.85
OBJECT_THRESHOLD = 0.45        # object / scene matches
class CloudDB:
    """
    Utility wrapper around Pinecone + Cloudinary for Enterprise Lens V4.

    Index dimensions:
        enterprise-faces   : 1024-D cosine
        enterprise-objects : 1536-D cosine

    Expected vector layout (built by the caller; this class neither fuses
    nor normalises vectors):
        Face vectors   : ArcFace(512) + AdaFace(512) concatenated + L2-normalised
        Object vectors : SigLIP(768) + DINOv2(768) concatenated + L2-normalised
    """

    def __init__(self):
        """
        Configure Cloudinary and Pinecone from environment variables and
        open handles to both indexes.

        Reads CLOUDINARY_CLOUD_NAME, CLOUDINARY_API_KEY, CLOUDINARY_API_SECRET
        and PINECONE_API_KEY. Creates or validates both indexes before
        opening them.

        Raises:
            ValueError: an index already exists with the wrong dimension.
        """
        # ── Cloudinary ────────────────────────────────────────────
        cloudinary.config(
            cloud_name=os.getenv("CLOUDINARY_CLOUD_NAME"),
            api_key=os.getenv("CLOUDINARY_API_KEY"),
            api_secret=os.getenv("CLOUDINARY_API_SECRET"),
        )
        # ── Pinecone ──────────────────────────────────────────────
        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
        self._ensure_indexes()
        self.index_faces = self.pc.Index(IDX_FACES)
        self.index_objects = self.pc.Index(IDX_OBJECTS)

    def _ensure_indexes(self):
        """
        Create Pinecone indexes at correct V4 dimensions if they don't exist.

        Safe to call multiple times — existing indexes are only validated.

        Raises:
            ValueError: an existing index has a dimension other than the
                V4 one.
        """
        existing = {idx.name for idx in self.pc.list_indexes()}
        # Faces first, then objects — same order as the index handles
        # opened in __init__.
        for name, dimension in ((IDX_FACES, IDX_FACES_DIM),
                                (IDX_OBJECTS, IDX_OBJECTS_DIM)):
            self._ensure_index(name, dimension, existing)

    def _ensure_index(self, name, dimension, existing):
        """Create index *name* at *dimension*, or validate it if it is in *existing*."""
        if name not in existing:
            print(f"📦 Creating {name} at {dimension}-D...")
            self.pc.create_index(
                name=name,
                dimension=dimension,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )
            print(f" ✅ {name} created at {dimension}-D")
            return

        # The index already exists: refuse to continue on a dimension
        # mismatch rather than fail later on every upsert/query.
        actual_dim = self.pc.describe_index(name).dimension
        if actual_dim != dimension:
            raise ValueError(
                f"❌ {name} exists at {actual_dim}-D but V4 needs "
                f"{dimension}-D. Go to Settings → Danger Zone → "
                f"Reset Database to recreate at correct dimensions."
            )

    @staticmethod
    def _as_list(vector) -> list:
        """Return *vector* as a plain list (uses ``.tolist()`` when available)."""
        return vector.tolist() if hasattr(vector, "tolist") else list(vector)

    @staticmethod
    def _metadata_of(match) -> dict:
        """Return a match's metadata, or ``{}`` when it is missing or None."""
        try:
            metadata = match["metadata"]
        except (KeyError, AttributeError):
            # Record was stored without metadata; treat as empty instead
            # of aborting the whole search.
            return {}
        return metadata or {}

    # ── Upload image to Cloudinary ────────────────────────────────
    def upload_image(self, file_path: str, folder_name: str = "visual_search") -> str:
        """Upload image to Cloudinary, return secure_url."""
        response = cloudinary.uploader.upload(file_path, folder=folder_name)
        return response["secure_url"]

    # ── Store vector in correct Pinecone index ────────────────────
    def add_vector(self, data_dict: dict, image_url: str, image_id: str = None):
        """
        Upsert one vector into the correct Pinecone index.

        data_dict keys:
            type          : "face" selects the faces index; any other value
                            selects the objects index
            vector        : np.ndarray or list — must match index dimension
            face_crop     : str (base64 JPEG thumbnail, face only)
            det_score     : float (InsightFace detection confidence, face only)
            face_quality  : float (defaults to det_score when absent)
            face_width_px : int (face bounding box width in pixels)
            face_idx      : int (face index within the source image)
            bbox          : list [x, y, w, h] (stored as its str() form)
            folder        : str (Cloudinary folder / category name)

        Args:
            data_dict: vector plus metadata, as described above.
            image_url: URL stored under both "image_url" and "url".
            image_id: ID for the record; a random UUID4 is used when falsy.

        Returns:
            The ID the vector was stored under, so callers can reference
            the record even when the ID was auto-generated.

        Raises:
            KeyError: data_dict lacks "vector" or "type".
        """
        vec_id = image_id or str(uuid.uuid4())
        vec_list = self._as_list(data_dict["vector"])

        # Metadata common to both indexes.
        metadata = {
            "image_url": image_url,
            "url": image_url,  # alias for compatibility
            "folder": data_dict.get("folder", ""),
        }

        if data_dict["type"] == "face":
            # ── V4 face metadata — full set required for UI ───────
            # NOTE(review): face_crop is stored inline as base64; Pinecone
            # limits metadata size per record, so large thumbnails may be
            # rejected — confirm against the current limit.
            det_score = data_dict.get("det_score", 1.0)
            metadata.update({
                "face_idx": data_dict.get("face_idx", 0),
                "bbox": str(data_dict.get("bbox", [])),
                "face_crop": data_dict.get("face_crop", ""),
                "det_score": det_score,
                "face_quality": data_dict.get("face_quality", det_score),
                "face_width_px": data_dict.get("face_width_px", 0),
            })
            target = self.index_faces
        else:
            # ── V4 object metadata ────────────────────────────────
            target = self.index_objects

        target.upsert(vectors=[{
            "id": vec_id,
            "values": vec_list,
            "metadata": metadata,
        }])
        return vec_id

    # ── Search ────────────────────────────────────────────────────
    def search(self, query_dict: dict, top_k: int = 10,
               min_score: float = None) -> list:
        """
        Search the correct Pinecone index for one query vector.

        For face vectors: uses an adaptive threshold based on
        query_dict["det_score"] (FACE_THRESHOLD_HIGH when >= 0.85, else
        FACE_THRESHOLD_LOW).
        For object vectors: uses OBJECT_THRESHOLD.
        A non-None min_score overrides either threshold.

        Matches without a URL in their metadata are skipped in both modes.

        Args:
            query_dict: needs "vector" and "type"; "det_score" is optional
                and only used for faces (default 1.0).
            top_k: maximum number of results returned.
            min_score: explicit raw-cosine threshold, or None for defaults.

        Returns:
            List of dicts, best match first.
            Face   : {url, score, raw_score, face_crop, folder, caption}
                     where score is the UI-remapped value in [0.75, 0.99].
            Object : {url, score, folder, caption} where score is the raw
                     cosine score rounded to 4 decimals.
        """
        vec_list = self._as_list(query_dict["vector"])
        if query_dict["type"] == "face":
            det_score = query_dict.get("det_score", 1.0)
            return self._search_faces(vec_list, det_score, top_k, min_score)
        return self._search_objects(vec_list, top_k, min_score)

    def _search_faces(self, vec_list, det_score, top_k, min_score):
        """Query the faces index, keep the best hit per image, map scores for the UI."""
        # Adaptive threshold: high-quality faces are stricter.
        if min_score is not None:
            threshold = min_score
        elif det_score >= 0.85:
            threshold = FACE_THRESHOLD_HIGH
        else:
            threshold = FACE_THRESHOLD_LOW

        response = self.index_faces.query(
            vector=vec_list,
            top_k=top_k * 3,  # over-fetch: several faces may share one image
            include_metadata=True,
        )

        # Deduplicate by image URL — keep the best raw score per image.
        best_by_url = {}
        for match in response.get("matches", []) or []:
            raw = match["score"]
            if raw < threshold:
                continue
            metadata = self._metadata_of(match)
            url = metadata.get("url") or metadata.get("image_url", "")
            if not url:
                continue
            if url not in best_by_url or raw > best_by_url[url]["raw"]:
                best_by_url[url] = {
                    "raw": raw,
                    "face_crop": metadata.get("face_crop", ""),
                    "folder": metadata.get("folder", ""),
                }

        # Rank on the raw score: the UI score is clamped and rounded, so
        # distinct matches can tie on it.
        ranked = sorted(best_by_url.items(),
                        key=lambda item: item[1]["raw"],
                        reverse=True)[:top_k]

        # Remap raw cosine → UI percentage (75%–99%). The lower clamp matters
        # when min_score admits matches below FACE_THRESHOLD_LOW.
        lo = FACE_THRESHOLD_LOW
        results = []
        for url, hit in ranked:
            ui = 0.75 + ((hit["raw"] - lo) / (1.0 - lo)) * 0.24
            ui = round(min(0.99, max(0.75, ui)), 4)
            results.append({
                "url": url,
                "score": ui,
                "raw_score": round(hit["raw"], 4),
                "face_crop": hit["face_crop"],
                "folder": hit["folder"],
                "caption": "👤 Verified Identity Match",
            })
        return results

    def _search_objects(self, vec_list, top_k, min_score):
        """Query the objects index and return hits at or above the threshold."""
        threshold = min_score if min_score is not None else OBJECT_THRESHOLD
        response = self.index_objects.query(
            vector=vec_list, top_k=top_k, include_metadata=True)

        results = []
        for match in response.get("matches", []) or []:
            if match["score"] < threshold:
                continue
            metadata = self._metadata_of(match)
            url = metadata.get("url") or metadata.get("image_url", "")
            if not url:
                # Same rule as the face branch: a hit without a URL
                # cannot be shown.
                continue
            results.append({
                "url": url,
                "score": round(match["score"], 4),
                "folder": metadata.get("folder", ""),
                "caption": "🎯 Visual & Semantic Match",
            })
        return results