| | import contextlib, io, base64, torch, json, os, threading |
| | from PIL import Image |
| | import open_clip |
| | from huggingface_hub import hf_hub_download, create_commit, CommitOperationAdd |
| | from safetensors.torch import save_file, load_file |
| | from reparam import reparameterize_model |
| |
|
# Shared-secret token checked on admin ops (upsert/reload labels).
ADMIN_TOKEN = os.environ.get("ADMIN_TOKEN", "")
# Hugging Face dataset repo used to persist/restore label snapshots.
HF_LABEL_REPO = os.environ.get("HF_LABEL_REPO", "")
# Separate write/read credentials; the read token falls back to the write token.
HF_WRITE_TOKEN = os.environ.get("HF_WRITE_TOKEN", "")
HF_READ_TOKEN = os.environ.get("HF_READ_TOKEN", HF_WRITE_TOKEN)
| |
|
| |
|
def _fingerprint(device: str, dtype: torch.dtype) -> dict:
    """Describe the exact embedding recipe (model, libs, dtype, scoring).

    Persisted snapshots carry this dict so cached text embeddings can be
    rejected when the model or runtime they were produced with no longer
    matches the one currently loaded.
    """
    cuda_version = torch.version.cuda if torch.cuda.is_available() else None
    return dict(
        model_id="MobileCLIP-B",
        pretrained="datacompdr",
        open_clip=getattr(open_clip, "__version__", "unknown"),
        torch=torch.__version__,
        cuda=cuda_version,
        dtype_runtime=str(dtype),
        text_norm="L2",
        logit_scale=100.0,
    )
| |
|
| |
|
class EndpointHandler:
    """Zero-shot image classifier over a hot-reloadable label set.

    The canonical label embeddings live on CPU in float32
    (``text_features_cpu``) and are mirrored to the runtime device/dtype
    (``text_features``). Label snapshots (embeddings + id/name metadata) can
    be published to and restored from a Hugging Face *dataset* repo, versioned
    under ``snapshots/v{N}/`` with ``snapshots/latest.json`` as a pointer.

    ``self._lock`` guards every mutation of the label tables; the inference
    path takes it briefly to get a consistent (ids, names, features) triple.
    """

    def __init__(self, path: str = ""):
        # Runtime placement: fp16 on GPU, fp32 on CPU. self.dtype is the
        # single source of truth for casts below.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.dtype = torch.float16 if self.device == "cuda" else torch.float32

        model, _, self.preprocess = open_clip.create_model_and_transforms(
            "MobileCLIP-B", pretrained="datacompdr"
        )
        model.eval()
        # Fold MobileCLIP's reparameterizable branches for faster inference.
        model = reparameterize_model(model)
        model.to(self.device)
        if self.device == "cuda":
            model = model.to(self.dtype)
        self.model = model
        self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
        self.fingerprint = _fingerprint(self.device, self.dtype)
        self._lock = threading.Lock()

        # Prefer the latest published snapshot; fall back to the bundled
        # items.json shipped alongside the handler.
        loaded = False
        if HF_LABEL_REPO:
            with contextlib.suppress(Exception):
                loaded = self._load_snapshot_from_hub_latest()
        if not loaded:
            with open(f"{path}/items.json", "r", encoding="utf-8") as f:
                items = json.load(f)
            prompts = [it["prompt"] for it in items]
            self.class_ids = [int(it["id"]) for it in items]
            self.class_names = [it["name"] for it in items]
            # Reuse the shared encoder helper instead of duplicating it here.
            self.text_features_cpu = (
                self._encode_text(prompts).detach().cpu().to(torch.float32).contiguous()
            )
            self._to_device()
            self.labels_version = 1

    def __call__(self, data):
        """Endpoint entry point: dispatch admin ops or run classification."""
        payload = data.get("inputs", data)

        op = payload.get("op")
        if op == "upsert_labels":
            return self._op_upsert_labels(payload)
        if op == "reload_labels":
            return self._op_reload_labels(payload)

        # Plain inference. Callers may demand a minimum labels version they
        # know has been published; sync best-effort before classifying.
        min_ver = payload.get("min_labels_version")
        if isinstance(min_ver, int) and min_ver > getattr(self, "labels_version", 0):
            with contextlib.suppress(Exception):
                self._load_snapshot_from_hub_version(min_ver)
        return self._classify(payload)

    def _op_upsert_labels(self, payload):
        """Admin op: append unknown labels, then publish a new snapshot."""
        if payload.get("token") != ADMIN_TOKEN:
            return {"error": "unauthorized"}
        items = payload.get("items", []) or []
        added = self._upsert_items(items)
        if added > 0:
            new_ver = int(getattr(self, "labels_version", 1)) + 1
            try:
                self._persist_snapshot_to_hub(new_ver)
                self.labels_version = new_ver
            except Exception as e:
                # The in-memory table already contains the new labels at this
                # point; only the published version number is left unchanged.
                return {"status": "error", "added": added, "detail": str(e)}
        return {"status": "ok", "added": added, "labels_version": getattr(self, "labels_version", 1)}

    def _op_reload_labels(self, payload):
        """Admin op: force-load a specific published snapshot version."""
        if payload.get("token") != ADMIN_TOKEN:
            return {"error": "unauthorized"}
        try:
            ver = int(payload.get("version"))
        except Exception:
            return {"error": "invalid_version"}
        ok = self._load_snapshot_from_hub_version(ver)
        return {"status": "ok" if ok else "nochange", "labels_version": getattr(self, "labels_version", 0)}

    def _classify(self, payload):
        """Decode the base64 image and return labels sorted by score.

        ``payload["image"]`` is required (base64-encoded image bytes);
        ``payload["top_k"]`` optionally truncates the ranked result list.
        """
        image = Image.open(io.BytesIO(base64.b64decode(payload["image"]))).convert("RGB")
        img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
        if self.device == "cuda":
            img_tensor = img_tensor.to(self.dtype)
        # Snapshot the label state under the lock so a concurrent upsert or
        # reload cannot tear ids/names/features apart mid-request.
        with self._lock:
            class_ids = list(self.class_ids)
            class_names = list(self.class_names)
            text_features = self.text_features
        with torch.no_grad():
            img_feat = self.model.encode_image(img_tensor)
            img_feat /= img_feat.norm(dim=-1, keepdim=True)
            # 100.0 is the fixed logit scale recorded in the fingerprint.
            probs = (100.0 * img_feat @ text_features.T).softmax(dim=-1)[0]
        results = zip(class_ids, class_names, probs.detach().cpu().tolist())
        top_k = int(payload.get("top_k", len(class_ids)))
        return sorted(
            [{"id": i, "label": name, "score": float(p)} for i, name, p in results],
            key=lambda x: x["score"],
            reverse=True,
        )[:top_k]

    def _encode_text(self, prompts):
        """Tokenize and encode prompts; returns L2-normalized device features."""
        with torch.no_grad():
            toks = self.tokenizer(prompts).to(self.device)
            feats = self.model.encode_text(toks)
            feats = feats / feats.norm(dim=-1, keepdim=True)
        return feats

    def _to_device(self):
        """Mirror the canonical CPU/f32 embeddings to the runtime device/dtype."""
        self.text_features = self.text_features_cpu.to(self.device, dtype=self.dtype)

    def _upsert_items(self, new_items):
        """Append labels whose ids are not yet known; return the count added.

        Each item is a dict with ``id``, ``name`` and ``prompt`` keys. Runs
        entirely under the lock so readers never see a half-applied batch.
        """
        if not new_items:
            return 0
        with self._lock:
            known = set(getattr(self, "class_ids", []))
            batch = [it for it in new_items if int(it.get("id")) not in known]
            if not batch:
                return 0
            feats = (
                self._encode_text([it["prompt"] for it in batch])
                .detach()
                .cpu()
                .to(torch.float32)
            )
            ids = [int(it["id"]) for it in batch]
            names = [it["name"] for it in batch]
            if not hasattr(self, "text_features_cpu"):
                self.text_features_cpu = feats.contiguous()
                self.class_ids = ids
                self.class_names = names
            else:
                self.text_features_cpu = torch.cat([self.text_features_cpu, feats], dim=0).contiguous()
                self.class_ids.extend(ids)
                self.class_names.extend(names)
            self._to_device()
            return len(batch)

    def _persist_snapshot_to_hub(self, version: int):
        """Publish embeddings + metadata as ``snapshots/v{version}`` in one commit.

        Also rewrites ``snapshots/latest.json`` so new replicas pick up this
        version. Raises RuntimeError when the repo or write token is missing.
        """
        if not HF_LABEL_REPO:
            raise RuntimeError("HF_LABEL_REPO not set")
        if not HF_WRITE_TOKEN:
            raise RuntimeError("HF_WRITE_TOKEN not set for publishing")

        emb_path = "/tmp/embeddings.safetensors"
        meta_path = "/tmp/meta.json"
        latest_bytes = io.BytesIO(json.dumps({"version": int(version)}).encode("utf-8"))

        save_file({"embeddings": self.text_features_cpu.to(torch.float32)}, emb_path)
        meta = {
            "items": [{"id": int(i), "name": n} for i, n in zip(self.class_ids, self.class_names)],
            "fingerprint": self.fingerprint,
            "dims": int(self.text_features_cpu.shape[1]),
            "count": int(self.text_features_cpu.shape[0]),
            "version": int(version),
        }
        with open(meta_path, "w", encoding="utf-8") as f:
            json.dump(meta, f)

        # CommitOperationAdd accepts only path_in_repo/path_or_fileobj; the
        # previous `lfs=True` kwarg raised TypeError on every publish. LFS
        # routing is decided by the Hub from the repo's .gitattributes.
        ops = [
            CommitOperationAdd(
                path_in_repo=f"snapshots/v{version}/embeddings.safetensors",
                path_or_fileobj=emb_path,
            ),
            CommitOperationAdd(
                path_in_repo=f"snapshots/v{version}/meta.json",
                path_or_fileobj=meta_path,
            ),
            CommitOperationAdd(
                path_in_repo="snapshots/latest.json",
                path_or_fileobj=latest_bytes,
            ),
        ]
        create_commit(
            repo_id=HF_LABEL_REPO,
            repo_type="dataset",
            operations=ops,
            token=HF_WRITE_TOKEN,
            commit_message=f"labels v{version}",
        )

    def _load_snapshot_from_hub_version(self, version: int) -> bool:
        """Swap in snapshot ``v{version}`` from the hub.

        Returns False when no repo is configured. Raises RuntimeError when the
        snapshot's fingerprint does not match the loaded model/runtime.
        """
        if not HF_LABEL_REPO:
            return False
        with self._lock:
            emb_p = hf_hub_download(
                HF_LABEL_REPO,
                f"snapshots/v{version}/embeddings.safetensors",
                repo_type="dataset",
                token=HF_READ_TOKEN,
                force_download=True,
            )
            meta_p = hf_hub_download(
                HF_LABEL_REPO,
                f"snapshots/v{version}/meta.json",
                repo_type="dataset",
                token=HF_READ_TOKEN,
                force_download=True,
            )
            with open(meta_p, "r", encoding="utf-8") as f:
                meta = json.load(f)
            if meta.get("fingerprint") != self.fingerprint:
                raise RuntimeError("Embedding/model fingerprint mismatch")
            feats = load_file(emb_p)["embeddings"]
            self.text_features_cpu = feats.contiguous()
            self.class_ids = [int(x["id"]) for x in meta.get("items", [])]
            self.class_names = [x["name"] for x in meta.get("items", [])]
            self.labels_version = int(meta.get("version", version))
            self._to_device()
            return True

    def _load_snapshot_from_hub_latest(self) -> bool:
        """Resolve ``snapshots/latest.json`` and load the version it points to."""
        if not HF_LABEL_REPO:
            return False
        try:
            latest_p = hf_hub_download(
                HF_LABEL_REPO,
                "snapshots/latest.json",
                repo_type="dataset",
                token=HF_READ_TOKEN,
            )
        except Exception:
            return False
        with open(latest_p, "r", encoding="utf-8") as f:
            latest = json.load(f)
        ver = int(latest.get("version", 0))
        if ver <= 0:
            return False
        return self._load_snapshot_from_hub_version(ver)
| |
|
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
| |
|
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | |
| |
|