Create handler.py
handler.py · ADDED

from typing import Any, Dict, List
import torch
from PIL import Image
import requests
from io import BytesIO
import base64
from transformers import AutoProcessor, AutoModel


class EndpointHandler:
    def __init__(self, path: str = ""):
        """
        Initialize the handler by loading the SigLIP2 model and processor.

        Args:
            path: Path to the model directory (provided by HF Inference Endpoints)
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = AutoModel.from_pretrained(path, trust_remote_code=True).to(self.device)
        self.processor = AutoProcessor.from_pretrained(path, trust_remote_code=True)
        self.model.eval()

    def _load_image(self, image_data: Any) -> Image.Image:
        """
        Load an image from various input formats.

        Args:
            image_data: Can be a URL string, a base64 string, or raw bytes

        Returns:
            PIL Image object
        """
        if isinstance(image_data, str):
            # Check if it's a URL
            if image_data.startswith(("http://", "https://")):
                response = requests.get(image_data, timeout=10)
                response.raise_for_status()
                return Image.open(BytesIO(response.content)).convert("RGB")
            # Otherwise assume base64
            else:
                # Handle data URI format ("data:image/png;base64,...")
                if "," in image_data:
                    image_data = image_data.split(",")[1]
                image_bytes = base64.b64decode(image_data)
                return Image.open(BytesIO(image_bytes)).convert("RGB")
        elif isinstance(image_data, bytes):
            return Image.open(BytesIO(image_data)).convert("RGB")
        else:
            raise ValueError(f"Unsupported image format: {type(image_data)}")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Process inference requests for zero-shot image classification.

        Args:
            data: Dictionary containing:
                - "inputs": Image data (URL, base64, or bytes)
                - "parameters": Optional dict with:
                    - "candidate_labels": List of text labels to classify against

        Returns:
            List of dictionaries with "label" and "score" for each candidate
        """
        # Extract inputs
        inputs = data.get("inputs")
        parameters = data.get("parameters", {})

        # Get candidate labels (required for zero-shot classification)
        candidate_labels = parameters.get("candidate_labels", [])

        if not candidate_labels:
            # Default labels if none provided
            candidate_labels = ["a photo", "an illustration", "a diagram"]

        # Accept a comma-separated string as well as a list
        if isinstance(candidate_labels, str):
            candidate_labels = [label.strip() for label in candidate_labels.split(",")]

        # Load the image
        image = self._load_image(inputs)

        # Process inputs; SigLIP2 text towers expect text padded to 64 tokens
        processed_inputs = self.processor(
            text=candidate_labels,
            images=image,
            padding="max_length",
            max_length=64,
            return_tensors="pt",
        ).to(self.device)

        # Run inference
        with torch.no_grad():
            outputs = self.model(**processed_inputs)

        # Use the model's image-text logits, which already include the learned
        # logit scale and bias; a softmax over raw cosine similarities would be
        # nearly uniform, since cosines only span [-1, 1]
        logits_per_image = outputs.logits_per_image

        # Softmax across candidate labels gives relative probabilities
        # (SigLIP is trained with a sigmoid loss, so torch.sigmoid(logits_per_image)
        # yields independent per-label scores if multi-label output is preferred)
        probs = torch.softmax(logits_per_image, dim=-1)

        # Format results
        scores = probs[0].cpu().tolist()
        results = [
            {"label": label, "score": score}
            for label, score in zip(candidate_labels, scores)
        ]

        # Sort by score descending
        results.sort(key=lambda x: x["score"], reverse=True)

        return results
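
A quick way to sanity-check the handler before deploying is to instantiate it directly and pass a payload in the format the __call__ docstring describes. A minimal sketch, assuming a public SigLIP2 checkpoint (google/siglip2-base-patch16-224) and a sample COCO image; both are illustrative choices, not part of the handler:

# Local smoke test for EndpointHandler (a sketch; the checkpoint id and
# image URL are assumptions -- use whatever model this endpoint will serve)
from handler import EndpointHandler

handler = EndpointHandler(path="google/siglip2-base-patch16-224")

payload = {
    "inputs": "http://images.cocodataset.org/val2017/000000039769.jpg",
    "parameters": {"candidate_labels": ["two cats", "a dog", "an airplane"]},
}

for result in handler(payload):
    print(f"{result['label']}: {result['score']:.4f}")

Results come back sorted, so the first entry is the best-matching label.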
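Once deployed as a Hugging Face Inference Endpoint, the handler is called over HTTP with the same JSON schema. A sketch of a client request sending a local image as base64 (one of the formats _load_image accepts); the endpoint URL, token, and file name are placeholders to fill in:

import base64
import requests

# Placeholders -- substitute your own endpoint URL, access token, and image file
ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"
HF_TOKEN = "hf_..."

# Encode a local image as base64, matching the handler's "inputs" field
with open("cat.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

response = requests.post(
    ENDPOINT_URL,
    headers={
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    },
    json={
        "inputs": image_b64,
        "parameters": {"candidate_labels": ["a cat", "a dog", "a bird"]},
    },
)
response.raise_for_status()
print(response.json())  # list of {"label", "score"} dicts, sorted by score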