Spaces:
Running
Running
| """Facial landmark extraction using MediaPipe Face Mesh v2.""" | |
| from __future__ import annotations | |
| import logging | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| import cv2 | |
| import mediapipe as mp | |
| import numpy as np | |
| logger = logging.getLogger(__name__) | |
| # Region color map for visualization (BGR) | |
| REGION_COLORS: dict[str, tuple[int, int, int]] = { | |
| "jawline": (255, 255, 255), # white | |
| "eyebrow_left": (0, 255, 0), # green | |
| "eyebrow_right": (0, 255, 0), | |
| "eye_left": (255, 255, 0), # cyan | |
| "eye_right": (255, 255, 0), | |
| "nose": (0, 255, 255), # yellow | |
| "lips": (0, 0, 255), # red | |
| "iris_left": (255, 0, 255), # magenta | |
| "iris_right": (255, 0, 255), | |
| } | |
# MediaPipe Face Mesh landmark indices grouped by anatomical region.
# Indices 0-467 are the base mesh; 468-477 are the refined iris points.
LANDMARK_REGIONS: dict[str, list[int]] = {
    "jawline": [
        10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
        397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
        172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109,
    ],
    "eye_left": [
        33, 7, 163, 144, 145, 153, 154, 155,
        133, 173, 157, 158, 159, 160, 161, 246,
    ],
    "eye_right": [
        362, 382, 381, 380, 374, 373, 390, 249,
        263, 466, 388, 387, 386, 385, 384, 398,
    ],
    "eyebrow_left": [70, 63, 105, 66, 107, 55, 65, 52, 53, 46],
    "eyebrow_right": [300, 293, 334, 296, 336, 285, 295, 282, 283, 276],
    "nose": [
        1, 2, 4, 5, 6, 19, 94, 141, 168, 195, 197, 236, 240,
        274, 275, 278, 279, 294, 326, 327, 360, 363, 370, 456, 460,
    ],
    "lips": [
        61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
        308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78,
    ],
    "iris_left": [468, 469, 470, 471, 472],
    "iris_right": [473, 474, 475, 476, 477],
}
@dataclass
class FaceLandmarks:
    """Extracted facial landmarks with metadata.

    NOTE(fix): this class is constructed with keyword arguments throughout
    the module (``FaceLandmarks(landmarks=..., image_width=..., ...)``) but
    previously had no ``__init__``; the ``@dataclass`` decorator (already
    imported at module top) was missing, so construction raised TypeError.
    """

    landmarks: np.ndarray  # (478, 3) normalized (x, y, z)
    image_width: int  # width of the image the landmarks came from, in pixels
    image_height: int  # height of that image, in pixels
    confidence: float  # detection confidence; 1.0 when the API exposes none

    def pixel_coords(self) -> np.ndarray:
        """Convert normalized landmarks to pixel coordinates (478, 2).

        Coordinates are clamped to valid image bounds so that extreme
        head poses do not produce out-of-range indices.
        """
        # Delegate to pixel_coords_at so the scale/clamp logic lives once.
        return self.pixel_coords_at(self.image_width, self.image_height)

    def pixel_coords_at(self, width: int, height: int) -> np.ndarray:
        """Convert normalized landmarks to pixel coordinates at a given size.

        Use this when the image has been resized after landmark extraction.
        Coordinates are clamped to [0, width-1] x [0, height-1].

        Args:
            width: Target image width in pixels.
            height: Target image height in pixels.

        Returns:
            (478, 2) array of (x, y) pixel coordinates.
        """
        coords = self.landmarks[:, :2].copy()
        coords[:, 0] *= width
        coords[:, 1] *= height
        coords[:, 0] = np.clip(coords[:, 0], 0, width - 1)
        coords[:, 1] = np.clip(coords[:, 1], 0, height - 1)
        return coords

    def rescale(self, width: int, height: int) -> FaceLandmarks:
        """Return a copy with updated image dimensions.

        Landmarks stay in normalized [0,1] space; only the stored
        width/height change, so ``pixel_coords`` returns values at
        the new resolution.
        """
        return FaceLandmarks(
            landmarks=self.landmarks.copy(),
            image_width=width,
            image_height=height,
            confidence=self.confidence,
        )

    def get_region(self, region: str) -> np.ndarray:
        """Get the landmark rows for a named region.

        Args:
            region: A key of LANDMARK_REGIONS (e.g. "lips").

        Returns:
            (k, 3) array of the region's landmarks; empty for unknown names.
        """
        indices = LANDMARK_REGIONS.get(region, [])
        return self.landmarks[indices]
def extract_landmarks(
    image: np.ndarray,
    min_detection_confidence: float = 0.5,
    min_tracking_confidence: float = 0.5,
) -> FaceLandmarks | None:
    """Extract 478 facial landmarks from an image using MediaPipe Face Mesh.

    Prefers the modern Tasks API (mediapipe >= 0.10.20); when that raises,
    falls back to the legacy Solutions API.

    Args:
        image: BGR image as numpy array.
        min_detection_confidence: Minimum face detection confidence.
        min_tracking_confidence: Minimum landmark tracking confidence.

    Returns:
        FaceLandmarks if a face is detected, None otherwise.
    """
    height, width = image.shape[:2]
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    try:
        points, score = _extract_tasks_api(rgb_frame, min_detection_confidence)
    except Exception:
        logger.debug("Tasks API unavailable, trying Solutions API", exc_info=True)
        try:
            points, score = _extract_solutions_api(
                rgb_frame, min_detection_confidence, min_tracking_confidence
            )
        except Exception:
            logger.debug("Both MediaPipe APIs failed", exc_info=True)
            return None

    if points is None:
        return None

    return FaceLandmarks(
        landmarks=points,
        image_width=width,
        image_height=height,
        confidence=score,
    )
def _extract_tasks_api(
    rgb: np.ndarray,
    min_confidence: float,
) -> tuple[np.ndarray | None, float]:
    """Extract landmarks using MediaPipe Tasks API (>= 0.10.20).

    Args:
        rgb: RGB image as numpy array.
        min_confidence: Minimum face detection confidence.

    Returns:
        ((478, 3) float32 landmark array or None, confidence). Confidence is
        1.0 on success because the Tasks API doesn't expose a detection score.

    Raises:
        Exception: Any MediaPipe/import/download failure propagates so the
            caller can fall back to the legacy Solutions API.
    """
    FaceLandmarker = mp.tasks.vision.FaceLandmarker
    FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
    RunningMode = mp.tasks.vision.RunningMode
    BaseOptions = mp.tasks.BaseOptions

    import os
    import tempfile
    import urllib.request

    # Download model if not cached.
    # FIX: download to a temp file and move it into place atomically.
    # Previously urlretrieve wrote directly to model_path, so an interrupted
    # download left a truncated file that satisfied the exists() check on
    # every later run, permanently breaking detection.
    model_path = Path(tempfile.gettempdir()) / "face_landmarker_v2_with_blendshapes.task"
    if not model_path.exists():
        url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
        # Temp file in the same directory so os.replace stays on one filesystem
        # (atomic rename).
        fd, tmp_name = tempfile.mkstemp(dir=str(model_path.parent), suffix=".task.part")
        os.close(fd)
        try:
            urllib.request.urlretrieve(url, tmp_name)
            os.replace(tmp_name, str(model_path))
        finally:
            # Clean up the partial file if the download or rename failed.
            if os.path.exists(tmp_name):
                os.unlink(tmp_name)

    options = FaceLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=str(model_path)),
        running_mode=RunningMode.IMAGE,
        num_faces=1,
        min_face_detection_confidence=min_confidence,
        output_face_blendshapes=False,
        output_facial_transformation_matrixes=False,
    )
    with FaceLandmarker.create_from_options(options) as landmarker:
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        result = landmarker.detect(mp_image)

    if not result.face_landmarks:
        return None, 0.0

    face_lms = result.face_landmarks[0]
    landmarks = np.array(
        [(lm.x, lm.y, lm.z) for lm in face_lms],
        dtype=np.float32,
    )
    # MediaPipe Tasks API doesn't expose per-landmark detection confidence;
    # return 1.0 to indicate successful detection
    return landmarks, 1.0
def _extract_solutions_api(
    rgb: np.ndarray,
    min_detection_confidence: float,
    min_tracking_confidence: float,
) -> tuple[np.ndarray | None, float]:
    """Extract landmarks using legacy MediaPipe Solutions API.

    Args:
        rgb: RGB image as numpy array.
        min_detection_confidence: Minimum face detection confidence.
        min_tracking_confidence: Minimum landmark tracking confidence.

    Returns:
        ((478, 3) float32 landmark array or None, confidence). Confidence is
        1.0 on success; the legacy API exposes no detection score.
    """
    mesh = mp.solutions.face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
    )
    with mesh as face_mesh:
        detection = face_mesh.process(rgb)

    if not detection.multi_face_landmarks:
        return None, 0.0

    mesh_points = [(lm.x, lm.y, lm.z) for lm in detection.multi_face_landmarks[0].landmark]
    # Legacy API doesn't expose detection confidence; return 1.0 for success
    return np.array(mesh_points, dtype=np.float32), 1.0
def visualize_landmarks(
    image: np.ndarray,
    face: FaceLandmarks,
    radius: int = 1,
    draw_regions: bool = True,
) -> np.ndarray:
    """Draw colored landmark dots on image by anatomical region.

    Args:
        image: BGR image to draw on (will be copied).
        face: Extracted face landmarks.
        radius: Dot radius in pixels.
        draw_regions: If True, color by region. Otherwise all white.

    Returns:
        Annotated image copy.
    """
    canvas = image.copy()
    # FIX: pixel_coords is a method, not a property. The previous
    # `face.pixel_coords` bound a method object, so iterating it below
    # raised TypeError; it must be called.
    coords = face.pixel_coords()
    if draw_regions:
        # Build index -> color mapping
        idx_to_color: dict[int, tuple[int, int, int]] = {}
        for region, indices in LANDMARK_REGIONS.items():
            color = REGION_COLORS.get(region, (255, 255, 255))
            for idx in indices:
                idx_to_color[idx] = color
        for i, (x, y) in enumerate(coords):
            # Gray for any landmark index not assigned to a named region.
            color = idx_to_color.get(i, (128, 128, 128))
            cv2.circle(canvas, (int(x), int(y)), radius, color, -1)
    else:
        for x, y in coords:
            cv2.circle(canvas, (int(x), int(y)), radius, (255, 255, 255), -1)
    return canvas
def render_landmark_image(
    face: FaceLandmarks,
    width: int | None = None,
    height: int | None = None,
    radius: int = 2,
) -> np.ndarray:
    """Render MediaPipe face mesh tessellation on black canvas.

    Draws the full 2556-edge tessellation mesh that CrucibleAI/ControlNetMediaPipeFace
    was pre-trained on. This is critical -- the ControlNet expects dense triangulated
    wireframes, not sparse dots.

    Falls back to colored dots if tessellation connections aren't available.

    Args:
        face: Extracted face landmarks.
        width: Canvas width (defaults to face.image_width).
        height: Canvas height (defaults to face.image_height).
        radius: Dot radius, used only by the dot fallback.

    Returns:
        BGR image with face mesh on black background.
    """
    canvas_w = width or face.image_width
    canvas_h = height or face.image_height
    canvas = np.zeros((canvas_h, canvas_w, 3), dtype=np.uint8)

    # Scale normalized landmark coordinates to the requested canvas size.
    xy = face.landmarks[:, :2].copy()
    xy[:, 0] *= canvas_w
    xy[:, 1] *= canvas_h
    pts = xy.astype(np.int32)

    try:
        from mediapipe.tasks.python.vision.face_landmarker import FaceLandmarksConnections

        # Thin gray-white tessellation edges first; brighter contour edges
        # (key features) drawn on top. Both connection tables are resolved
        # before any drawing so a missing attribute falls through cleanly.
        edge_layers = (
            (FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION, (192, 192, 192)),
            (FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS, (255, 255, 255)),
        )
        for connections, color in edge_layers:
            for conn in connections:
                start_pt = tuple(pts[conn.start])
                end_pt = tuple(pts[conn.end])
                cv2.line(canvas, start_pt, end_pt, color, 1, cv2.LINE_AA)
    except (ImportError, AttributeError):
        # Fallback: colored dots per anatomical region.
        idx_to_color = {
            idx: REGION_COLORS.get(region, (128, 128, 128))
            for region, indices in LANDMARK_REGIONS.items()
            for idx in indices
        }
        for i, (x, y) in enumerate(xy):
            cv2.circle(canvas, (int(x), int(y)), radius, idx_to_color.get(i, (128, 128, 128)), -1)
    return canvas
def load_image(path: str | Path) -> np.ndarray:
    """Load an image from disk as BGR numpy array.

    Raises:
        FileNotFoundError: If the file is missing or cv2 cannot decode it.
    """
    loaded = cv2.imread(str(path))
    if loaded is None:
        # cv2.imread returns None both for missing files and decode failures.
        raise FileNotFoundError(f"Could not load image: {path}")
    return loaded