# LandmarkDiff / landmarkdiff/landmarks.py
# Uploader: dreamlessx
# Commit message: Update landmarkdiff/landmarks.py to v0.3.2
# Commit: a82aad5 (verified)
"""Facial landmark extraction using MediaPipe Face Mesh v2."""
from __future__ import annotations
import logging
from dataclasses import dataclass
from pathlib import Path
import cv2
import mediapipe as mp
import numpy as np
logger = logging.getLogger(__name__)
# Drawing colour per anatomical region, in OpenCV channel order (B, G, R).
# Left/right pairs deliberately share one colour.
REGION_COLORS: dict[str, tuple[int, int, int]] = dict(
    jawline=(255, 255, 255),  # white
    eyebrow_left=(0, 255, 0),  # green
    eyebrow_right=(0, 255, 0),  # green
    eye_left=(255, 255, 0),  # cyan
    eye_right=(255, 255, 0),  # cyan
    nose=(0, 255, 255),  # yellow
    lips=(0, 0, 255),  # red
    iris_left=(255, 0, 255),  # magenta
    iris_right=(255, 0, 255),  # magenta
)
# Landmark indices (rows of ``FaceLandmarks.landmarks``) grouped by anatomical
# region. Key order is significant to consumers that build an index -> colour
# map by iterating this dict: a later region wins if an index were listed twice.
# NOTE(review): the "jawline" ring starts at index 10 and appears to trace the
# whole face outline rather than only the jaw -- confirm the intended naming.
LANDMARK_REGIONS: dict[str, list[int]] = {
    "jawline": [
        10, 338, 297, 332, 284, 251, 389, 356, 454,
        323, 361, 288, 397, 365, 379, 378, 400, 377,
        152, 148, 176, 149, 150, 136, 172, 58, 132,
        93, 234, 127, 162, 21, 54, 103, 67, 109,
    ],
    "eye_left": [
        33, 7, 163, 144, 145, 153, 154, 155,
        133, 173, 157, 158, 159, 160, 161, 246,
    ],
    "eye_right": [
        362, 382, 381, 380, 374, 373, 390, 249,
        263, 466, 388, 387, 386, 385, 384, 398,
    ],
    "eyebrow_left": [70, 63, 105, 66, 107, 55, 65, 52, 53, 46],
    "eyebrow_right": [300, 293, 334, 296, 336, 285, 295, 282, 283, 276],
    "nose": [
        1, 2, 4, 5, 6,
        19, 94, 141, 168, 195,
        197, 236, 240, 274, 275,
        278, 279, 294, 326, 327,
        360, 363, 370, 456, 460,
    ],
    "lips": [
        61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
        308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78,
    ],
    # The iris groups are two consecutive runs of five indices each.
    "iris_left": list(range(468, 473)),
    "iris_right": list(range(473, 478)),
}
@dataclass(frozen=True, eq=False)
class FaceLandmarks:
    """Extracted facial landmarks with metadata.

    Instances are immutable. Equality compares the landmark array
    element-wise together with the scalar fields; the auto-generated
    dataclass ``__eq__``/``__hash__`` are disabled because they raise on
    NumPy array fields.
    """

    landmarks: np.ndarray  # (478, 3) normalized (x, y, z)
    image_width: int  # width in pixels used to de-normalize x
    image_height: int  # height in pixels used to de-normalize y
    confidence: float  # detection score reported by the extractor

    def __eq__(self, other: object) -> bool:
        """Return True when all scalar fields and every landmark value match."""
        if other.__class__ is not self.__class__:
            return NotImplemented
        return (
            self.image_width == other.image_width
            and self.image_height == other.image_height
            and self.confidence == other.confidence
            and np.array_equal(self.landmarks, other.landmarks)
        )

    def __hash__(self) -> int:
        """Hash on the scalar fields only (arrays are unhashable).

        Equal instances always share these fields, so this stays consistent
        with ``__eq__``.
        """
        return hash((self.image_width, self.image_height, self.confidence))

    @property
    def pixel_coords(self) -> np.ndarray:
        """Convert normalized landmarks to pixel coordinates (478, 2).

        Coordinates are clamped to valid image bounds so that extreme
        head poses do not produce out-of-range indices.
        """
        return self.pixel_coords_at(self.image_width, self.image_height)

    def pixel_coords_at(self, width: int, height: int) -> np.ndarray:
        """Convert normalized landmarks to pixel coordinates at a given size.

        Use this when the image has been resized after landmark extraction.
        Coordinates are clamped to [0, width-1] x [0, height-1].

        Args:
            width: Target image width in pixels.
            height: Target image height in pixels.

        Returns:
            A new float array with one (x, y) row per landmark; the stored
            landmarks are not modified.
        """
        # Copy first: slicing yields a view and the scaling below is in-place.
        coords = self.landmarks[:, :2].copy()
        coords[:, 0] *= width
        coords[:, 1] *= height
        coords[:, 0] = np.clip(coords[:, 0], 0, width - 1)
        coords[:, 1] = np.clip(coords[:, 1], 0, height - 1)
        return coords

    def rescale(self, width: int, height: int) -> FaceLandmarks:
        """Return a copy with updated image dimensions.

        Landmarks stay in normalized [0,1] space; only the stored
        width/height change, so ``pixel_coords`` returns values at
        the new resolution.
        """
        return FaceLandmarks(
            landmarks=self.landmarks.copy(),
            image_width=width,
            image_height=height,
            confidence=self.confidence,
        )

    def get_region(self, region: str) -> np.ndarray:
        """Return the normalized landmark rows belonging to a named region.

        Args:
            region: A key of ``LANDMARK_REGIONS`` (e.g. ``"lips"``).

        Returns:
            The rows of ``landmarks`` selected by that region's indices.
            An unknown region name selects nothing and yields an empty array
            rather than raising.
        """
        indices = LANDMARK_REGIONS.get(region, [])
        return self.landmarks[indices]
def extract_landmarks(
    image: np.ndarray,
    min_detection_confidence: float = 0.5,
    min_tracking_confidence: float = 0.5,
) -> FaceLandmarks | None:
    """Extract 478 facial landmarks from an image using MediaPipe Face Mesh.

    The Tasks API is tried first; if it raises for any reason the legacy
    Solutions API is tried. If both raise, the failure is logged at warning
    level and ``None`` is returned.

    Args:
        image: BGR image as numpy array. Single-channel (grayscale) and
            4-channel (BGRA) arrays are also accepted and converted.
        min_detection_confidence: Minimum face detection confidence.
        min_tracking_confidence: Minimum landmark tracking confidence
            (only forwarded to the Solutions API).

    Returns:
        FaceLandmarks if a face is detected, None otherwise (including when
        ``image`` is None or has no pixels).
    """
    # A missing or zero-sized frame cannot contain a face; report "no face"
    # rather than failing inside OpenCV.
    if image is None or image.size == 0:
        return None

    h, w = image.shape[:2]

    # MediaPipe is fed 3-channel RGB; choose the conversion by channel count.
    channels = 1 if image.ndim == 2 else image.shape[2]
    if channels == 1:
        rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif channels == 4:
        rgb = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
    else:
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Try new Tasks API first (mediapipe >= 0.10.20), fall back to legacy solutions API
    try:
        landmarks, confidence = _extract_tasks_api(rgb, min_detection_confidence)
    except Exception:
        logger.debug("Tasks API unavailable, trying Solutions API", exc_info=True)
        try:
            landmarks, confidence = _extract_solutions_api(
                rgb, min_detection_confidence, min_tracking_confidence
            )
        except Exception:
            # Both backends broke: this is an environment problem, not a
            # "no face" result, so make it visible at the default log level.
            logger.warning("Both MediaPipe APIs failed", exc_info=True)
            return None

    if landmarks is None:
        return None

    return FaceLandmarks(
        landmarks=landmarks,
        image_width=w,
        image_height=h,
        confidence=confidence,
    )
def _ensure_face_landmarker_model() -> Path:
    """Return the cached Face Landmarker model path, downloading it if needed.

    The download goes to a temporary file in the cache directory and is moved
    into place only after it completes, so an interrupted transfer can never
    leave a truncated model at the final path.

    Raises:
        OSError: If the download or the final rename fails
            (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    import tempfile
    import urllib.request

    model_path = Path(tempfile.gettempdir()) / "face_landmarker_v2_with_blendshapes.task"
    # An empty file cannot be a valid model; treat it as not cached.
    if model_path.exists() and model_path.stat().st_size > 0:
        return model_path

    url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
    # Reserve a unique name next to the target so the rename stays on one
    # filesystem; the handle is closed before urlretrieve reopens the path.
    with tempfile.NamedTemporaryFile(
        dir=str(model_path.parent), suffix=".part", delete=False
    ) as handle:
        partial = Path(handle.name)
    try:
        urllib.request.urlretrieve(url, str(partial))
        partial.replace(model_path)
    finally:
        # After a successful replace the temporary name no longer exists.
        partial.unlink(missing_ok=True)
    return model_path


def _extract_tasks_api(
    rgb: np.ndarray,
    min_confidence: float,
) -> tuple[np.ndarray | None, float]:
    """Extract landmarks using MediaPipe Tasks API (>= 0.10.20).

    Args:
        rgb: RGB image array handed to ``mp.Image`` as SRGB data.
        min_confidence: Minimum face detection confidence.

    Returns:
        ``(landmarks, 1.0)`` with one float32 (x, y, z) row per landmark of
        the first detected face, or ``(None, 0.0)`` when no face is found.

    Raises:
        AttributeError: If the installed mediapipe lacks the Tasks API.
        OSError: If the model file cannot be downloaded.
    """
    # Resolve the API classes before touching the network so that an older
    # mediapipe fails fast without triggering a model download.
    FaceLandmarker = mp.tasks.vision.FaceLandmarker
    FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
    RunningMode = mp.tasks.vision.RunningMode
    BaseOptions = mp.tasks.BaseOptions

    model_path = _ensure_face_landmarker_model()

    options = FaceLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=str(model_path)),
        running_mode=RunningMode.IMAGE,
        num_faces=1,
        min_face_detection_confidence=min_confidence,
        output_face_blendshapes=False,
        output_facial_transformation_matrixes=False,
    )

    # NOTE: a new landmarker is created (and closed) on every call.
    with FaceLandmarker.create_from_options(options) as landmarker:
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        result = landmarker.detect(mp_image)

        if not result.face_landmarks:
            return None, 0.0

        face_lms = result.face_landmarks[0]
        landmarks = np.array(
            [(lm.x, lm.y, lm.z) for lm in face_lms],
            dtype=np.float32,
        )
        # MediaPipe Tasks API doesn't expose per-landmark detection confidence;
        # return 1.0 to indicate successful detection
        return landmarks, 1.0
def _extract_solutions_api(
    rgb: np.ndarray,
    min_detection_confidence: float,
    min_tracking_confidence: float,
) -> tuple[np.ndarray | None, float]:
    """Run the legacy ``mp.solutions.face_mesh`` detector on a single image.

    Args:
        rgb: Image array passed unchanged to ``FaceMesh.process``.
        min_detection_confidence: Forwarded to ``FaceMesh``.
        min_tracking_confidence: Forwarded to ``FaceMesh``.

    Returns:
        ``(landmarks, 1.0)`` where ``landmarks`` is a float32 array holding
        one (x, y, z) row per landmark of the first face, or ``(None, 0.0)``
        when the detector reports no face.
    """
    mesh_options = {
        "static_image_mode": True,
        "max_num_faces": 1,
        "refine_landmarks": True,
        "min_detection_confidence": min_detection_confidence,
        "min_tracking_confidence": min_tracking_confidence,
    }
    with mp.solutions.face_mesh.FaceMesh(**mesh_options) as detector:
        detections = detector.process(rgb).multi_face_landmarks
        if not detections:
            return None, 0.0

        # Only one face is requested, so the first entry is the result.
        first_face = detections[0]
        xyz_rows = [(point.x, point.y, point.z) for point in first_face.landmark]
        # The legacy API exposes no detection score; 1.0 simply marks success.
        return np.array(xyz_rows, dtype=np.float32), 1.0
def visualize_landmarks(
    image: np.ndarray,
    face: FaceLandmarks,
    radius: int = 1,
    draw_regions: bool = True,
) -> np.ndarray:
    """Return a copy of ``image`` with one filled dot per landmark.

    Args:
        image: BGR image; it is copied, never drawn on directly.
        face: Landmarks whose ``pixel_coords`` give the dot positions.
        radius: Dot radius in pixels.
        draw_regions: When True, dots take their region's colour from
            ``REGION_COLORS`` and landmarks outside every region are gray.
            When False, every dot is white.

    Returns:
        The annotated copy.
    """
    annotated = image.copy()
    points = face.pixel_coords

    if not draw_regions:
        white = (255, 255, 255)
        for px, py in points:
            cv2.circle(annotated, (int(px), int(py)), radius, white, -1)
        return annotated

    # Landmark index -> colour; regions are visited in dict order, so a later
    # region would override an earlier one for a shared index.
    palette: dict[int, tuple[int, int, int]] = {
        member: REGION_COLORS.get(region_name, (255, 255, 255))
        for region_name, members in LANDMARK_REGIONS.items()
        for member in members
    }
    unassigned = (128, 128, 128)
    for position, (px, py) in enumerate(points):
        dot_color = palette.get(position, unassigned)
        cv2.circle(annotated, (int(px), int(py)), radius, dot_color, -1)
    return annotated
def _face_mesh_edges() -> tuple[list[tuple[int, int]], list[tuple[int, int]]] | None:
    """Look up the (tessellation, contour) edge lists as (start, end) pairs.

    The Tasks API tables are preferred; the legacy Solutions tables are used
    when the Tasks ones cannot be imported. Returns None if neither exists.
    """
    try:
        from mediapipe.tasks.python.vision.face_landmarker import FaceLandmarksConnections

        return (
            [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION],
            [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS],
        )
    except (ImportError, AttributeError):
        pass

    try:
        legacy = mp.solutions.face_mesh
        # The legacy tables are unordered sets of index pairs; sort them so
        # the drawing order is reproducible.
        return sorted(legacy.FACEMESH_TESSELATION), sorted(legacy.FACEMESH_CONTOURS)
    except (ImportError, AttributeError):
        return None


def render_landmark_image(
    face: FaceLandmarks,
    width: int | None = None,
    height: int | None = None,
    radius: int = 2,
) -> np.ndarray:
    """Render MediaPipe face mesh tessellation on black canvas.

    Draws the full 2556-edge tessellation mesh that CrucibleAI/ControlNetMediaPipeFace
    was pre-trained on. This is critical -- the ControlNet expects dense triangulated
    wireframes, not sparse dots.

    Falls back to colored dots if tessellation connections aren't available
    from either the Tasks API or the legacy Solutions API.

    Args:
        face: Extracted face landmarks.
        width: Canvas width (defaults to face.image_width).
        height: Canvas height (defaults to face.image_height).
        radius: Dot radius (used only by the colored-dot fallback).

    Returns:
        BGR image with face mesh on black background.
    """
    w = width or face.image_width
    h = height or face.image_height
    canvas = np.zeros((h, w, 3), dtype=np.uint8)

    # Scale a copy so the (frozen) landmark array is left untouched.
    coords = face.landmarks[:, :2].copy()
    coords[:, 0] *= w
    coords[:, 1] *= h

    # Resolve the connection tables before drawing anything, so a lookup
    # failure can never leave a half-drawn mesh underneath the fallback dots.
    edges = _face_mesh_edges()
    if edges is not None:
        tessellation, contours = edges
        # Plain Python int tuples are what cv2 drawing functions parse reliably.
        pts = [tuple(pt) for pt in coords.astype(np.int32).tolist()]

        # Draw tessellation edges (thin, gray-white)
        for start, end in tessellation:
            cv2.line(canvas, pts[start], pts[end], (192, 192, 192), 1, cv2.LINE_AA)

        # Draw contour edges on top (brighter, key features)
        for start, end in contours:
            cv2.line(canvas, pts[start], pts[end], (255, 255, 255), 1, cv2.LINE_AA)
        return canvas

    # Fallback: draw colored dots if tessellation not available
    idx_to_color: dict[int, tuple[int, int, int]] = {}
    for region, indices in LANDMARK_REGIONS.items():
        color = REGION_COLORS.get(region, (128, 128, 128))
        for idx in indices:
            idx_to_color[idx] = color

    for i, (x, y) in enumerate(coords):
        color = idx_to_color.get(i, (128, 128, 128))
        cv2.circle(canvas, (int(x), int(y)), radius, color, -1)
    return canvas
def load_image(path: str | Path) -> np.ndarray:
    """Read an image file with OpenCV and return it as a BGR array.

    Args:
        path: Location of the image file.

    Returns:
        The array produced by ``cv2.imread``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns None for ``path``.
    """
    pixels = cv2.imread(str(path))
    if pixels is not None:
        return pixels
    raise FileNotFoundError(f"Could not load image: {path}")