Spaces:
Running
Running
| """Facial landmark extraction using MediaPipe Face Mesh v2.""" | |
| from __future__ import annotations | |
| import logging | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| import cv2 | |
| import mediapipe as mp | |
| import numpy as np | |
| logger = logging.getLogger(__name__) | |
| # Region color map for visualization (BGR) | |
| REGION_COLORS: dict[str, tuple[int, int, int]] = { | |
| "jawline": (255, 255, 255), # white | |
| "eyebrow_left": (0, 255, 0), # green | |
| "eyebrow_right": (0, 255, 0), | |
| "eye_left": (255, 255, 0), # cyan | |
| "eye_right": (255, 255, 0), | |
| "nose": (0, 255, 255), # yellow | |
| "lips": (0, 0, 255), # red | |
| "iris_left": (255, 0, 255), # magenta | |
| "iris_right": (255, 0, 255), | |
| } | |
# MediaPipe Face Mesh landmark indices grouped by anatomical region.
# Indices 0-467 are the base mesh; 468-477 are the refined iris points.
LANDMARK_REGIONS: dict[str, list[int]] = {
    "jawline": [
        10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
        397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
        172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109,
    ],
    "eye_left": [
        33, 7, 163, 144, 145, 153, 154, 155,
        133, 173, 157, 158, 159, 160, 161, 246,
    ],
    "eye_right": [
        362, 382, 381, 380, 374, 373, 390, 249,
        263, 466, 388, 387, 386, 385, 384, 398,
    ],
    "eyebrow_left": [70, 63, 105, 66, 107, 55, 65, 52, 53, 46],
    "eyebrow_right": [300, 293, 334, 296, 336, 285, 295, 282, 283, 276],
    "nose": [
        1, 2, 4, 5, 6, 19, 94, 141, 168, 195, 197, 236, 240,
        274, 275, 278, 279, 294, 326, 327, 360, 363, 370, 456, 460,
    ],
    "lips": [
        61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
        308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78,
    ],
    "iris_left": [468, 469, 470, 471, 472],
    "iris_right": [473, 474, 475, 476, 477],
}
@dataclass
class FaceLandmarks:
    """Extracted facial landmarks with metadata.

    NOTE(fix): this class is constructed with keyword arguments throughout
    the module (``FaceLandmarks(landmarks=..., image_width=..., ...)``) but
    previously had no ``__init__``; the ``@dataclass`` decorator (already
    imported at module top) was missing, so construction raised TypeError.
    """

    landmarks: np.ndarray  # (478, 3) normalized (x, y, z)
    image_width: int  # width of the image the landmarks came from, in pixels
    image_height: int  # height of that image, in pixels
    confidence: float  # detection confidence; 1.0 when the API exposes none

    def pixel_coords(self) -> np.ndarray:
        """Convert normalized landmarks to pixel coordinates (478, 2).

        Coordinates are clamped to valid image bounds so that extreme
        head poses do not produce out-of-range indices.
        """
        # Delegate to pixel_coords_at so the scale/clamp logic lives once.
        return self.pixel_coords_at(self.image_width, self.image_height)

    def pixel_coords_at(self, width: int, height: int) -> np.ndarray:
        """Convert normalized landmarks to pixel coordinates at a given size.

        Use this when the image has been resized after landmark extraction.
        Coordinates are clamped to [0, width-1] x [0, height-1].

        Args:
            width: Target image width in pixels.
            height: Target image height in pixels.

        Returns:
            (478, 2) array of (x, y) pixel coordinates.
        """
        coords = self.landmarks[:, :2].copy()
        coords[:, 0] *= width
        coords[:, 1] *= height
        coords[:, 0] = np.clip(coords[:, 0], 0, width - 1)
        coords[:, 1] = np.clip(coords[:, 1], 0, height - 1)
        return coords

    def rescale(self, width: int, height: int) -> FaceLandmarks:
        """Return a copy with updated image dimensions.

        Landmarks stay in normalized [0,1] space; only the stored
        width/height change, so ``pixel_coords`` returns values at
        the new resolution.
        """
        return FaceLandmarks(
            landmarks=self.landmarks.copy(),
            image_width=width,
            image_height=height,
            confidence=self.confidence,
        )

    def get_region(self, region: str) -> np.ndarray:
        """Get the landmark rows for a named region.

        Args:
            region: A key of LANDMARK_REGIONS (e.g. "lips").

        Returns:
            (k, 3) array of the region's landmarks; empty for unknown names.
        """
        indices = LANDMARK_REGIONS.get(region, [])
        return self.landmarks[indices]
def extract_landmarks(
    image: np.ndarray,
    min_detection_confidence: float = 0.5,
    min_tracking_confidence: float = 0.5,
) -> FaceLandmarks | None:
    """Extract 478 facial landmarks from an image using MediaPipe Face Mesh.

    Prefers the modern Tasks API (mediapipe >= 0.10.20); when that raises,
    falls back to the legacy Solutions API.

    Args:
        image: BGR image as numpy array.
        min_detection_confidence: Minimum face detection confidence.
        min_tracking_confidence: Minimum landmark tracking confidence.

    Returns:
        FaceLandmarks if a face is detected, None otherwise.
    """
    height, width = image.shape[:2]
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    try:
        points, score = _extract_tasks_api(rgb_frame, min_detection_confidence)
    except Exception:
        logger.debug("Tasks API unavailable, trying Solutions API", exc_info=True)
        try:
            points, score = _extract_solutions_api(
                rgb_frame, min_detection_confidence, min_tracking_confidence
            )
        except Exception:
            logger.debug("Both MediaPipe APIs failed", exc_info=True)
            return None

    if points is None:
        return None

    return FaceLandmarks(
        landmarks=points,
        image_width=width,
        image_height=height,
        confidence=score,
    )
def _extract_tasks_api(
    rgb: np.ndarray,
    min_confidence: float,
) -> tuple[np.ndarray | None, float]:
    """Extract landmarks using MediaPipe Tasks API (>= 0.10.20).

    Args:
        rgb: RGB image as numpy array.
        min_confidence: Minimum face detection confidence.

    Returns:
        ((478, 3) float32 landmark array or None, confidence). Confidence is
        1.0 on success because the Tasks API doesn't expose a detection score.

    Raises:
        Exception: Any MediaPipe/import/download failure propagates so the
            caller can fall back to the legacy Solutions API.
    """
    FaceLandmarker = mp.tasks.vision.FaceLandmarker
    FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
    RunningMode = mp.tasks.vision.RunningMode
    BaseOptions = mp.tasks.BaseOptions

    import os
    import tempfile
    import urllib.request

    # Download model if not cached.
    # FIX: download to a temp file and move it into place atomically.
    # Previously urlretrieve wrote directly to model_path, so an interrupted
    # download left a truncated file that satisfied the exists() check on
    # every later run, permanently breaking detection.
    model_path = Path(tempfile.gettempdir()) / "face_landmarker_v2_with_blendshapes.task"
    if not model_path.exists():
        url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
        # Temp file in the same directory so os.replace stays on one filesystem
        # (atomic rename).
        fd, tmp_name = tempfile.mkstemp(dir=str(model_path.parent), suffix=".task.part")
        os.close(fd)
        try:
            urllib.request.urlretrieve(url, tmp_name)
            os.replace(tmp_name, str(model_path))
        finally:
            # Clean up the partial file if the download or rename failed.
            if os.path.exists(tmp_name):
                os.unlink(tmp_name)

    options = FaceLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=str(model_path)),
        running_mode=RunningMode.IMAGE,
        num_faces=1,
        min_face_detection_confidence=min_confidence,
        output_face_blendshapes=False,
        output_facial_transformation_matrixes=False,
    )
    with FaceLandmarker.create_from_options(options) as landmarker:
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        result = landmarker.detect(mp_image)

    if not result.face_landmarks:
        return None, 0.0

    face_lms = result.face_landmarks[0]
    landmarks = np.array(
        [(lm.x, lm.y, lm.z) for lm in face_lms],
        dtype=np.float32,
    )
    # MediaPipe Tasks API doesn't expose per-landmark detection confidence;
    # return 1.0 to indicate successful detection
    return landmarks, 1.0
def _extract_solutions_api(
    rgb: np.ndarray,
    min_detection_confidence: float,
    min_tracking_confidence: float,
) -> tuple[np.ndarray | None, float]:
    """Extract landmarks using legacy MediaPipe Solutions API.

    Args:
        rgb: RGB image as numpy array.
        min_detection_confidence: Minimum face detection confidence.
        min_tracking_confidence: Minimum landmark tracking confidence.

    Returns:
        ((478, 3) float32 landmark array or None, confidence). Confidence is
        1.0 on success; the legacy API exposes no detection score.
    """
    mesh = mp.solutions.face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
    )
    with mesh as face_mesh:
        detection = face_mesh.process(rgb)

    if not detection.multi_face_landmarks:
        return None, 0.0

    mesh_points = [(lm.x, lm.y, lm.z) for lm in detection.multi_face_landmarks[0].landmark]
    # Legacy API doesn't expose detection confidence; return 1.0 for success
    return np.array(mesh_points, dtype=np.float32), 1.0
def visualize_landmarks(
    image: np.ndarray,
    face: FaceLandmarks,
    radius: int = 1,
    draw_regions: bool = True,
) -> np.ndarray:
    """Draw colored landmark dots on image by anatomical region.

    Args:
        image: BGR image to draw on (will be copied).
        face: Extracted face landmarks.
        radius: Dot radius in pixels.
        draw_regions: If True, color by region. Otherwise all white.

    Returns:
        Annotated image copy.
    """
    canvas = image.copy()
    # FIX: pixel_coords is a method, not a property. The previous
    # `face.pixel_coords` bound a method object, so iterating it below
    # raised TypeError; it must be called.
    coords = face.pixel_coords()
    if draw_regions:
        # Build index -> color mapping
        idx_to_color: dict[int, tuple[int, int, int]] = {}
        for region, indices in LANDMARK_REGIONS.items():
            color = REGION_COLORS.get(region, (255, 255, 255))
            for idx in indices:
                idx_to_color[idx] = color
        for i, (x, y) in enumerate(coords):
            # Gray for any landmark index not assigned to a named region.
            color = idx_to_color.get(i, (128, 128, 128))
            cv2.circle(canvas, (int(x), int(y)), radius, color, -1)
    else:
        for x, y in coords:
            cv2.circle(canvas, (int(x), int(y)), radius, (255, 255, 255), -1)
    return canvas
def render_landmark_image(
    face: FaceLandmarks,
    width: int | None = None,
    height: int | None = None,
    radius: int = 2,
) -> np.ndarray:
    """Render MediaPipe face mesh tessellation on black canvas.

    Draws the full 2556-edge tessellation mesh that CrucibleAI/ControlNetMediaPipeFace
    was pre-trained on. This is critical -- the ControlNet expects dense triangulated
    wireframes, not sparse dots.

    Falls back to colored dots if tessellation connections aren't available.

    Args:
        face: Extracted face landmarks.
        width: Canvas width (defaults to face.image_width).
        height: Canvas height (defaults to face.image_height).
        radius: Dot radius, used only by the dot fallback.

    Returns:
        BGR image with face mesh on black background.
    """
    canvas_w = width or face.image_width
    canvas_h = height or face.image_height
    canvas = np.zeros((canvas_h, canvas_w, 3), dtype=np.uint8)

    # Scale normalized landmark coordinates to the requested canvas size.
    xy = face.landmarks[:, :2].copy()
    xy[:, 0] *= canvas_w
    xy[:, 1] *= canvas_h
    pts = xy.astype(np.int32)

    try:
        from mediapipe.tasks.python.vision.face_landmarker import FaceLandmarksConnections

        # Thin gray-white tessellation edges first; brighter contour edges
        # (key features) drawn on top. Both connection tables are resolved
        # before any drawing so a missing attribute falls through cleanly.
        edge_layers = (
            (FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION, (192, 192, 192)),
            (FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS, (255, 255, 255)),
        )
        for connections, color in edge_layers:
            for conn in connections:
                start_pt = tuple(pts[conn.start])
                end_pt = tuple(pts[conn.end])
                cv2.line(canvas, start_pt, end_pt, color, 1, cv2.LINE_AA)
    except (ImportError, AttributeError):
        # Fallback: colored dots per anatomical region.
        idx_to_color = {
            idx: REGION_COLORS.get(region, (128, 128, 128))
            for region, indices in LANDMARK_REGIONS.items()
            for idx in indices
        }
        for i, (x, y) in enumerate(xy):
            cv2.circle(canvas, (int(x), int(y)), radius, idx_to_color.get(i, (128, 128, 128)), -1)
    return canvas
def load_image(path: str | Path) -> np.ndarray:
    """Load an image from disk as BGR numpy array.

    Raises:
        FileNotFoundError: If the file is missing or cv2 cannot decode it.
    """
    loaded = cv2.imread(str(path))
    if loaded is None:
        # cv2.imread returns None both for missing files and decode failures.
        raise FileNotFoundError(f"Could not load image: {path}")
    return loaded