"""Conditioning signal generation: static adjacency wireframe + auto-Canny.
Uses a pre-defined anatomical adjacency matrix (NOT dynamic Delaunay) to prevent
triangle inversion on drastic landmark displacements. Auto-Canny adapts thresholds
to skin tone (Fitzpatrick I-VI safe).
"""
from __future__ import annotations
import cv2
import numpy as np
from landmarkdiff.landmarks import FaceLandmarks
# Static anatomical adjacency for MediaPipe 478 landmarks.
# Connects landmarks along anatomically meaningful contours:
# jawline, nasal dorsum, orbital rim, lip vermilion, eyebrow arch.
# This is invariant to landmark displacement (unlike Delaunay).
#
# Each constant is an ordered polyline of FaceMesh landmark indices;
# render_wireframe joins consecutive entries with a line segment, so a
# contour that repeats its first index at the end draws as a closed loop.

# NOTE(review): this traversal appears to cover the full face oval
# (forehead included), matching MediaPipe's FACEMESH_FACE_OVAL ordering,
# not just the jawline — confirm the intended coverage vs. the name.
JAWLINE_CONTOUR = [
    10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
    397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
    172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109, 10,
]
# NOTE(review): "LEFT"/"RIGHT" here appear to be image-space (viewer)
# sides; the 33/133 index set corresponds to MediaPipe's
# FACEMESH_RIGHT_EYE (subject-right) — confirm the naming convention.
LEFT_EYE_CONTOUR = [
    33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246, 33,
]
RIGHT_EYE_CONTOUR = [
    362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398, 362,
]
# Eyebrow arches: open polylines (endpoints are not rejoined).
LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]
# Nasal dorsum from nasion toward the tip (open polyline).
NOSE_BRIDGE = [168, 6, 197, 195, 5, 4, 1]
# Lower-nose contours (open polylines); exact anatomical coverage is
# defined by the MediaPipe index map.
NOSE_TIP = [94, 2, 326, 327, 294, 278, 279, 275, 274, 460, 456, 363, 370]
NOSE_BOTTOM = [19, 1, 274, 275, 440, 344, 278, 294, 460, 305, 289, 392]
# Lip vermilion borders. OUTER_LIPS repeats index 61 to close the loop;
# INNER_LIPS repeats index 78.
OUTER_LIPS = [
    61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
    308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78, 61,
]
INNER_LIPS = [
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308,
    324, 318, 402, 317, 14, 87, 178, 88, 95, 78,
]
# Every contour rendered by render_wireframe, in draw order.
ALL_CONTOURS = [
    JAWLINE_CONTOUR,
    LEFT_EYE_CONTOUR,
    RIGHT_EYE_CONTOUR,
    LEFT_EYEBROW,
    RIGHT_EYEBROW,
    NOSE_BRIDGE,
    NOSE_TIP,
    NOSE_BOTTOM,
    OUTER_LIPS,
    INNER_LIPS,
]
def render_wireframe(
    face: FaceLandmarks,
    width: int | None = None,
    height: int | None = None,
    thickness: int = 1,
) -> np.ndarray:
    """Draw the static anatomical adjacency contours on a black canvas.

    The contour topology comes from ``ALL_CONTOURS`` and is fixed, so the
    wireframe stays well-formed no matter how far landmarks are displaced
    (unlike a per-frame Delaunay triangulation).

    Args:
        face: Facial landmarks with normalized (0-1) coordinates.
        width: Canvas width; defaults to ``face.image_width``.
        height: Canvas height; defaults to ``face.image_height``.
        thickness: Line thickness in pixels.

    Returns:
        Single-channel uint8 image: 255 wireframe on a black background.
    """
    out_w = width or face.image_width
    out_h = height or face.image_height
    # Scale normalized coordinates into pixel space, then truncate to ints.
    scaled = face.landmarks[:, :2].copy()
    scaled[:, 0] *= out_w
    scaled[:, 1] *= out_h
    pixel = scaled.astype(np.int32)
    canvas = np.zeros((out_h, out_w), dtype=np.uint8)
    for contour in ALL_CONTOURS:
        # Pairwise walk over consecutive indices draws each segment once;
        # closed contours repeat their first index, so no extra closing
        # segment is needed here.
        for idx_a, idx_b in zip(contour, contour[1:]):
            cv2.line(canvas, tuple(pixel[idx_a]), tuple(pixel[idx_b]), 255, thickness)
    return canvas
def auto_canny(image: np.ndarray) -> np.ndarray:
    """Auto-Canny edge detection with median-adaptive thresholds.

    Thresholds are derived from the median of the non-zero pixels
    (0.66 * median, 1.33 * median) rather than hardcoded 50/150, so the
    detector adapts across all Fitzpatrick skin types. The raw Canny
    output is then reduced to a morphological skeleton, guaranteeing
    1-pixel-wide edges (ControlNet blurs on edges 2+ pixels thick).

    Args:
        image: Grayscale input image.

    Returns:
        Binary edge map (uint8, values 0 or 255).
    """
    nonzero = image[image > 0]
    # Fall back to mid-gray when the image is entirely black.
    med = float(np.median(nonzero)) if nonzero.size else 128.0
    lower = int(max(0, 0.66 * med))
    upper = int(min(255, 1.33 * med))
    edge_map = cv2.Canny(image, lower, upper)
    # Classic morphological skeletonization: repeatedly erode, and at each
    # step accumulate (current - opening(current)) into the skeleton.
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    skeleton = np.zeros_like(edge_map)
    working = edge_map.copy()
    # Each erosion peels at least one pixel layer, so max(h, w) iterations
    # is a safe upper bound before the image empties out.
    for _ in range(max(edge_map.shape[0], edge_map.shape[1])):
        eroded = cv2.erode(working, kernel)
        opened = cv2.dilate(eroded, kernel)
        skeleton = cv2.bitwise_or(skeleton, cv2.subtract(working, opened))
        working = eroded.copy()
        if cv2.countNonZero(working) == 0:
            break
    return skeleton
def generate_conditioning(
    face: FaceLandmarks,
    width: int | None = None,
    height: int | None = None,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Build the full three-channel conditioning signal for ControlNet.

    Per the spec, the channels are:
        1. Rendered landmark dots (colored, BGR)
        2. Canny edge map derived from the static wireframe (grayscale)
        3. The wireframe rendering itself (grayscale)

    Args:
        face: Extracted facial landmarks.
        width: Output width; defaults to ``face.image_width``.
        height: Output height; defaults to ``face.image_height``.

    Returns:
        Tuple of (landmark_image, canny_edges, wireframe).
    """
    # Imported locally to avoid a circular import with landmarkdiff.landmarks.
    from landmarkdiff.landmarks import render_landmark_image

    out_w = width or face.image_width
    out_h = height or face.image_height
    wireframe = render_wireframe(face, out_w, out_h)
    return (
        render_landmark_image(face, out_w, out_h),
        auto_canny(wireframe),
        wireframe,
    )
|