File size: 4,919 Bytes
fd53eef
cfdd827
fd53eef
 
 
cfdd827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a76de72
 
 
cfdd827
 
 
a76de72
cfdd827
 
 
a76de72
cfdd827
 
 
 
 
 
 
fd53eef
cfdd827
 
a76de72
 
cfdd827
 
 
a76de72
 
cfdd827
 
 
 
 
 
 
 
 
 
fd53eef
cfdd827
 
 
 
 
 
 
 
 
 
 
fd53eef
 
 
 
 
 
 
 
 
 
 
cfdd827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd53eef
 
 
 
 
 
 
 
 
 
 
 
cfdd827
 
 
 
 
 
 
 
 
 
 
 
fd53eef
 
cfdd827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd53eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfdd827
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
"""Conditioning signal generation: static adjacency wireframe + auto-Canny.

Uses a pre-defined anatomical adjacency matrix (NOT dynamic Delaunay) to prevent
triangle inversion on drastic landmark displacements. Auto-Canny adapts thresholds
to skin tone (Fitzpatrick I-VI safe).
"""

from __future__ import annotations

import cv2
import numpy as np

from landmarkdiff.landmarks import FaceLandmarks

# Static anatomical adjacency for MediaPipe 478 landmarks.
# Connects landmarks along anatomically meaningful contours:
# jawline, nasal dorsum, orbital rim, lip vermilion, eyebrow arch.
# This is invariant to landmark displacement (unlike Delaunay).

# Closed loop: index 10 is repeated at the end so the polyline closes on itself.
# NOTE(review): these indices include forehead/temple points, so the chain
# appears to trace MediaPipe's full FACE_OVAL rather than only the jawline —
# confirm the name against the MediaPipe face-mesh topology.
JAWLINE_CONTOUR = [
    10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
    397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
    172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109, 10,
]

# Closed loop around the left eye (first index 33 repeated at the end).
LEFT_EYE_CONTOUR = [
    33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246, 33,
]

# Closed loop around the right eye (first index 362 repeated at the end).
RIGHT_EYE_CONTOUR = [
    362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398, 362,
]

# Open polylines: eyebrows are arches, not loops (no repeated endpoint).
LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]

# Open polylines for the nose: dorsum (bridge), tip ring, and nostril base.
NOSE_BRIDGE = [168, 6, 197, 195, 5, 4, 1]
NOSE_TIP = [94, 2, 326, 327, 294, 278, 279, 275, 274, 460, 456, 363, 370]
NOSE_BOTTOM = [19, 1, 274, 275, 440, 344, 278, 294, 460, 305, 289, 392]

# Closed loop around the outer lip vermilion (first index 61 repeated at the end).
OUTER_LIPS = [
    61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
    308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78, 61,
]

# Closed loop around the inner lip boundary (first index 78 repeated at the end).
INNER_LIPS = [
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308,
    324, 318, 402, 317, 14, 87, 178, 88, 95, 78,
]

# Master list consumed by render_wireframe(); each entry is drawn as a
# polyline by connecting consecutive index pairs.
ALL_CONTOURS = [
    JAWLINE_CONTOUR,
    LEFT_EYE_CONTOUR,
    RIGHT_EYE_CONTOUR,
    LEFT_EYEBROW,
    RIGHT_EYEBROW,
    NOSE_BRIDGE,
    NOSE_TIP,
    NOSE_BOTTOM,
    OUTER_LIPS,
    INNER_LIPS,
]


def render_wireframe(
    face: FaceLandmarks,
    width: int | None = None,
    height: int | None = None,
    thickness: int = 1,
) -> np.ndarray:
    """Draw the static anatomical adjacency wireframe on a black canvas.

    Each contour in ``ALL_CONTOURS`` is rendered as a polyline by connecting
    consecutive landmark indices with white line segments.

    Args:
        face: Facial landmarks with normalized (0..1) x/y coordinates.
        width: Canvas width in pixels; defaults to ``face.image_width``.
        height: Canvas height in pixels; defaults to ``face.image_height``.
        thickness: Line thickness in pixels.

    Returns:
        Single-channel uint8 wireframe image (0 background, 255 lines).
    """
    out_w = width or face.image_width
    out_h = height or face.image_height
    image = np.zeros((out_h, out_w), dtype=np.uint8)

    # Denormalize to pixel space on a copy, then truncate to integer coords.
    scaled = face.landmarks[:, :2].copy()
    scaled[:, 0] *= out_w
    scaled[:, 1] *= out_h
    pixel = scaled.astype(np.int32)

    # Connect consecutive index pairs along every contour.
    for chain in ALL_CONTOURS:
        for start, end in zip(chain, chain[1:]):
            cv2.line(image, tuple(pixel[start]), tuple(pixel[end]), 255, thickness)

    return image


def auto_canny(image: np.ndarray) -> np.ndarray:
    """Run Canny edge detection with thresholds adapted to the input.

    Thresholds are derived from the median of the non-zero pixels
    (0.66*median / 1.33*median) rather than hardcoded 50/150, so the
    detector behaves consistently across all Fitzpatrick skin types.
    The edge map is then thinned with iterative morphological
    skeletonization to guarantee 1-pixel-wide edges (ControlNet blurs
    on edges 2+ pixels thick).

    Args:
        image: Grayscale uint8 input image.

    Returns:
        Binary edge map (uint8, values 0 or 255).
    """
    # Median of the lit pixels only; fall back to mid-gray for a black image.
    nonzero = image[image > 0]
    med = np.median(nonzero) if nonzero.size else 128.0
    low_thresh = int(max(0, 0.66 * med))
    high_thresh = int(min(255, 1.33 * med))

    edges = cv2.Canny(image, low_thresh, high_thresh)

    # Classic morphological skeleton: at each pass, the pixels removed by an
    # opening (erode then dilate) are accumulated, then the working image is
    # eroded one step. Terminates when the working image is fully eroded.
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    skeleton = np.zeros_like(edges)
    working = edges.copy()

    # The image cannot survive more erosions than its larger dimension.
    for _ in range(max(edges.shape)):
        shrunk = cv2.erode(working, kernel)
        opened = cv2.dilate(shrunk, kernel)
        skeleton = cv2.bitwise_or(skeleton, cv2.subtract(working, opened))
        working = shrunk.copy()
        if cv2.countNonZero(working) == 0:
            break

    return skeleton


def generate_conditioning(
    face: FaceLandmarks,
    width: int | None = None,
    height: int | None = None,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Build the full three-channel conditioning signal for ControlNet.

    Produces, per the spec:
    1. Rendered landmark dots (colored, BGR).
    2. Canny edge map derived from the static wireframe (grayscale).
    3. The wireframe rendering itself (grayscale).

    Args:
        face: Extracted facial landmarks.
        width: Output width in pixels; defaults to ``face.image_width``.
        height: Output height in pixels; defaults to ``face.image_height``.

    Returns:
        Tuple of (landmark_image, canny_edges, wireframe).
    """
    # Local import mirrors the module-level pattern and avoids an import cycle.
    from landmarkdiff.landmarks import render_landmark_image

    out_w = width or face.image_width
    out_h = height or face.image_height

    dots = render_landmark_image(face, out_w, out_h)
    wire = render_wireframe(face, out_w, out_h)
    edge_map = auto_canny(wire)

    return dots, edge_map, wire