File size: 4,919 Bytes
fd53eef
cfdd827
fd53eef
 
 
cfdd827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a76de72
 
 
cfdd827
 
 
a76de72
cfdd827
 
 
a76de72
cfdd827
 
 
 
 
 
 
fd53eef
cfdd827
 
a76de72
 
cfdd827
 
 
a76de72
 
cfdd827
 
 
 
 
 
 
 
 
 
fd53eef
cfdd827
 
 
 
 
 
 
 
 
 
 
fd53eef
 
 
 
 
 
 
 
 
 
 
cfdd827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd53eef
 
 
 
 
 
 
 
 
 
 
 
cfdd827
 
 
 
 
 
 
 
 
 
 
 
fd53eef
 
cfdd827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd53eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfdd827
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
"""Conditioning signal generation: static adjacency wireframe + auto-Canny.

Uses a pre-defined anatomical adjacency matrix (NOT dynamic Delaunay) to prevent
triangle inversion on drastic landmark displacements. Auto-Canny adapts thresholds
to skin tone (Fitzpatrick I-VI safe).
"""

from __future__ import annotations

import cv2
import numpy as np

from landmarkdiff.landmarks import FaceLandmarks

# Static anatomical adjacency for MediaPipe 478 landmarks.
# Connects landmarks along anatomically meaningful contours:
# jawline, nasal dorsum, orbital rim, lip vermilion, eyebrow arch.
# This is invariant to landmark displacement (unlike Delaunay).

# Closed loop: index 10 is repeated at the end so the polyline closes on itself.
# NOTE(review): these indices include forehead/temple points, so the chain
# appears to trace MediaPipe's full FACE_OVAL rather than only the jawline —
# confirm the name against the MediaPipe face-mesh topology.
JAWLINE_CONTOUR = [
    10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
    397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
    172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109, 10,
]

# Closed loop around the left eye (first index 33 repeated at the end).
LEFT_EYE_CONTOUR = [
    33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246, 33,
]

# Closed loop around the right eye (first index 362 repeated at the end).
RIGHT_EYE_CONTOUR = [
    362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398, 362,
]

# Open polylines: eyebrows are arches, not loops (no repeated endpoint).
LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]

# Open polylines for the nose: dorsum (bridge), tip ring, and nostril base.
NOSE_BRIDGE = [168, 6, 197, 195, 5, 4, 1]
NOSE_TIP = [94, 2, 326, 327, 294, 278, 279, 275, 274, 460, 456, 363, 370]
NOSE_BOTTOM = [19, 1, 274, 275, 440, 344, 278, 294, 460, 305, 289, 392]

# Closed loop around the outer lip vermilion (first index 61 repeated at the end).
OUTER_LIPS = [
    61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
    308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78, 61,
]

# Closed loop around the inner lip boundary (first index 78 repeated at the end).
INNER_LIPS = [
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308,
    324, 318, 402, 317, 14, 87, 178, 88, 95, 78,
]

# Master list consumed by render_wireframe(); each entry is drawn as a
# polyline by connecting consecutive index pairs.
ALL_CONTOURS = [
    JAWLINE_CONTOUR,
    LEFT_EYE_CONTOUR,
    RIGHT_EYE_CONTOUR,
    LEFT_EYEBROW,
    RIGHT_EYEBROW,
    NOSE_BRIDGE,
    NOSE_TIP,
    NOSE_BOTTOM,
    OUTER_LIPS,
    INNER_LIPS,
]


def render_wireframe(
    face: FaceLandmarks,
    width: int | None = None,
    height: int | None = None,
    thickness: int = 1,
) -> np.ndarray:
    """Draw the static anatomical adjacency wireframe on a black canvas.

    Each contour in ``ALL_CONTOURS`` is rendered as a polyline by connecting
    consecutive landmark indices with white line segments.

    Args:
        face: Facial landmarks with normalized (0..1) x/y coordinates.
        width: Canvas width in pixels; defaults to ``face.image_width``.
        height: Canvas height in pixels; defaults to ``face.image_height``.
        thickness: Line thickness in pixels.

    Returns:
        Single-channel uint8 wireframe image (0 background, 255 lines).
    """
    out_w = width or face.image_width
    out_h = height or face.image_height
    image = np.zeros((out_h, out_w), dtype=np.uint8)

    # Denormalize to pixel space on a copy, then truncate to integer coords.
    scaled = face.landmarks[:, :2].copy()
    scaled[:, 0] *= out_w
    scaled[:, 1] *= out_h
    pixel = scaled.astype(np.int32)

    # Connect consecutive index pairs along every contour.
    for chain in ALL_CONTOURS:
        for start, end in zip(chain, chain[1:]):
            cv2.line(image, tuple(pixel[start]), tuple(pixel[end]), 255, thickness)

    return image


def auto_canny(image: np.ndarray) -> np.ndarray:
    """Run Canny edge detection with thresholds adapted to the input.

    Thresholds are derived from the median of the non-zero pixels
    (0.66*median / 1.33*median) rather than hardcoded 50/150, so the
    detector behaves consistently across all Fitzpatrick skin types.
    The edge map is then thinned with iterative morphological
    skeletonization to guarantee 1-pixel-wide edges (ControlNet blurs
    on edges 2+ pixels thick).

    Args:
        image: Grayscale uint8 input image.

    Returns:
        Binary edge map (uint8, values 0 or 255).
    """
    # Median of the lit pixels only; fall back to mid-gray for a black image.
    nonzero = image[image > 0]
    med = np.median(nonzero) if nonzero.size else 128.0
    low_thresh = int(max(0, 0.66 * med))
    high_thresh = int(min(255, 1.33 * med))

    edges = cv2.Canny(image, low_thresh, high_thresh)

    # Classic morphological skeleton: at each pass, the pixels removed by an
    # opening (erode then dilate) are accumulated, then the working image is
    # eroded one step. Terminates when the working image is fully eroded.
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    skeleton = np.zeros_like(edges)
    working = edges.copy()

    # The image cannot survive more erosions than its larger dimension.
    for _ in range(max(edges.shape)):
        shrunk = cv2.erode(working, kernel)
        opened = cv2.dilate(shrunk, kernel)
        skeleton = cv2.bitwise_or(skeleton, cv2.subtract(working, opened))
        working = shrunk.copy()
        if cv2.countNonZero(working) == 0:
            break

    return skeleton


def generate_conditioning(
    face: FaceLandmarks,
    width: int | None = None,
    height: int | None = None,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Build the full three-channel conditioning signal for ControlNet.

    Produces, per the spec:
    1. Rendered landmark dots (colored, BGR).
    2. Canny edge map derived from the static wireframe (grayscale).
    3. The wireframe rendering itself (grayscale).

    Args:
        face: Extracted facial landmarks.
        width: Output width in pixels; defaults to ``face.image_width``.
        height: Output height in pixels; defaults to ``face.image_height``.

    Returns:
        Tuple of (landmark_image, canny_edges, wireframe).
    """
    # Local import mirrors the module-level pattern and avoids an import cycle.
    from landmarkdiff.landmarks import render_landmark_image

    out_w = width or face.image_width
    out_h = height or face.image_height

    dots = render_landmark_image(face, out_w, out_h)
    wire = render_wireframe(face, out_w, out_h)
    edge_map = auto_canny(wire)

    return dots, edge_map, wire