| import mediapipe as mp |
| from mediapipe import solutions |
| from mediapipe.framework.formats import landmark_pb2 |
| import numpy as np |
| import cv2 |
|
|
|
|
def convert_bbox_to_square_bbox(bbox, max_h, max_w, scale=1.0):
    """Turn a [(x_min, y_min), (x_max, y_max)] bbox into a flat square box.

    The square is centered on the input box, its side is the longer input
    edge times ``scale``, and the result is clipped to the image bounds.

    Args:
        bbox: [(x_min, y_min), (x_max, y_max)] corner pair.
        max_h: image height used to clip the bottom edge.
        max_w: image width used to clip the right edge.
        scale: multiplier applied to the square's side length.

    Returns:
        [x_min, y_min, x_max, y_max] as ints, clipped to [0, max_w] x [0, max_h].
    """
    (x0, y0), (x1, y1) = bbox
    side = max(x1 - x0, y1 - y0) * scale
    half = side / 2
    cx = (x0 + x1) / 2
    cy = (y0 + y1) / 2

    # Truncate to int first (matching int() semantics), then clip to the frame.
    x_min = max(0, int(cx - half))
    y_min = max(0, int(cy - half))
    x_max = min(max_w, int(cx + half))
    y_max = min(max_h, int(cy + half))
    return [x_min, y_min, x_max, y_max]
|
|
|
|
def draw_landmarks_on_image(rgb_image, detection_result):
    """Overlay every detected face's mesh on a copy of ``rgb_image``.

    Draws three layers per face (tesselation, contours, irises) using the
    default MediaPipe styles and returns the annotated copy; the input
    image is left untouched.
    """
    annotated = np.copy(rgb_image)

    for face_landmarks in detection_result.face_landmarks:
        # Re-pack the task-API landmarks into the legacy proto format that
        # solutions.drawing_utils expects.
        proto = landmark_pb2.NormalizedLandmarkList()
        proto.landmark.extend(
            [
                landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
                for lm in face_landmarks
            ]
        )

        layers = (
            (
                mp.solutions.face_mesh.FACEMESH_TESSELATION,
                mp.solutions.drawing_styles.get_default_face_mesh_tesselation_style(),
            ),
            (
                mp.solutions.face_mesh.FACEMESH_CONTOURS,
                mp.solutions.drawing_styles.get_default_face_mesh_contours_style(),
            ),
            (
                mp.solutions.face_mesh.FACEMESH_IRISES,
                mp.solutions.drawing_styles.get_default_face_mesh_iris_connections_style(),
            ),
        )
        for connections, style in layers:
            solutions.drawing_utils.draw_landmarks(
                image=annotated,
                landmark_list=proto,
                connections=connections,
                landmark_drawing_spec=None,
                connection_drawing_spec=style,
            )

    return annotated
|
|
|
|
class FaceDetector:
    """MediaPipe FaceLandmarker wrapper that extracts per-face geometry.

    For each detected face it derives a coarse head orientation, normalized
    3D keypoints, pixel bounding boxes for the whole face / mouth / eyes,
    blendshape scores, facial transformation matrices and optional debug
    visualizations from a single RGB image.
    """

    # FaceMesh landmark indices for the lips (outer + inner contours).
    # Deduplicated from the original edge-pair list; only min/max extents
    # and membership tests are performed on it, so behavior is unchanged.
    _MOUTH_LANDMARKS = [
        61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
        185, 40, 39, 37, 0, 267, 269, 270, 409,
        78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
        191, 80, 81, 82, 13, 312, 311, 310, 415,
    ]
    # FaceMesh landmark indices outlining the nose region.
    _NOSE_LANDMARKS = [48, 115, 220, 45, 4, 275, 440, 344, 278]
    # FaceMesh landmark indices outlining each eye.
    _LEFT_EYE_LANDMARKS = [362, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374, 380, 381, 382]
    _RIGHT_EYE_LANDMARKS = [33, 246, 161, 160, 159, 158, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7]
    # Iris-center landmark ids emitted by the refined FaceMesh model.
    _LEFT_IRIS_ID = 468
    _RIGHT_IRIS_ID = 473

    def __init__(self, mediapipe_model_asset_path, delegate=1, face_detection_confidence=0.5, num_faces=5):
        """Create the underlying FaceLandmarker.

        Args:
            mediapipe_model_asset_path: path to the .task model asset.
            delegate: mp.tasks.BaseOptions delegate value (presumably
                0 = CPU, 1 = GPU — confirm against the installed mediapipe).
            face_detection_confidence: single threshold reused for the
                detection, presence and tracking confidences.
            num_faces: maximum number of faces to detect per image.
        """
        options = mp.tasks.vision.FaceLandmarkerOptions(
            base_options=mp.tasks.BaseOptions(
                model_asset_path=mediapipe_model_asset_path,
                delegate=delegate,
            ),
            running_mode=mp.tasks.vision.RunningMode.IMAGE,
            num_faces=num_faces,
            output_face_blendshapes=True,
            output_facial_transformation_matrixes=True,
            min_face_detection_confidence=face_detection_confidence,
            min_face_presence_confidence=face_detection_confidence,
            min_tracking_confidence=face_detection_confidence,
        )
        self.detector = mp.tasks.vision.FaceLandmarker.create_from_options(options)

    # ------------------------------------------------------------------
    # Private helpers shared by both public methods.
    # ------------------------------------------------------------------

    @staticmethod
    def _pixel_bbox(face_landmarks, indices, w, h):
        """Pixel bbox [(x_min, y_min), (x_max, y_max)] over the given
        landmark indices; normalized coordinates are clipped to [0, 1]."""
        xs = [int(np.clip(face_landmarks[i].x, 0, 1) * w) for i in indices]
        ys = [int(np.clip(face_landmarks[i].y, 0, 1) * h) for i in indices]
        return [(min(xs), min(ys)), (max(xs), max(ys))]

    @staticmethod
    def _bbox_center(bbox):
        """Center of a [(x_min, y_min), (x_max, y_max)] bbox as np.array.

        Bug fix: the original code averaged x_max with y_max for the second
        coordinate (e.g. ``(bbox[1][0] + bbox[1][1]) / 2``), yielding a
        meaningless y value for mouth/nose/eye center points.
        """
        (x_min, y_min), (x_max, y_max) = bbox
        return np.array([(x_min + x_max) / 2, (y_min + y_max) / 2])

    @staticmethod
    def _face_pose(face_landmarks, w, h):
        """Keypoints, pixel face bbox and a coarse yaw estimate.

        Returns (keypoints, bounding_box, x, y, orientation) where x/y are
        the normalized offsets of the nose tip (landmark 4) from the
        midpoint of the cheeks (landmarks 137/366), and orientation is
        'left' / 'right' / 'forward' using a 0.15 threshold on x.
        """
        keypoints = [(lm.x, lm.y, lm.z) for lm in face_landmarks]
        px = [int(np.clip(lm.x, 0, 1) * w) for lm in face_landmarks]
        py = [int(np.clip(lm.y, 0, 1) * h) for lm in face_landmarks]
        bounding_box = [(min(px), min(py)), (max(px), max(py))]

        right_cheek, left_cheek, nose = keypoints[137], keypoints[366], keypoints[4]
        x = nose[0] - (right_cheek[0] + left_cheek[0]) / 2.0
        y = nose[1] - (right_cheek[1] + left_cheek[1]) / 2.0
        if x > 0.15:
            orientation = "left"
        elif x < -0.15:
            orientation = "right"
        else:
            orientation = "forward"
        return keypoints, bounding_box, x, y, orientation

    @staticmethod
    def _landmark_dots(image, face_landmarks, w, h):
        """Image-sized array with one white pixel per landmark inside the
        frame (landmarks outside the image are silently skipped)."""
        dots = np.zeros_like(image)
        max_h, max_w = image.shape[:2]
        for lm in face_landmarks:
            cx, cy = int(lm.x * w), int(lm.y * h)
            if 0 <= cx < max_w and 0 <= cy < max_h:
                dots[cy, cx] = (255, 255, 255)
        return dots

    @classmethod
    def _eyeball_layers(cls, image, face_landmarks, w, h, left_size, right_size):
        """Filled iris circles (radius = eye height // 3) on a black canvas,
        plus the boolean mask of the painted pixels (H x W x 1)."""
        eyeball = np.zeros_like(image)
        max_h, max_w = image.shape[:2]
        for iris_id, size in ((cls._LEFT_IRIS_ID, left_size), (cls._RIGHT_IRIS_ID, right_size)):
            if iris_id >= len(face_landmarks):
                # Model without iris refinement outputs only 468 landmarks.
                continue
            lm = face_landmarks[iris_id]
            cx, cy = int(lm.x * w), int(lm.y * h)
            if 0 <= cx < max_w and 0 <= cy < max_h:
                cv2.circle(eyeball, (cx, cy), radius=int(size // 3), color=(255, 0, 0), thickness=-1)
        eyeball_mask = (eyeball.sum(axis=2) != 0)[:, :, None]
        return eyeball, eyeball_mask

    @classmethod
    def _save_visualization(cls, image, face_landmarks, mouth_bbox, eye_bbox, w, h):
        """Draw the mouth/eye boxes and all landmarks onto ``image`` in
        place and write it to 'image_detect.png' (channels reversed because
        OpenCV expects BGR)."""
        x_min, y_min, x_max, y_max = mouth_bbox
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)
        for bbox in eye_bbox.values():
            x_min, y_min, x_max, y_max = bbox
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)

        mouth_ids = set(cls._MOUTH_LANDMARKS)
        eye_ids = set(cls._LEFT_EYE_LANDMARKS) | set(cls._RIGHT_EYE_LANDMARKS)
        for landmark_id, landmark in enumerate(face_landmarks):
            cx, cy = int(landmark.x * w), int(landmark.y * h)
            if landmark_id in mouth_ids:
                color = (0, 0, 255)   # mouth: red
            elif landmark_id in eye_ids:
                color = (0, 255, 0)   # eyes: green
            else:
                color = (255, 255, 255)
            cv2.circle(image, (cx, cy), 2, color, -1)
        cv2.imwrite('image_detect.png', image[:, :, ::-1])

    # ------------------------------------------------------------------
    # Public API.
    # ------------------------------------------------------------------

    def get_one_face_xy_rotation_and_keypoints(self, image, mouth_bbox_scale=1.2, eye_bbox_scale=1.5, annotate_image: bool = False, save_vis=False):
        """Full per-face analysis of an RGB image.

        Args:
            image: H x W x 3 RGB array.
            mouth_bbox_scale: scale factor for the square mouth crop box.
            eye_bbox_scale: scale factor for the square eye crop boxes.
            annotate_image: also return a copy with the face mesh drawn.
            save_vis: write a debug image to 'image_detect.png' (draws on
                ``image`` in place).

        Returns an 18-tuple:
            (all_x, all_y, all_orientation, num_faces, all_keypoints,
             all_bounding_box, all_mouth_bounding_box, all_eye_bounding_box,
             all_face_contour, all_blendshapes,
             all_facial_transformation_matrices, annotated_image,
             all_mouth_p, all_nose_p, all_left_eye_p, all_right_eye_p,
             all_eyeball, all_eyeball_mask)
        """
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)
        results = self.detector.detect(mp_image)
        max_h, max_w = image.shape[:2]
        h, w = max_h, max_w

        annotated_image = draw_landmarks_on_image(image, results) if annotate_image else None

        num_faces = len(results.face_landmarks)
        all_blendshapes = [[item.score for item in fb] for fb in results.face_blendshapes]
        all_facial_transformation_matrices = results.facial_transformation_matrixes

        all_x, all_y, all_orientation = [], [], []
        all_keypoints, all_bounding_box = [], []
        all_mouth_bounding_box, all_eye_bounding_box = [], []
        all_face_contour, all_eyeball, all_eyeball_mask = [], [], []
        all_mouth_p, all_nose_p, all_left_eye_p, all_right_eye_p = [], [], [], []

        for face_landmarks in results.face_landmarks:
            keypoints, bounding_box, x, y, orientation = self._face_pose(face_landmarks, w, h)
            all_keypoints.append(keypoints)
            all_bounding_box.append(bounding_box)
            all_x.append(x)
            all_y.append(y)
            all_orientation.append(orientation)

            # Region boxes; centers are taken from the tight (pre-square) box.
            mouth_bbox = self._pixel_bbox(face_landmarks, self._MOUTH_LANDMARKS, w, h)
            all_mouth_p.append(self._bbox_center(mouth_bbox))
            mouth_bbox = convert_bbox_to_square_bbox(mouth_bbox, max_h, max_w, scale=mouth_bbox_scale)
            all_mouth_bounding_box.append(mouth_bbox)

            nose_bbox = self._pixel_bbox(face_landmarks, self._NOSE_LANDMARKS, w, h)
            all_nose_p.append(self._bbox_center(nose_bbox))

            left_eye_bbox = self._pixel_bbox(face_landmarks, self._LEFT_EYE_LANDMARKS, w, h)
            left_size = left_eye_bbox[1][1] - left_eye_bbox[0][1]  # eye height drives the iris radius
            all_left_eye_p.append(self._bbox_center(left_eye_bbox))
            left_eye_bbox = convert_bbox_to_square_bbox(left_eye_bbox, max_h, max_w, scale=eye_bbox_scale)

            right_eye_bbox = self._pixel_bbox(face_landmarks, self._RIGHT_EYE_LANDMARKS, w, h)
            right_size = right_eye_bbox[1][1] - right_eye_bbox[0][1]
            all_right_eye_p.append(self._bbox_center(right_eye_bbox))
            right_eye_bbox = convert_bbox_to_square_bbox(right_eye_bbox, max_h, max_w, scale=eye_bbox_scale)

            eye_bbox = {"left_eye": left_eye_bbox, "right_eye": right_eye_bbox}
            all_eye_bounding_box.append(eye_bbox)

            all_face_contour.append(self._landmark_dots(image, face_landmarks, w, h))
            eyeball, eyeball_mask = self._eyeball_layers(image, face_landmarks, w, h, left_size, right_size)
            all_eyeball.append(eyeball)
            all_eyeball_mask.append(eyeball_mask)

            if save_vis:
                self._save_visualization(image, face_landmarks, mouth_bbox, eye_bbox, w, h)

        return (
            all_x,
            all_y,
            all_orientation,
            num_faces,
            all_keypoints,
            all_bounding_box,
            all_mouth_bounding_box,
            all_eye_bounding_box,
            all_face_contour,
            all_blendshapes,
            all_facial_transformation_matrices,
            annotated_image,
            all_mouth_p,
            all_nose_p,
            all_left_eye_p,
            all_right_eye_p,
            all_eyeball,
            all_eyeball_mask,
        )

    def get_face_xy_rotation_and_keypoints(self, image, mouth_bbox_scale=1.2, eye_bbox_scale=1.5, annotate_image: bool = False, save_vis=False):
        """Lighter variant of :meth:`get_one_face_xy_rotation_and_keypoints`:
        same detection pipeline but without nose/eyeball outputs and without
        the region center points.

        Returns a 12-tuple:
            (all_x, all_y, all_orientation, num_faces, all_keypoints,
             all_bounding_box, all_mouth_bounding_box, all_eye_bounding_box,
             all_face_contour, all_blendshapes,
             all_facial_transformation_matrices, annotated_image)
        """
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)
        results = self.detector.detect(mp_image)
        max_h, max_w = image.shape[:2]
        h, w = max_h, max_w

        annotated_image = draw_landmarks_on_image(image, results) if annotate_image else None

        num_faces = len(results.face_landmarks)
        all_blendshapes = [[item.score for item in fb] for fb in results.face_blendshapes]
        all_facial_transformation_matrices = results.facial_transformation_matrixes

        all_x, all_y, all_orientation = [], [], []
        all_keypoints, all_bounding_box = [], []
        all_mouth_bounding_box, all_eye_bounding_box = [], []
        all_face_contour = []

        for face_landmarks in results.face_landmarks:
            keypoints, bounding_box, x, y, orientation = self._face_pose(face_landmarks, w, h)
            all_keypoints.append(keypoints)
            all_bounding_box.append(bounding_box)
            all_x.append(x)
            all_y.append(y)
            all_orientation.append(orientation)

            mouth_bbox = self._pixel_bbox(face_landmarks, self._MOUTH_LANDMARKS, w, h)
            mouth_bbox = convert_bbox_to_square_bbox(mouth_bbox, max_h, max_w, scale=mouth_bbox_scale)
            all_mouth_bounding_box.append(mouth_bbox)

            left_eye_bbox = self._pixel_bbox(face_landmarks, self._LEFT_EYE_LANDMARKS, w, h)
            left_eye_bbox = convert_bbox_to_square_bbox(left_eye_bbox, max_h, max_w, scale=eye_bbox_scale)
            right_eye_bbox = self._pixel_bbox(face_landmarks, self._RIGHT_EYE_LANDMARKS, w, h)
            right_eye_bbox = convert_bbox_to_square_bbox(right_eye_bbox, max_h, max_w, scale=eye_bbox_scale)

            eye_bbox = {"left_eye": left_eye_bbox, "right_eye": right_eye_bbox}
            all_eye_bounding_box.append(eye_bbox)

            all_face_contour.append(self._landmark_dots(image, face_landmarks, w, h))

            if save_vis:
                self._save_visualization(image, face_landmarks, mouth_bbox, eye_bbox, w, h)

        return (
            all_x,
            all_y,
            all_orientation,
            num_faces,
            all_keypoints,
            all_bounding_box,
            all_mouth_bounding_box,
            all_eye_bounding_box,
            all_face_contour,
            all_blendshapes,
            all_facial_transformation_matrices,
            annotated_image,
        )