| """This module contains simple helper functions and classes for preprocessing """ |
| import numpy as np |
| import cv2 |
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
| from pytorch3d.renderer import ( |
| SfMPerspectiveCameras, |
| RasterizationSettings, |
| MeshRenderer, |
| MeshRasterizer, |
| SoftPhongShader, |
| PointLights, |
| ) |
| from pytorch3d.structures import Meshes |
| from pytorch3d.renderer.mesh import Textures |
| DEFAULT_DTYPE = torch.float32 |
| INVALID_TRANS=np.ones(3)*-1 |
|
|
def smpl_to_pose(model_type='smplx', use_hands=True, use_face=True,
                 use_face_contour=False, openpose_format='coco25'):
    ''' Returns the indices that reorder SMPL-family joints into OpenPose order

        Parameters
        ----------
        model_type: str, optional
            The type of SMPL-like model that is used. The default mapping
            returned is for the SMPL-X model
        use_hands: bool, optional
            Flag for adding to the returned permutation the mapping for the
            hand keypoints. Defaults to True
        use_face: bool, optional
            Flag for adding to the returned permutation the mapping for the
            face keypoints. Defaults to True
        use_face_contour: bool, optional
            Flag for appending the facial contour keypoints. Defaults to False
        openpose_format: str, optional
            The target OpenPose keypoint format. 'coco25', 'coco19' and 'h36'
            are supported. Defaults to 'coco25'
    '''
    if openpose_format.lower() == 'coco25':
        if model_type == 'smpl':
            return np.array([24, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5, 8, 1, 4,
                             7, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
                            dtype=np.int32)
        elif model_type == 'smplh':
            body_mapping = np.array([52, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5,
                                     8, 1, 4, 7, 53, 54, 55, 56, 57, 58, 59,
                                     60, 61, 62], dtype=np.int32)
            mapping = [body_mapping]
            if use_hands:
                lhand_mapping = np.array([20, 34, 35, 36, 63, 22, 23, 24, 64,
                                          25, 26, 27, 65, 31, 32, 33, 66, 28,
                                          29, 30, 67], dtype=np.int32)
                rhand_mapping = np.array([21, 49, 50, 51, 68, 37, 38, 39, 69,
                                          40, 41, 42, 70, 46, 47, 48, 71, 43,
                                          44, 45, 72], dtype=np.int32)
                mapping += [lhand_mapping, rhand_mapping]
            return np.concatenate(mapping)
        elif model_type == 'smplx':
            body_mapping = np.array([55, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5,
                                     8, 1, 4, 7, 56, 57, 58, 59, 60, 61, 62,
                                     63, 64, 65], dtype=np.int32)
            mapping = [body_mapping]
            if use_hands:
                lhand_mapping = np.array([20, 37, 38, 39, 66, 25, 26, 27,
                                          67, 28, 29, 30, 68, 34, 35, 36, 69,
                                          31, 32, 33, 70], dtype=np.int32)
                rhand_mapping = np.array([21, 52, 53, 54, 71, 40, 41, 42, 72,
                                          43, 44, 45, 73, 49, 50, 51, 74, 46,
                                          47, 48, 75], dtype=np.int32)
                mapping += [lhand_mapping, rhand_mapping]
            if use_face:
                face_mapping = np.arange(76, 127 + 17 * use_face_contour,
                                         dtype=np.int32)
                mapping += [face_mapping]
            return np.concatenate(mapping)
        else:
            raise ValueError('Unknown model type: {}'.format(model_type))
    elif openpose_format == 'coco19':
        if model_type == 'smpl':
            return np.array([24, 12, 17, 19, 21, 16, 18, 20, 2, 5, 8,
                             1, 4, 7, 25, 26, 27, 28],
                            dtype=np.int32)
        elif model_type == 'smpl_neutral':
            return np.array([14, 12, 8, 7, 6, 9, 10, 11, 2, 1, 0, 3, 4, 5,
                             16, 15, 18, 17], dtype=np.int32)
        elif model_type == 'smplh':
            body_mapping = np.array([52, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5,
                                     8, 1, 4, 7, 53, 54, 55, 56],
                                    dtype=np.int32)
            mapping = [body_mapping]
            if use_hands:
                lhand_mapping = np.array([20, 34, 35, 36, 57, 22, 23, 24, 58,
                                          25, 26, 27, 59, 31, 32, 33, 60, 28,
                                          29, 30, 61], dtype=np.int32)
                rhand_mapping = np.array([21, 49, 50, 51, 62, 37, 38, 39, 63,
                                          40, 41, 42, 64, 46, 47, 48, 65, 43,
                                          44, 45, 66], dtype=np.int32)
                mapping += [lhand_mapping, rhand_mapping]
            return np.concatenate(mapping)
        elif model_type == 'smplx':
            body_mapping = np.array([55, 12, 17, 19, 21, 16, 18, 20, 0, 2, 5,
                                     8, 1, 4, 7, 56, 57, 58, 59],
                                    dtype=np.int32)
            mapping = [body_mapping]
            if use_hands:
                lhand_mapping = np.array([20, 37, 38, 39, 60, 25, 26, 27,
                                          61, 28, 29, 30, 62, 34, 35, 36, 63,
                                          31, 32, 33, 64], dtype=np.int32)
                rhand_mapping = np.array([21, 52, 53, 54, 65, 40, 41, 42, 66,
                                          43, 44, 45, 67, 49, 50, 51, 68, 46,
                                          47, 48, 69], dtype=np.int32)
                mapping += [lhand_mapping, rhand_mapping]
            if use_face:
                face_mapping = np.arange(70, 70 + 51 +
                                         17 * use_face_contour,
                                         dtype=np.int32)
                mapping += [face_mapping]
            return np.concatenate(mapping)
        else:
            raise ValueError('Unknown model type: {}'.format(model_type))
    elif openpose_format == 'h36':
        if model_type == 'smpl':
            return np.array([2, 5, 8, 1, 4, 7, 12, 24, 16, 18, 20, 17, 19, 21],
                            dtype=np.int32)
        elif model_type == 'smpl_neutral':
            # Return an array for consistency with the other branches.
            return np.array([6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10],
                            dtype=np.int32)
    else:
        raise ValueError('Unknown joint format: {}'.format(openpose_format))
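
# A minimal usage sketch (the `joints` tensor and its shape are hypothetical,
# not part of this module): reorder SMPL-X joints into OpenPose COCO-25 order.
#
#     mapping = smpl_to_pose(model_type='smplx', openpose_format='coco25')
#     # joints: (B, J, 3) SMPL-X joints; fancy indexing yields the OpenPose layout
#     openpose_joints = joints[:, mapping]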
|
|
def render_trimesh(renderer, mesh, R, T, mode='np'):
    '''Renders a trimesh mesh under camera pose (R, T) and returns a uint8 image.'''
    verts = torch.tensor(mesh.vertices).cuda().float()[None]
    faces = torch.tensor(mesh.faces).cuda()[None]
    colors = torch.tensor(mesh.visual.vertex_colors).float().cuda()[None, ..., :3] / 255
    renderer.set_camera(R, T)
    image = renderer.render_mesh_recon(verts, faces, colors=colors, mode=mode)[0]
    image = (255 * image).data.cpu().numpy().astype(np.uint8)
    return image
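
# A minimal usage sketch (the mesh path and camera pose are hypothetical):
#
#     import trimesh
#     mesh = trimesh.load('mesh.obj')                       # assumed input mesh
#     R = torch.eye(3).cuda().float().unsqueeze(0)          # (1, 3, 3) rotation
#     T = torch.zeros(1, 3).cuda().float()                  # (1, 3) translation
#     img = render_trimesh(renderer, mesh, R, T, mode='n')  # normal-map render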
|
|
def estimate_translation_cv2(joints_3d, joints_2d, focal_length=600,
                             img_size=np.array([512., 512.]), proj_mat=None, cam_dist=None):
    if proj_mat is None:
        # Build a pinhole intrinsic matrix with the principal point at the image center.
        camK = np.eye(3)
        camK[0, 0], camK[1, 1] = focal_length, focal_length
        camK[:2, 2] = img_size // 2
    else:
        camK = proj_mat
    _, _, tvec, inliers = cv2.solvePnPRansac(joints_3d, joints_2d, camK, cam_dist,
                                             flags=cv2.SOLVEPNP_EPNP,
                                             reprojectionError=20, iterationsCount=100)
    if inliers is None:
        return INVALID_TRANS
    else:
        tra_pred = tvec[:, 0]
        return tra_pred
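
# A minimal usage sketch (the joint arrays are hypothetical): estimate the
# camera translation that aligns 3D joints with their 2D detections via PnP.
#
#     # joints_3d: (J, 3) float32, joints_2d: (J, 2) float32
#     trans = estimate_translation_cv2(joints_3d, joints_2d, focal_length=600)
#     if np.all(trans == INVALID_TRANS):
#         print('PnP found no inliers')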
|
|
class JointMapper(nn.Module):
    def __init__(self, joint_maps=None):
        super(JointMapper, self).__init__()
        if joint_maps is None:
            # No mapping requested; forward() becomes a no-op.
            self.joint_maps = joint_maps
        else:
            self.register_buffer('joint_maps',
                                 torch.tensor(joint_maps, dtype=torch.long))

    def forward(self, joints, **kwargs):
        if self.joint_maps is None:
            return joints
        else:
            return torch.index_select(joints, 1, self.joint_maps)
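
# A minimal usage sketch: wrap the permutation from smpl_to_pose so a model's
# joints come out in OpenPose order (the `joints` tensor is hypothetical).
#
#     mapper = JointMapper(smpl_to_pose(model_type='smplx'))
#     openpose_joints = mapper(joints)  # joints: (B, J, 3)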
|
|
def transform_mat(R, t):
    ''' Creates a batch of homogeneous transformation matrices
        Args:
            - R: Bx3x3 batch of rotation matrices
            - t: Bx3x1 batch of translation vectors
        Returns:
            - T: Bx4x4 batch of transformation matrices
    '''
    # Pad R with a zero row and t with a trailing 1, then stack them side by side.
    return torch.cat([F.pad(R, [0, 0, 0, 1]),
                      F.pad(t, [0, 0, 0, 1], value=1)], dim=2)
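
# A minimal check of the construction: the result has block form [[R, t], [0, 1]].
#
#     R = torch.eye(3).unsqueeze(0)  # (1, 3, 3)
#     t = torch.ones(1, 3, 1)        # (1, 3, 1)
#     T = transform_mat(R, t)        # (1, 4, 4), last row [0, 0, 0, 1]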
|
|
def transform_smpl(curr_extrinsic, target_extrinsic, smpl_pose, smpl_trans, T_hip):
    '''Re-expresses SMPL global orientation and translation in a new camera frame.
    Note: `target_extrinsic` and `smpl_pose` are modified in place.
    '''
    R_root = cv2.Rodrigues(smpl_pose[:3])[0]
    transf_global_ori = np.linalg.inv(target_extrinsic[:3, :3]) @ curr_extrinsic[:3, :3] @ R_root

    target_extrinsic[:3, -1] = curr_extrinsic[:3, :3] @ (smpl_trans + T_hip) \
        + curr_extrinsic[:3, -1] - smpl_trans - target_extrinsic[:3, :3] @ T_hip

    smpl_pose[:3] = cv2.Rodrigues(transf_global_ori)[0].reshape(3)
    smpl_trans = np.linalg.inv(target_extrinsic[:3, :3]) @ smpl_trans

    return target_extrinsic, smpl_pose, smpl_trans
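
# A minimal usage sketch (all arrays are hypothetical): move a SMPL body from
# one camera's extrinsics to another's while keeping its world placement.
#
#     # curr_E, target_E: (4, 4) extrinsics; pose: (72,); trans, T_hip: (3,)
#     target_E, pose, trans = transform_smpl(curr_E, target_E.copy(),
#                                            pose.copy(), trans, T_hip)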
|
|
class GMoF(nn.Module):
    '''Geman-McClure robust error function, scaled by rho ** 2.'''

    def __init__(self, rho=1):
        super(GMoF, self).__init__()
        self.rho = rho

    def extra_repr(self):
        return 'rho = {}'.format(self.rho)

    def forward(self, residual):
        squared_res = residual ** 2
        dist = torch.div(squared_res, squared_res + self.rho ** 2)
        return self.rho ** 2 * dist
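
# A minimal usage sketch: the penalty is roughly quadratic for small residuals
# and saturates at rho ** 2 for large ones, down-weighting outliers
# (the keypoint tensors are hypothetical).
#
#     robustifier = GMoF(rho=100)
#     loss = robustifier(joints_2d_pred - joints_2d_gt).sum()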
|
|
class PerspectiveCamera(nn.Module):
    '''Simple pinhole camera with frozen rotation and optimizable translation.'''

    FOCAL_LENGTH = 50 * 128

    def __init__(self, rotation=None, translation=None,
                 focal_length_x=None, focal_length_y=None,
                 batch_size=1,
                 center=None, dtype=torch.float32):
        super(PerspectiveCamera, self).__init__()
        self.batch_size = batch_size
        self.dtype = dtype

        self.register_buffer('zero',
                             torch.zeros([batch_size], dtype=dtype))

        if focal_length_x is None or isinstance(focal_length_x, float):
            focal_length_x = torch.full(
                [batch_size],
                self.FOCAL_LENGTH if focal_length_x is None else
                focal_length_x,
                dtype=dtype)

        if focal_length_y is None or isinstance(focal_length_y, float):
            focal_length_y = torch.full(
                [batch_size],
                self.FOCAL_LENGTH if focal_length_y is None else
                focal_length_y,
                dtype=dtype)

        self.register_buffer('focal_length_x', focal_length_x)
        self.register_buffer('focal_length_y', focal_length_y)

        if center is None:
            center = torch.zeros([batch_size, 2], dtype=dtype)
        self.register_buffer('center', center)

        if rotation is None:
            rotation = torch.eye(
                3, dtype=dtype).unsqueeze(dim=0).repeat(batch_size, 1, 1)

        rotation = nn.Parameter(rotation, requires_grad=False)
        self.register_parameter('rotation', rotation)

        if translation is None:
            translation = torch.zeros([batch_size, 3], dtype=dtype)

        translation = nn.Parameter(translation,
                                   requires_grad=True)
        self.register_parameter('translation', translation)
|
|
    def forward(self, points):
        device = points.device
        with torch.no_grad():
            camera_mat = torch.zeros([self.batch_size, 2, 2],
                                     dtype=self.dtype, device=points.device)
            camera_mat[:, 0, 0] = self.focal_length_x
            camera_mat[:, 1, 1] = self.focal_length_y

        camera_transform = transform_mat(self.rotation,
                                         self.translation.unsqueeze(dim=-1))
        homog_coord = torch.ones(list(points.shape)[:-1] + [1],
                                 dtype=points.dtype,
                                 device=device)
        # Convert the points to homogeneous coordinates
        points_h = torch.cat([points, homog_coord], dim=-1)

        projected_points = torch.einsum('bki,bji->bjk',
                                        [camera_transform, points_h])

        # Perspective division, then scale by the focal lengths and shift by
        # the principal point.
        img_points = torch.div(projected_points[:, :, :2],
                               projected_points[:, :, 2].unsqueeze(dim=-1))
        img_points = torch.einsum('bki,bji->bjk', [camera_mat, img_points]) \
            + self.center.unsqueeze(dim=1)
        return img_points
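
# A minimal usage sketch (the focal length, center, and points are hypothetical):
# project a batch of 3D points to pixel coordinates.
#
#     cam = PerspectiveCamera(focal_length_x=600., focal_length_y=600.,
#                             center=torch.tensor([[256., 256.]]))
#     points = torch.randn(1, 10, 3) + torch.tensor([0., 0., 5.])  # in front of cam
#     pixels = cam(points)  # (1, 10, 2)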
class Renderer:

    def __init__(self, principal_point=None, img_size=None, cam_intrinsic=None):
        super().__init__()
        self.device = torch.device("cuda:0")
        torch.cuda.set_device(self.device)
        self.cam_intrinsic = cam_intrinsic
        self.image_size = img_size
        self.render_img_size = np.max(img_size)

        # The principal_point argument is recomputed here: convert the
        # pixel-space intrinsics into PyTorch3D's NDC convention.
        principal_point = [-(self.cam_intrinsic[0, 2] - self.image_size[1] / 2.) / (self.image_size[1] / 2.),
                           -(self.cam_intrinsic[1, 2] - self.image_size[0] / 2.) / (self.image_size[0] / 2.)]
        self.principal_point = torch.tensor(principal_point, device=self.device).unsqueeze(0)

        self.cam_R = torch.from_numpy(np.array([[-1., 0., 0.],
                                                [0., -1., 0.],
                                                [0., 0., 1.]])).cuda().float().unsqueeze(0)
        self.cam_T = torch.zeros((1, 3)).cuda().float()

        half_max_length = max(self.cam_intrinsic[0:2, 2])
        self.focal_length = torch.tensor([(self.cam_intrinsic[0, 0] / half_max_length).astype(np.float32),
                                          (self.cam_intrinsic[1, 1] / half_max_length).astype(np.float32)]).unsqueeze(0)

        self.cameras = SfMPerspectiveCameras(focal_length=self.focal_length,
                                             principal_point=self.principal_point,
                                             R=self.cam_R, T=self.cam_T, device=self.device)

        # Ambient-only lighting, so rendered colors are the vertex colors.
        self.lights = PointLights(device=self.device, location=[[0.0, 0.0, 0.0]],
                                  ambient_color=((1, 1, 1),), diffuse_color=((0, 0, 0),),
                                  specular_color=((0, 0, 0),))

        self.raster_settings = RasterizationSettings(image_size=self.render_img_size,
                                                     faces_per_pixel=10, blur_radius=0,
                                                     max_faces_per_bin=30000)
        self.rasterizer = MeshRasterizer(cameras=self.cameras, raster_settings=self.raster_settings)

        self.shader = SoftPhongShader(device=self.device, cameras=self.cameras, lights=self.lights)

        self.renderer = MeshRenderer(rasterizer=self.rasterizer, shader=self.shader)

    def set_camera(self, R, T):
        # Clone so the caller's tensors are not modified by the in-place flips below.
        self.cam_R = R.clone()
        self.cam_T = T.clone()
        # Flip x and y to match PyTorch3D's camera convention, and transpose R
        # because PyTorch3D applies rotations in a row-vector convention.
        self.cam_R[:, :2, :] *= -1.0
        self.cam_T[:, :2] *= -1.0
        self.cam_R = torch.transpose(self.cam_R, 1, 2)
        self.cameras = SfMPerspectiveCameras(focal_length=self.focal_length,
                                             principal_point=self.principal_point,
                                             R=self.cam_R, T=self.cam_T, device=self.device)
        self.rasterizer = MeshRasterizer(cameras=self.cameras, raster_settings=self.raster_settings)
        self.shader = SoftPhongShader(device=self.device, cameras=self.cameras, lights=self.lights)
        self.renderer = MeshRenderer(rasterizer=self.rasterizer, shader=self.shader)
|
|
    def render_mesh_recon(self, verts, faces, R=None, T=None, colors=None, mode='npat'):
        '''
        mode: a string of flags; 'n' renders a normal map and 'p' renders
        front-lit Lambertian shading (other characters are ignored here).
        The requested renders are concatenated along the image height.
        '''
        with torch.no_grad():
            mesh = Meshes(verts, faces)

            normals = torch.stack(mesh.verts_normals_list())
            front_light = -torch.tensor([0, 0, -1]).float().to(verts.device)
            shades = (normals * front_light.view(1, 1, 3)).sum(-1).clamp(min=0).unsqueeze(-1).expand(-1, -1, 3)
            results = []
            # shaded image
            if 'p' in mode:
                mesh_shading = Meshes(verts, faces, textures=Textures(verts_rgb=shades))
                image_phong = self.renderer(mesh_shading)
                results.append(image_phong)
            # normal image, normals mapped from [-1, 1] to [0, 1] with channels reversed
            if 'n' in mode:
                normals_vis = normals * 0.5 + 0.5
                normals_vis = normals_vis[:, :, [2, 1, 0]]
                mesh_normal = Meshes(verts, faces, textures=Textures(verts_rgb=normals_vis))
                image_normal = self.renderer(mesh_normal)
                results.append(image_normal)
            return torch.cat(results, dim=1)
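
# A minimal usage sketch (the intrinsics and image size are hypothetical, and a
# CUDA device is required): build a Renderer from pixel-space intrinsics, then
# render with the render_trimesh helper above.
#
#     K = np.array([[600., 0., 256.],
#                   [0., 600., 256.],
#                   [0., 0., 1.]])
#     renderer = Renderer(img_size=[512, 512], cam_intrinsic=K)
#     img = render_trimesh(renderer, mesh, R, T, mode='n')  # mesh/R/T as earlier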