| |
| |
| |
| |
| |
|
|
| import contextlib |
| import gzip |
| import os |
| import unittest |
| from typing import List |
|
|
| import numpy as np |
| import torch |
|
|
| from pytorch3d.implicitron.dataset import types |
| from pytorch3d.implicitron.dataset.dataset_base import FrameData |
| from pytorch3d.implicitron.dataset.frame_data import FrameDataBuilder |
| from pytorch3d.implicitron.dataset.utils import ( |
| get_bbox_from_mask, |
| load_16big_png_depth, |
| load_1bit_png_mask, |
| load_depth, |
| load_depth_mask, |
| load_image, |
| load_mask, |
| safe_as_tensor, |
| transpose_normalize_image, |
| ) |
| from pytorch3d.implicitron.tools.config import get_default_args |
| from pytorch3d.renderer.cameras import PerspectiveCameras |
|
|
| from tests.common_testing import TestCaseMixin |
| from tests.implicitron.common_resources import get_skateboard_data |
|
|
|
|
class TestFrameDataBuilder(TestCaseMixin, unittest.TestCase):
    """Tests for FrameDataBuilder and the image/mask/depth loading utilities
    in pytorch3d.implicitron.dataset.utils, exercised against the skateboard
    test dataset fixture."""

    def setUp(self):
        """Fetch the test data, construct a FrameDataBuilder, and build a
        partially populated FrameData for the first annotated frame."""
        torch.manual_seed(42)

        category = "skateboard"
        stack = contextlib.ExitStack()
        self.dataset_root, self.path_manager = stack.enter_context(
            get_skateboard_data()
        )
        # Close the ExitStack (releasing the dataset context) at teardown.
        self.addCleanup(stack.close)
        self.image_height = 768
        self.image_width = 512

        self.frame_data_builder = FrameDataBuilder(
            image_height=self.image_height,
            image_width=self.image_width,
            dataset_root=self.dataset_root,
            path_manager=self.path_manager,
        )

        # Use the first frame annotation of the category as the test subject.
        # The handle is named `annots_file` (not `zipfile`) to avoid shadowing
        # the stdlib `zipfile` module.
        frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz")
        local_file = self.path_manager.get_local_path(frame_file)
        with gzip.open(local_file, "rt", encoding="utf8") as annots_file:
            frame_annots_list = types.load_dataclass(
                annots_file, List[types.FrameAnnotation]
            )
        self.frame_annotation = frame_annots_list[0]

        # Load the sequence annotation matching the chosen frame's sequence.
        sequence_annotations_file = os.path.join(
            self.dataset_root, category, "sequence_annotations.jgz"
        )
        local_file = self.path_manager.get_local_path(sequence_annotations_file)
        with gzip.open(local_file, "rt", encoding="utf8") as annots_file:
            seq_annots_list = types.load_dataclass(
                annots_file, List[types.SequenceAnnotation]
            )
        seq_annots = {entry.sequence_name: entry for entry in seq_annots_list}
        self.seq_annotation = seq_annots[self.frame_annotation.sequence_name]

        point_cloud = self.seq_annotation.point_cloud
        self.frame_data = FrameData(
            frame_number=safe_as_tensor(self.frame_annotation.frame_number, torch.long),
            frame_timestamp=safe_as_tensor(
                self.frame_annotation.frame_timestamp, torch.float
            ),
            sequence_name=self.frame_annotation.sequence_name,
            sequence_category=self.seq_annotation.category,
            camera_quality_score=safe_as_tensor(
                self.seq_annotation.viewpoint_quality_score, torch.float
            ),
            point_cloud_quality_score=(
                safe_as_tensor(point_cloud.quality_score, torch.float)
                if point_cloud is not None
                else None
            ),
        )

    def _assert_image_shape(self, tensor, n_channels):
        """Assert `tensor` has shape (n_channels, image_height, image_width)."""
        self.assertEqual(
            tensor.shape,
            torch.Size([n_channels, self.image_height, self.image_width]),
        )

    def test_frame_data_builder_args(self):
        # Check that FrameDataBuilder's default arguments are picklable /
        # accepted by the implicitron config system.
        get_default_args(FrameDataBuilder)

    def test_fix_point_cloud_path(self):
        """Some files in Co3Dv2 have an accidental absolute path stored."""
        original_path = "some_file_path"
        modified_path = self.frame_data_builder._fix_point_cloud_path(original_path)
        self.assertIn(original_path, modified_path)
        self.assertIn(self.frame_data_builder.dataset_root, modified_path)

    def test_load_and_adjust_frame_data(self):
        """End-to-end: load mask/image/depth, crop, resize, and build a camera."""
        self.frame_data.image_size_hw = safe_as_tensor(
            self.frame_annotation.image.size, torch.long
        )
        self.frame_data.effective_image_size_hw = self.frame_data.image_size_hw

        fg_mask_np, mask_path = self.frame_data_builder._load_fg_probability(
            self.frame_annotation
        )
        self.frame_data.mask_path = mask_path
        self.frame_data.fg_probability = safe_as_tensor(fg_mask_np, torch.float)
        mask_thr = self.frame_data_builder.box_crop_mask_thr
        bbox_xywh = get_bbox_from_mask(fg_mask_np, mask_thr)
        self.frame_data.bbox_xywh = safe_as_tensor(bbox_xywh, torch.long)

        self.assertIsNotNone(self.frame_data.mask_path)
        self.assertTrue(torch.is_tensor(self.frame_data.fg_probability))
        self.assertTrue(torch.is_tensor(self.frame_data.bbox_xywh))
        # The bbox is a single (x, y, w, h) box.
        self.assertEqual(self.frame_data.bbox_xywh.shape, torch.Size([4]))

        image_path = os.path.join(
            self.frame_data_builder.dataset_root, self.frame_annotation.image.path
        )
        image_np = load_image(self.frame_data_builder._local_path(image_path))
        self.assertIsInstance(image_np, np.ndarray)
        self.frame_data.image_rgb = self.frame_data_builder._postprocess_image(
            image_np, self.frame_annotation.image.size, self.frame_data.fg_probability
        )
        self.assertIsInstance(self.frame_data.image_rgb, torch.Tensor)

        (
            self.frame_data.depth_map,
            depth_path,
            self.frame_data.depth_mask,
        ) = self.frame_data_builder._load_mask_depth(
            self.frame_annotation,
            self.frame_data.fg_probability,
        )
        self.assertTrue(torch.is_tensor(self.frame_data.depth_map))
        self.assertIsNotNone(depth_path)
        self.assertTrue(torch.is_tensor(self.frame_data.depth_mask))

        new_size = (self.image_height, self.image_width)

        if self.frame_data_builder.box_crop:
            self.frame_data.crop_by_metadata_bbox_(
                self.frame_data_builder.box_crop_context,
            )

        self.frame_data.resize_frame_(
            new_size_hw=torch.tensor(new_size, dtype=torch.long),
        )
        # After the in-place resize, every per-pixel tensor must match the
        # requested output size.
        self._assert_image_shape(self.frame_data.mask_crop, 1)
        self._assert_image_shape(self.frame_data.image_rgb, 3)
        self._assert_image_shape(self.frame_data.fg_probability, 1)
        self._assert_image_shape(self.frame_data.depth_map, 1)
        self._assert_image_shape(self.frame_data.depth_mask, 1)

        self.frame_data.camera = self.frame_data_builder._get_pytorch3d_camera(
            self.frame_annotation,
        )
        self.assertEqual(type(self.frame_data.camera), PerspectiveCameras)

    def test_transpose_normalize_image(self):
        def inverse_transpose_normalize_image(image: np.ndarray) -> np.ndarray:
            # Undo the CHW layout and [0, 1] scaling applied by
            # transpose_normalize_image.
            im = image * 255.0
            return im.transpose((1, 2, 0)).astype(np.uint8)

        # A 2D (H, W) grayscale input round-trips to (H, W, 1).
        input_image = np.array(
            [[10, 20, 30], [40, 50, 60], [70, 80, 90]], dtype=np.uint8
        )
        expected_input = inverse_transpose_normalize_image(
            transpose_normalize_image(input_image)
        )
        self.assertClose(input_image[..., None], expected_input)

        # An (H, W, 3) RGB input round-trips unchanged.
        input_image = np.array(
            [
                [[10, 20, 30], [40, 50, 60], [70, 80, 90]],
                [[100, 110, 120], [130, 140, 150], [160, 170, 180]],
                [[190, 200, 210], [220, 230, 240], [250, 255, 255]],
            ],
            dtype=np.uint8,
        )
        expected_input = inverse_transpose_normalize_image(
            transpose_normalize_image(input_image)
        )
        self.assertClose(input_image, expected_input)

    def test_load_image(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.image.path)
        local_path = self.path_manager.get_local_path(path)
        image = load_image(local_path)
        # Images load as float32 with values normalized into [0, 1].
        self.assertEqual(image.dtype, np.float32)
        self.assertLessEqual(np.max(image), 1.0)
        self.assertGreaterEqual(np.min(image), 0.0)

    def test_load_mask(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.mask.path)
        path = self.path_manager.get_local_path(path)
        mask = load_mask(path)
        # Masks load as float32 probabilities in [0, 1].
        self.assertEqual(mask.dtype, np.float32)
        self.assertLessEqual(np.max(mask), 1.0)
        self.assertGreaterEqual(np.min(mask), 0.0)

    def test_load_depth(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.depth.path)
        path = self.path_manager.get_local_path(path)
        depth_map = load_depth(path, self.frame_annotation.depth.scale_adjustment)
        # load_depth returns a 3D float32 array (channel-first).
        self.assertEqual(depth_map.dtype, np.float32)
        self.assertEqual(len(depth_map.shape), 3)

    def test_load_16big_png_depth(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.depth.path)
        path = self.path_manager.get_local_path(path)
        depth_map = load_16big_png_depth(path)
        # The raw 16-bit PNG loader returns a 2D float32 array.
        self.assertEqual(depth_map.dtype, np.float32)
        self.assertEqual(len(depth_map.shape), 2)

    def test_load_1bit_png_mask(self):
        mask_path = os.path.join(
            self.dataset_root, self.frame_annotation.depth.mask_path
        )
        mask_path = self.path_manager.get_local_path(mask_path)
        mask = load_1bit_png_mask(mask_path)
        # The 1-bit PNG loader returns a 2D float32 array.
        self.assertEqual(mask.dtype, np.float32)
        self.assertEqual(len(mask.shape), 2)

    def test_load_depth_mask(self):
        mask_path = os.path.join(
            self.dataset_root, self.frame_annotation.depth.mask_path
        )
        mask_path = self.path_manager.get_local_path(mask_path)
        mask = load_depth_mask(mask_path)
        # load_depth_mask returns a 3D float32 array (channel-first).
        self.assertEqual(mask.dtype, np.float32)
        self.assertEqual(len(mask.shape), 3)
|
|