Upload folder using huggingface_hub

6ed4a9c verified 4 months ago

16.6 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import copy
	from os.path import dirname, exists, join

	import numpy as np
	import torch
	from mmengine.config import Config
	from mmengine.dataset import pseudo_collate
	from mmengine.structures import InstanceData, PixelData

	from mmdet.utils.util_random import ensure_rng
	from ..registry import TASK_UTILS
	from ..structures import DetDataSample, TrackDataSample
	from ..structures.bbox import HorizontalBoxes


	def _get_config_directory():
	"""Find the predefined detector config directory."""
	try:
	# Assume we are running in the source mmdetection repo
	repo_dpath = dirname(dirname(dirname(__file__)))
	except NameError:
	# For IPython development when this __file__ is not defined
	import mmdet
	repo_dpath = dirname(dirname(mmdet.__file__))
	config_dpath = join(repo_dpath, 'configs')
	if not exists(config_dpath):
	raise Exception('Cannot find config path')
	return config_dpath


	def _get_config_module(fname):
	    """Read a config file located inside the predefined config directory.

	    Args:
	        fname (str): Config path relative to the config directory.

	    Returns:
	        Config: The object produced by ``Config.fromfile``.
	    """
	    return Config.fromfile(join(_get_config_directory(), fname))


	def get_detector_cfg(fname):
	    """Fetch the ``model`` section of a config for building a detector.

	    The section is deep copied, so callers may edit the returned object
	    freely without the change leaking into other tests.

	    Args:
	        fname (str): Config path relative to the config directory.

	    Returns:
	        A deep copy of ``config.model``.
	    """
	    return copy.deepcopy(_get_config_module(fname).model)


	def get_roi_head_cfg(fname):
	    """Fetch the ``roi_head`` section of a config for building a roi_head.

	    The model config is deep copied first, so callers may edit the
	    returned object freely without the change leaking into other tests.

	    Args:
	        fname (str): Config path relative to the config directory.

	    Returns:
	        The ``roi_head`` config with ``train_cfg`` / ``test_cfg`` filled
	        from the ``rcnn`` entry of the model-level settings (``None``
	        when the model-level setting is ``None``).
	    """
	    detector = copy.deepcopy(_get_config_module(fname).model)
	    head_cfg = detector.roi_head

	    rcnn_settings = dict(train_cfg=None, test_cfg=None)
	    if detector.train_cfg is not None:
	        rcnn_settings['train_cfg'] = detector.train_cfg.rcnn
	    if detector.test_cfg is not None:
	        rcnn_settings['test_cfg'] = detector.test_cfg.rcnn

	    head_cfg.update(rcnn_settings)
	    return head_cfg


	def _rand_bboxes(rng, num_boxes, w, h):
	cx, cy, bw, bh = rng.rand(num_boxes, 4).T

	tl_x = ((cx * w) - (w * bw / 2)).clip(0, w)
	tl_y = ((cy * h) - (h * bh / 2)).clip(0, h)
	br_x = ((cx * w) + (w * bw / 2)).clip(0, w)
	br_y = ((cy * h) + (h * bh / 2)).clip(0, h)

	bboxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
	return bboxes


	def _rand_masks(rng, num_boxes, bboxes, img_w, img_h):
	    """Create random bitmap masks confined to the given boxes.

	    Args:
	        rng: Random generator exposing ``rand`` (e.g.
	            ``np.random.RandomState``).
	        num_boxes (int): Number of masks to allocate.
	        bboxes (np.ndarray): Boxes as (x1, y1, x2, y2) rows; coordinates
	            are truncated to int32 before use.
	        img_w (int): Mask width.
	        img_h (int): Mask height.

	    Returns:
	        BitmapMasks: ``num_boxes`` masks of size ``img_h`` x ``img_w``.
	    """
	    from mmdet.structures.mask import BitmapMasks

	    canvas = np.zeros((num_boxes, img_h, img_w))
	    for idx, box in enumerate(bboxes):
	        x1, y1, x2, y2 = box.astype(np.int32)[:4]
	        # A pixel inside the box is foreground when its uniform sample
	        # exceeds 0.3.
	        patch = rng.rand(1, y2 - y1, x2 - x1) > 0.3
	        canvas[idx:idx + 1, y1:y2, x1:x2] = patch.astype(np.int64)
	    return BitmapMasks(canvas, height=img_h, width=img_w)


	def demo_mm_inputs(batch_size=2,
	                   image_shapes=(3, 128, 128),
	                   num_items=None,
	                   num_classes=10,
	                   sem_seg_output_strides=1,
	                   with_mask=False,
	                   with_semantic=False,
	                   use_box_type=False,
	                   device='cpu',
	                   texts=None,
	                   custom_entities=False):
	    """Create a superset of inputs needed to run test or train batches.

	    All random content is drawn from a ``RandomState`` seeded with 0, so
	    repeated calls with the same arguments produce the same data.

	    Args:
	        batch_size (int): batch size. Defaults to 2.
	        image_shapes (List[tuple], Optional): image shape as (C, H, W).
	            A single tuple is shared by every item of the batch; a list
	            must hold exactly ``batch_size`` shapes.
	            Defaults to (3, 128, 128)
	        num_items (None | List[int]): specifies the number
	            of boxes in each batch item. When None, a random count in
	            ``[1, 10)`` is used per item. Default to None.
	        num_classes (int): number of different labels a
	            box might have. Box labels are drawn from
	            ``[1, num_classes)``. Defaults to 10.
	        sem_seg_output_strides (int): Factor by which the semantic map
	            is smaller than the image along H and W. Defaults to 1.
	        with_mask (bool): Whether to return mask annotation.
	            Defaults to False.
	        with_semantic (bool): whether to return semantic.
	            Defaults to False.
	        use_box_type (bool): Whether to wrap boxes in
	            ``HorizontalBoxes`` instead of plain float tensors.
	            Defaults to False.
	        device (str): Destination device type. Defaults to cpu.
	        texts (None | List): Per-item text stored in the image meta
	            under ``'text'``; its length must equal ``batch_size``.
	            Defaults to None.
	        custom_entities (bool): Value stored in the image meta under
	            ``'custom_entities'``; only written when ``texts`` is given.
	            Defaults to False.

	    Returns:
	        The result of ``pseudo_collate`` applied to one dict per batch
	        item, each holding ``'inputs'`` and ``'data_samples'``.
	    """
	    rng = np.random.RandomState(0)

	    if isinstance(image_shapes, list):
	        assert len(image_shapes) == batch_size
	    else:
	        image_shapes = [image_shapes] * batch_size

	    if isinstance(num_items, list):
	        assert len(num_items) == batch_size

	    if texts is not None:
	        assert batch_size == len(texts)

	    packed_inputs = []
	    for idx in range(batch_size):
	        image_shape = image_shapes[idx]
	        _, h, w = image_shape

	        image = rng.randint(0, 255, size=image_shape, dtype=np.uint8)

	        mm_inputs = dict()
	        mm_inputs['inputs'] = torch.from_numpy(image).to(device)

	        img_meta = {
	            'img_id': idx,
	            'img_shape': image_shape[1:],
	            'ori_shape': image_shape[1:],
	            'filename': '<demo>.png',
	            'scale_factor': np.array([1.1, 1.2]),
	            'flip': False,
	            'flip_direction': None,
	            'border': [1, 1, 1, 1]  # Only used by CenterNet
	        }

	        if texts:
	            img_meta['text'] = texts[idx]
	            img_meta['custom_entities'] = custom_entities

	        data_sample = DetDataSample()
	        data_sample.set_metainfo(img_meta)

	        # gt_instances
	        gt_instances = InstanceData()
	        if num_items is None:
	            num_boxes = rng.randint(1, 10)
	        else:
	            num_boxes = num_items[idx]

	        bboxes = _rand_bboxes(rng, num_boxes, w, h)
	        labels = rng.randint(1, num_classes, size=num_boxes)
	        # TODO: remove this part when all model adapted with BaseBoxes
	        if use_box_type:
	            gt_instances.bboxes = HorizontalBoxes(bboxes, dtype=torch.float32)
	        else:
	            gt_instances.bboxes = torch.FloatTensor(bboxes)
	        gt_instances.labels = torch.LongTensor(labels)

	        if with_mask:
	            masks = _rand_masks(rng, num_boxes, bboxes, w, h)
	            gt_instances.masks = masks

	        # TODO: waiting for ci to be fixed
	        # masks = np.random.randint(0, 2, (len(bboxes), h, w), dtype=np.uint8)
	        # gt_instances.mask = BitmapMasks(masks, h, w)

	        data_sample.gt_instances = gt_instances

	        # ignore_instances
	        ignore_instances = InstanceData()
	        bboxes = _rand_bboxes(rng, num_boxes, w, h)
	        if use_box_type:
	            ignore_instances.bboxes = HorizontalBoxes(
	                bboxes, dtype=torch.float32)
	        else:
	            ignore_instances.bboxes = torch.FloatTensor(bboxes)
	        data_sample.ignored_instances = ignore_instances

	        # gt_sem_seg
	        if with_semantic:
	            # The semantic map is ``1 / sem_seg_output_strides`` of the
	            # image size. It is drawn from the seeded ``rng`` (not the
	            # global numpy state) so the output stays reproducible.
	            sem_seg_shape = (1, h // sem_seg_output_strides,
	                             w // sem_seg_output_strides)
	            gt_semantic_seg = torch.from_numpy(
	                rng.randint(0, num_classes, sem_seg_shape, dtype=np.uint8))
	            data_sample.gt_sem_seg = PixelData(sem_seg=gt_semantic_seg)

	        mm_inputs['data_samples'] = data_sample.to(device)

	        # TODO: gt_ignore

	        packed_inputs.append(mm_inputs)
	    data = pseudo_collate(packed_inputs)
	    return data


	def demo_mm_proposals(image_shapes, num_proposals, device='cpu'):
	    """Create a list of fake proposals.

	    Args:
	        image_shapes (list[tuple[int]]): Batch image shapes, each given
	            as (C, H, W).
	        num_proposals (int): The number of fake proposals per image.
	        device (str): Destination device type. Defaults to cpu.

	    Returns:
	        list[InstanceData]: One entry per image holding float ``bboxes``,
	        float ``scores`` and all-zero long ``labels``.
	    """
	    rng = np.random.RandomState(0)

	    results = []
	    for img_shape in image_shapes:
	        result = InstanceData()
	        # Shapes are channel-first, so the trailing pair is (H, W);
	        # boxes must be clipped to width along x and height along y.
	        h, w = img_shape[1:]
	        proposals = _rand_bboxes(rng, num_proposals, w, h)
	        result.bboxes = torch.from_numpy(proposals).float()
	        result.scores = torch.from_numpy(rng.rand(num_proposals)).float()
	        result.labels = torch.zeros(num_proposals).long()
	        results.append(result.to(device))
	    return results


	def demo_mm_sampling_results(proposals_list,
	                             batch_gt_instances,
	                             batch_gt_instances_ignore=None,
	                             assigner_cfg=None,
	                             sampler_cfg=None,
	                             feats=None):
	    """Create sample results that can be passed to BBoxHead.get_targets.

	    Note:
	        Every entry of ``proposals_list`` is modified in place: its
	        ``bboxes`` field is renamed to ``priors``.

	    Args:
	        proposals_list (list): Per-image proposals carrying a ``bboxes``
	            field.
	        batch_gt_instances (list): Per-image ground truth instances;
	            must have the same length as ``proposals_list``.
	        batch_gt_instances_ignore (list, Optional): Per-image instances
	            to ignore. When None, ``None`` is passed to the assigner for
	            every image. Defaults to None.
	        assigner_cfg (dict, Optional): Config of the bbox assigner.
	            When None, a ``MaxIoUAssigner`` with 0.5 thresholds is used.
	        sampler_cfg (dict, Optional): Config of the bbox sampler.
	            When None, a ``RandomSampler`` drawing 512 samples is used.
	        feats (list, Optional): Per-level batched features; the slice of
	            each image (batch dim kept) is forwarded to the sampler.
	            Defaults to None.

	    Returns:
	        list: One sampling result per image.
	    """
	    assert len(proposals_list) == len(batch_gt_instances)
	    if batch_gt_instances_ignore is None:
	        batch_gt_instances_ignore = [None for _ in batch_gt_instances]
	    else:
	        assert len(batch_gt_instances_ignore) == len(batch_gt_instances)

	    if assigner_cfg is None:
	        assigner_cfg = dict(
	            type='MaxIoUAssigner',
	            pos_iou_thr=0.5,
	            neg_iou_thr=0.5,
	            min_pos_iou=0.5,
	            ignore_iof_thr=-1)
	    if sampler_cfg is None:
	        sampler_cfg = dict(
	            type='RandomSampler',
	            num=512,
	            pos_fraction=0.25,
	            neg_pos_ub=-1,
	            add_gt_as_proposals=True)
	    bbox_assigner = TASK_UTILS.build(assigner_cfg)
	    bbox_sampler = TASK_UTILS.build(sampler_cfg)

	    sampling_results = []
	    per_image = zip(proposals_list, batch_gt_instances,
	                    batch_gt_instances_ignore)
	    for i, (proposals, gt_instances, gt_instances_ignore) in enumerate(
	            per_image):
	        # Slice image ``i`` out of every level, keeping the batch dim.
	        # A separate name is used so the batched ``feats`` stays intact
	        # for the following images.
	        img_feats = None
	        if feats is not None:
	            img_feats = [lvl_feat[i][None] for lvl_feat in feats]

	        # rename proposals.bboxes to proposals.priors
	        proposals.priors = proposals.pop('bboxes')

	        assign_result = bbox_assigner.assign(proposals, gt_instances,
	                                             gt_instances_ignore)
	        sampling_result = bbox_sampler.sample(
	            assign_result, proposals, gt_instances, feats=img_feats)
	        sampling_results.append(sampling_result)

	    return sampling_results


	def demo_track_inputs(batch_size=1,
	                      num_frames=2,
	                      key_frames_inds=None,
	                      image_shapes=(3, 128, 128),
	                      num_items=None,
	                      num_classes=1,
	                      with_mask=False,
	                      with_semantic=False):
	    """Create a superset of inputs needed to run test or train batches.

	    All random content is drawn from a ``RandomState`` seeded with 0.

	    Args:
	        batch_size (int): batch size. Default to 1.
	        num_frames (int): The number of frames.
	        key_frames_inds (List): The indices of key frames. When given,
	            the remaining frame indices are stored as
	            ``ref_frames_inds``. Default to None.
	        image_shapes (List[tuple], Optional): image shape as (C, H, W).
	            A single tuple is shared by every item of the batch; a list
	            must hold exactly ``batch_size`` shapes.
	            Default to (3, 128, 128)
	        num_items (None | List[int]): specifies the number
	            of boxes in each batch item (used for every frame of that
	            item). Default to None.
	        num_classes (int): number of different labels a
	            box might have. Instance ids are drawn from
	            ``[100, 100 + num_classes)``. Default to 1.
	        with_mask (bool): Whether to return mask annotation.
	            Defaults to False.
	        with_semantic (bool): whether to return semantic. Currently not
	            used by this function. Default to False.

	    Returns:
	        The result of ``pseudo_collate`` applied to one dict per batch
	        item, each holding ``'inputs'`` (frames stacked along dim 0) and
	        ``'data_samples'`` (a ``TrackDataSample``).
	    """
	    rng = np.random.RandomState(0)

	    # Make sure the length of image_shapes is equal to ``batch_size``
	    if isinstance(image_shapes, list):
	        assert len(image_shapes) == batch_size
	    else:
	        image_shapes = [image_shapes] * batch_size

	    packed_inputs = []
	    for idx in range(batch_size):
	        image_shape = image_shapes[idx]
	        _, h, w = image_shape

	        imgs = rng.randint(
	            0, 255, size=(num_frames, *image_shape), dtype=np.uint8)
	        mm_inputs = dict(inputs=torch.from_numpy(imgs))

	        img_meta = {
	            'img_id': idx,
	            'img_shape': image_shape[-2:],
	            'ori_shape': image_shape[-2:],
	            'filename': '<demo>.png',
	            'scale_factor': np.array([1.1, 1.2]),
	            'flip': False,
	            'flip_direction': None,
	            'is_video_data': True,
	        }

	        video_data_samples = []
	        for i in range(num_frames):
	            data_sample = DetDataSample()
	            img_meta['frame_id'] = i
	            data_sample.set_metainfo(img_meta)

	            # gt_instances
	            gt_instances = InstanceData()
	            if num_items is None:
	                num_boxes = rng.randint(1, 10)
	            else:
	                num_boxes = num_items[idx]

	            bboxes = _rand_bboxes(rng, num_boxes, w, h)
	            labels = rng.randint(0, num_classes, size=num_boxes)
	            instances_id = rng.randint(100, num_classes + 100, size=num_boxes)
	            gt_instances.bboxes = torch.FloatTensor(bboxes)
	            gt_instances.labels = torch.LongTensor(labels)
	            gt_instances.instances_ids = torch.LongTensor(instances_id)

	            if with_mask:
	                masks = _rand_masks(rng, num_boxes, bboxes, w, h)
	                gt_instances.masks = masks

	            data_sample.gt_instances = gt_instances
	            # ignore_instances
	            ignore_instances = InstanceData()
	            bboxes = _rand_bboxes(rng, num_boxes, w, h)
	            # Stored as a float tensor, matching ``gt_instances.bboxes``,
	            # rather than as the raw numpy array.
	            ignore_instances.bboxes = torch.FloatTensor(bboxes)
	            data_sample.ignored_instances = ignore_instances

	            video_data_samples.append(data_sample)

	        track_data_sample = TrackDataSample()
	        track_data_sample.video_data_samples = video_data_samples
	        if key_frames_inds is not None:
	            assert isinstance(
	                key_frames_inds,
	                list) and len(key_frames_inds) < num_frames and max(
	                    key_frames_inds) < num_frames
	            # Every frame that is not a key frame is a reference frame.
	            ref_frames_inds = [
	                i for i in range(num_frames) if i not in key_frames_inds
	            ]
	            track_data_sample.set_metainfo(
	                dict(
	                    key_frames_inds=key_frames_inds,
	                    ref_frames_inds=ref_frames_inds))
	        mm_inputs['data_samples'] = track_data_sample

	        # TODO: gt_ignore
	        packed_inputs.append(mm_inputs)
	    data = pseudo_collate(packed_inputs)
	    return data


	def random_boxes(num=1, scale=1, rng=None):
	    """Simple version of ``kwimage.Boxes.random``

	    Args:
	        num (int): Number of boxes to generate. Defaults to 1.
	        scale: Factor multiplied onto the unit-square coordinates.
	            Defaults to 1.
	        rng: Seed or random state, resolved through ``ensure_rng``.
	            Defaults to None.

	    Returns:
	        Tensor: shape (n, 4) in x1, y1, x2, y2 format.

	    References:
	        https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390 # noqa: E501

	    Example:
	        >>> num = 3
	        >>> scale = 512
	        >>> rng = 0
	        >>> boxes = random_boxes(num, scale, rng)
	        >>> print(boxes)
	        tensor([[280.9925, 278.9802, 308.6148, 366.1769],
	                [216.9113, 330.6978, 224.0446, 456.5878],
	                [405.3632, 196.3221, 493.3953, 270.7942]])
	    """
	    rng = ensure_rng(rng)

	    tlbr = rng.rand(num, 4).astype(np.float32)

	    # Each row holds two random points; order them per axis so the
	    # smaller coordinate becomes top-left and the larger bottom-right.
	    xs = (tlbr[:, 0], tlbr[:, 2])
	    ys = (tlbr[:, 1], tlbr[:, 3])
	    ordered = (np.minimum(*xs), np.minimum(*ys), np.maximum(*xs),
	               np.maximum(*ys))

	    # ``ordered`` holds freshly allocated arrays, so the columns of
	    # ``tlbr`` can be overwritten one after another.
	    for col, coords in enumerate(ordered):
	        tlbr[:, col] = coords * scale

	    return torch.from_numpy(tlbr)


	# TODO: Support full ceph
	def replace_to_ceph(cfg):
	    """Switch the data loading of ``cfg`` to the ``petrel`` backend.

	    ``cfg`` is modified in place and nothing is returned. For the train,
	    val and test datasets the first two pipeline steps are inspected and
	    receive ``backend_args`` when they are an image / annotation loader;
	    ``CocoPanopticMetric`` evaluators receive the same ``backend_args``.

	    Args:
	        cfg: Config exposing ``train_dataloader``, ``val_dataloader``,
	            ``test_dataloader``, ``val_evaluator``, ``test_evaluator``
	            and ``filename``.
	    """
	    ceph_backend_args = {
	        'backend': 'petrel',
	        'path_mapping': {
	            './data/': 's3://openmmlab/datasets/detection/',
	            'data/': 's3://openmmlab/datasets/detection/',
	        },
	    }
	    annotation_loaders = ('LoadAnnotations', 'LoadPanopticAnnotations')

	    def _patch_loaders(pipeline):
	        # Only positions 0 and 1 are looked at: image loading first,
	        # annotation loading second.
	        image_step = pipeline[0]
	        if image_step['type'] == 'LoadImageFromFile':
	            image_step['backend_args'] = ceph_backend_args
	        ann_step = pipeline[1]
	        if ann_step['type'] in annotation_loaders:
	            ann_step['backend_args'] = ceph_backend_args

	    # TODO: name is a reserved interface, which will be used later.
	    def _process_pipeline(dataset, name):
	        if 'pipeline' not in dataset:
	            # dataset wrapper
	            _patch_loaders(dataset.dataset.pipeline)
	            return
	        _patch_loaders(dataset.pipeline)
	        if 'dataset' in dataset:
	            # dataset wrapper
	            _patch_loaders(dataset.dataset.pipeline)

	    def _process_evaluator(evaluator, name):
	        if evaluator['type'] == 'CocoPanopticMetric':
	            evaluator['backend_args'] = ceph_backend_args

	    # half ceph
	    for loader_key in ('train_dataloader', 'val_dataloader',
	                       'test_dataloader'):
	        _process_pipeline(getattr(cfg, loader_key).dataset, cfg.filename)
	    for evaluator_key in ('val_evaluator', 'test_evaluator'):
	        _process_evaluator(getattr(cfg, evaluator_key), cfg.filename)