| | |
| | import argparse |
| | import os.path as osp |
| |
|
| | import numpy as np |
| | from mmengine.fileio import dump, load |
| | from mmengine.utils import mkdir_or_exist, track_parallel_progress |
| |
|
prog_description = '''K-Fold coco split.

To split coco data for semi-supervised object detection:
python tools/misc/split_coco.py
'''


def parse_args():
    """Parse the command-line options of the COCO split script.

    Returns:
        argparse.Namespace: Namespace with the attributes ``data_root``,
        ``out_dir``, ``labeled_percent`` (a list) and ``fold``.
    """
    # The description spans several lines; the raw formatter keeps its line
    # breaks intact when ``--help`` is printed.
    parser = argparse.ArgumentParser(
        description=prog_description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '--data-root',
        type=str,
        help='The data root of coco dataset.',
        default='./data/coco/')
    parser.add_argument(
        '--out-dir',
        type=str,
        help='The output directory of coco semi-supervised annotations.',
        default='./data/coco/semi_anns/')
    # Values given on the command line are converted to float; the default
    # list is used as written (integers).
    parser.add_argument(
        '--labeled-percent',
        type=float,
        nargs='+',
        help='The percentage of labeled data in the training set.',
        default=[1, 2, 5, 10])
    parser.add_argument(
        '--fold',
        type=int,
        help='K-fold cross validation for semi-supervised object detection.',
        default=5)
    args = parser.parse_args()
    return args
| |
|
| |
|
def split_coco(data_root, out_dir, percent, fold):
    """Split COCO data for Semi-supervised object detection.

    Loads ``annotations/instances_train2017.json`` from ``data_root``,
    randomly marks a subset of its images as labeled and writes two
    annotation files into ``out_dir``:
    ``instances_train2017.{fold}@{percent}.json`` (labeled part) and
    ``instances_train2017.{fold}@{percent}-unlabeled.json`` (the rest).

    Args:
        data_root (str): The data root of coco dataset.
        out_dir (str): The output directory of coco semi-supervised
            annotations.
        percent (float): The percentage of labeled data in the training set.
        fold (int): The fold of dataset and set as random seed for data split.

    Note:
        The image indices are drawn with ``np.random.choice`` using its
        default of sampling with replacement and are then de-duplicated, so
        the labeled subset can contain fewer images than ``percent`` percent
        of the dataset. The sampling call is left untouched so that a given
        ``fold``/``percent`` pair keeps producing the same split.
    """

    def save_anns(name, images, annotations):
        """Write one COCO-style annotation file named ``name``.json."""
        sub_anns = dict()
        sub_anns['images'] = images
        sub_anns['annotations'] = annotations
        # Metadata sections are copied unchanged from the source file.
        sub_anns['licenses'] = anns['licenses']
        sub_anns['categories'] = anns['categories']
        sub_anns['info'] = anns['info']

        mkdir_or_exist(out_dir)
        dump(sub_anns, osp.join(out_dir, f'{name}.json'))

    # The fold number doubles as the random seed of the split.
    np.random.seed(fold)
    ann_file = osp.join(data_root, 'annotations/instances_train2017.json')
    anns = load(ann_file)

    image_list = anns['images']
    labeled_total = int(percent / 100. * len(image_list))
    labeled_inds = set(
        np.random.choice(range(len(image_list)), size=labeled_total))
    labeled_ids, labeled_images, unlabeled_images = [], [], []

    # Partition the images by whether their position was drawn.
    for idx, image in enumerate(image_list):
        if idx in labeled_inds:
            labeled_images.append(image)
            labeled_ids.append(image['id'])
        else:
            unlabeled_images.append(image)

    # Partition the annotations according to the image they belong to.
    labeled_ids = set(labeled_ids)
    labeled_annotations, unlabeled_annotations = [], []

    for ann in anns['annotations']:
        if ann['image_id'] in labeled_ids:
            labeled_annotations.append(ann)
        else:
            unlabeled_annotations.append(ann)

    # A whole-number percentage passed as a float (e.g. ``10.0``) is rendered
    # as ``10`` so that the file name is the same as for the integer ``10``.
    percent_tag = int(percent) if float(percent).is_integer() else percent
    labeled_name = f'instances_train2017.{fold}@{percent_tag}'
    unlabeled_name = f'instances_train2017.{fold}@{percent_tag}-unlabeled'

    save_anns(labeled_name, labeled_images, labeled_annotations)
    save_anns(unlabeled_name, unlabeled_images, unlabeled_annotations)
| |
|
| |
|
def multi_wrapper(args):
    """Call ``split_coco`` with the positional arguments packed in ``args``.

    Args:
        args (tuple): Positional arguments forwarded to ``split_coco``.

    Returns:
        Whatever ``split_coco`` returns.
    """
    outcome = split_coco(*args)
    return outcome
| |
|
| |
|
if __name__ == '__main__':
    args = parse_args()

    # One task per (fold, percent) pair; folds are numbered from 1 and the
    # percentages vary fastest.
    tasks = []
    for fold_idx in range(1, args.fold + 1):
        for labeled_percent in args.labeled_percent:
            tasks.append(
                (args.data_root, args.out_dir, labeled_percent, fold_idx))

    # NOTE(review): the third positional argument is presumably the number of
    # worker processes in mmengine's signature -- confirm.
    track_parallel_progress(multi_wrapper, tasks, args.fold)
| |
|