|
|
|
|
import random
from itertools import zip_longest
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple

import torch
from torch import nn
|
|
|
| SampledData = Any
|
| ModelOutput = Any
|
|
|
|
|
| def _grouper(iterable: Iterable[Any], n: int, fillvalue=None) -> Iterator[Tuple[Any]]:
|
| """
|
| Group elements of an iterable by chunks of size `n`, e.g.
|
| grouper(range(9), 4) ->
|
| (0, 1, 2, 3), (4, 5, 6, 7), (8, None, None, None)
|
| """
|
| it = iter(iterable)
|
| while True:
|
| values = []
|
| for _ in range(n):
|
| try:
|
| value = next(it)
|
| except StopIteration:
|
| if values:
|
| values.extend([fillvalue] * (n - len(values)))
|
| yield tuple(values)
|
| return
|
| values.append(value)
|
| yield tuple(values)
|
|
|
|
|
class ScoreBasedFilter:
    """
    Filters entries in model output based on their scores
    Discards all entries with score less than the specified minimum
    """

    def __init__(self, min_score: float = 0.8):
        # Entries scoring strictly below this threshold are discarded.
        self.min_score = min_score

    def __call__(self, model_output: ModelOutput) -> ModelOutput:
        # Filter each output entry in place; entries without a "scores"
        # field are left untouched.
        for output_entry in model_output:
            entry_instances = output_entry["instances"]
            if entry_instances.has("scores"):
                keep = entry_instances.scores >= self.min_score
                output_entry["instances"] = entry_instances[keep]
        return model_output
|
|
|
|
|
class InferenceBasedLoader:
    """
    Data loader based on results inferred by a model. Consists of:
    - a data loader that provides batches of images
    - a model that is used to infer the results
    - a data sampler that converts inferred results to annotations
    """

    def __init__(
        self,
        model: nn.Module,
        data_loader: Iterable[List[Dict[str, Any]]],
        data_sampler: Optional[Callable[[ModelOutput], List[SampledData]]] = None,
        data_filter: Optional[Callable[[ModelOutput], ModelOutput]] = None,
        shuffle: bool = True,
        batch_size: int = 4,
        inference_batch_size: int = 4,
        drop_last: bool = False,
        category_to_class_mapping: Optional[dict] = None,
    ):
        """
        Constructor

        Args:
            model (torch.nn.Module): model used to produce data
            data_loader (Iterable[List[Dict[str, Any]]]): iterable that provides
                dictionaries with "images" and "categories" fields to perform inference on
            data_sampler (Callable: ModelOutput -> SampledData): functor
                that produces annotation data from inference results;
                (optional, default: None)
            data_filter (Callable: ModelOutput -> ModelOutput): filter
                that selects model outputs for further processing
                (optional, default: None)
            shuffle (bool): if True, the input images get shuffled
            batch_size (int): batch size for the produced annotation data
            inference_batch_size (int): batch size for input images
            drop_last (bool): if True, drop the last batch if it is undersized
            category_to_class_mapping (dict): category to class mapping
        """
        self.model = model
        # Inference-only usage: disable dropout / batch-norm statistic updates.
        # NOTE(review): the model is left in eval mode permanently; a caller
        # that also trains this model must restore train mode itself.
        self.model.eval()
        self.data_loader = data_loader
        self.data_sampler = data_sampler
        self.data_filter = data_filter
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.inference_batch_size = inference_batch_size
        self.drop_last = drop_last
        # Categories not present in the mapping fall back to class 0
        # (see `_produce_data`).
        if category_to_class_mapping is not None:
            self.category_to_class_mapping = category_to_class_mapping
        else:
            self.category_to_class_mapping = {}

    def __iter__(self) -> Iterator[List[SampledData]]:
        for batch in self.data_loader:
            # Flatten the batch into one entry per (image, category) pair.
            # NOTE(review): assumes each element's "images" and "categories"
            # are parallel sequences of equal length — TODO confirm against
            # the upstream data loader.
            images_and_categories = [
                {"image": image, "category": category}
                for element in batch
                for image, category in zip(element["images"], element["categories"])
            ]
            if not images_and_categories:
                continue
            if self.shuffle:
                random.shuffle(images_and_categories)
            yield from self._produce_data(images_and_categories)

    def _produce_data(
        self, images_and_categories: List[Dict[str, Any]]
    ) -> Iterator[List[SampledData]]:
        """
        Produce batches of data from images

        Args:
            images_and_categories (List[Dict[str, Any]]): list of dictionaries
                with "image" (torch.Tensor) and "category" (Optional[str])
                entries to process

        Returns:
            Iterator over batches of data sampled from model outputs
        """
        # Accumulates sampled entries across inference batches; leftovers
        # carry over to the next iteration so emitted batches (except possibly
        # the last) have exactly `self.batch_size` entries.
        data_batches: List[SampledData] = []
        category_to_class_mapping = self.category_to_class_mapping
        batched_images_and_categories = _grouper(images_and_categories, self.inference_batch_size)
        for batch in batched_images_and_categories:
            # _grouper pads the last chunk with None; drop the padding entries.
            # NOTE(review): assumes `self.model` exposes a `device` attribute —
            # TODO confirm against the concrete model class.
            batch = [
                {
                    "image": image_and_category["image"].to(self.model.device),
                    "category": image_and_category["category"],
                }
                for image_and_category in batch
                if image_and_category is not None
            ]
            if not batch:
                continue
            # Pure inference: no gradients needed.
            with torch.no_grad():
                model_output = self.model(batch)
            # Attach the source image and per-instance class labels to each
            # output entry (mutates the model output in place).
            for model_output_i, batch_i in zip(model_output, batch):
                # Expect a single image tensor of shape (C, H, W).
                assert len(batch_i["image"].shape) == 3
                model_output_i["image"] = batch_i["image"]
                # Unknown categories are mapped to class 0.
                instance_class = category_to_class_mapping.get(batch_i["category"], 0)
                model_output_i["instances"].dataset_classes = torch.tensor(
                    [instance_class] * len(model_output_i["instances"])
                )
            model_output_filtered = (
                model_output if self.data_filter is None else self.data_filter(model_output)
            )
            data = (
                model_output_filtered
                if self.data_sampler is None
                else self.data_sampler(model_output_filtered)
            )
            # Keep only entries that still have instances after
            # filtering/sampling.
            for data_i in data:
                if len(data_i["instances"]):
                    data_batches.append(data_i)
            # Emit a full batch as soon as enough samples have accumulated;
            # the remainder is kept for the next round.
            if len(data_batches) >= self.batch_size:
                yield data_batches[: self.batch_size]
                data_batches = data_batches[self.batch_size :]
        # Flush the (possibly undersized) tail batch unless drop_last is set.
        if not self.drop_last and data_batches:
            yield data_batches
|
|
|