| | |
| | import logging |
| | import os |
| | import os.path as osp |
| | import sys |
| | from typing import Callable, Optional, Union |
| |
|
| | import torch |
| |
|
| | from mmengine.dist import master_only |
| | from mmengine.hooks import Hook |
| | from mmengine.logging import print_log |
| | from mmengine.registry import HOOKS |
| |
|
| |
|
def check_kineto() -> bool:
    """Report whether the installed PyTorch exposes Kineto support.

    Returns:
        bool: ``True`` only when ``torch.autograd.kineto_available()`` can be
        called and returns a truthy value. If the lookup or call raises
        ``AttributeError``, a ``'NO KINETO'`` warning is logged and ``False``
        is returned.
    """
    try:
        available = torch.autograd.kineto_available()
    except AttributeError:
        print_log('NO KINETO', logger='current', level=logging.WARNING)
        return False
    return True if available else False
| |
|
| |
|
@HOOKS.register_module()
class ProfilerHook(Hook):
    """A hook to analyze performance during training and inference.

    PyTorch Profiler is a tool that allows the collection of the performance
    metrics during the training. More details on Profiler can be found at
    `official docs <https://pytorch.org/docs/stable/profiler.html
    #torch.profiler.profile>`_

    Args:
        by_epoch (bool): Profile performance by epoch or by iteration.
            Defaults to True.
        profile_times (int): The period (epoch/iter) recorded by the profiler.
            Defaults to 1. For example, ``profile_times=10`` with
            ``by_epoch=False`` closes the profiler at the end of the
            iteration in which ``runner.iter == 9``. Values greater than 1
            are rejected when ``by_epoch=True``.
        activity_with_cpu (bool): Activities to be used in the analysis (CPU).
            Defaults to True.
        activity_with_cuda (bool): Activities to be used in the analysis
            (CUDA). Defaults to False.
        schedule (dict, optional): Key-word arguments passed to
            `torch.profiler.schedule <https://pytorch.org/docs/stable/
            profiler.html#torch.profiler.schedule>`_.
            Defaults to None, which means profiling without a schedule.
        on_trace_ready (callable, dict, optional): Either a handler or a dict
            of generating handler. Defaults to None, which means profiling
            without an on_trace_ready. The Callable type needs to construct
            its own function that can handle
            'torch.autograd.profiler.profile'.
            Two officially recommended ways are provided:

            - ``on_trace_ready=dict(type='log_trace')``: Print the profiling
              result in the terminal. See more details in the
              `PyTorch official tutorial`_. The remaining keys are passed to
              ``prof.key_averages().table``.
            - ``on_trace_ready=dict(type='tb_trace')``: Profile the
              performance with tensorboard. See more details in the tutorial
              `profile with tensorboard`_. The remaining keys are passed to
              ``torch.profiler.tensorboard_trace_handler``.

        record_shapes (bool): Save information about operator's input shapes.
            Defaults to False.
        profile_memory (bool): Track tensor memory allocation/deallocation.
            Defaults to False.
        with_stack (bool): Record source information (file and line number)
            for the ops. Defaults to False.
        with_flops (bool): Use formula to estimate the FLOPS of specific
            operators (matrix multiplication and 2D convolution).
            Defaults to False.
        json_trace_path (str, optional): Exports the collected trace in Chrome
            JSON format. Chrome use 'chrome://tracing' view json file.
            Defaults to None, which means profiling does not store json files.

    Raises:
        ImportError: If ``torch.profiler`` cannot be imported, if Kineto is
            not available, or (for ``tb_trace``) if ``torch_tb_profiler`` is
            not installed.
        ValueError: If ``profile_times`` is smaller than 1, if it is greater
            than 1 while ``by_epoch`` is True, or if ``on_trace_ready`` has
            an unsupported type / ``type`` key.

    Warnings:
        The profiler will be closed after ``profile_times`` iterations
        automatically. Please make sure the configuration of your scheduler
        will not close the profiler before the iteration reach the value of
        ``profile_times``

    Examples:
        >>> # tensorboard trace
        >>> trace_config = dict(type='tb_trace')
        >>> profiler_hook_cfg = dict(on_trace_ready=trace_config)

    .. _PyTorch official tutorial: https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html#using-profiler-to-analyze-execution-time
    .. _profile with tensorboard: https://pytorch.org/tutorials/intermediate/tensorboard_profiler_tutorial.html#pytorch-profiler-with-tensorboard
    """
    priority = 'VERY_LOW'

    def __init__(self,
                 *,
                 by_epoch: bool = True,
                 profile_times: int = 1,
                 activity_with_cpu: bool = True,
                 activity_with_cuda: bool = False,
                 schedule: Optional[dict] = None,
                 on_trace_ready: Union[Callable, dict, None] = None,
                 record_shapes: bool = False,
                 profile_memory: bool = False,
                 with_stack: bool = False,
                 with_flops: bool = False,
                 json_trace_path: Optional[str] = None) -> None:
        # ``torch.profiler`` is imported here so that a torch build without
        # it produces the upgrade hint below.
        try:
            from torch import profiler
        except ImportError as err:
            raise ImportError('please upgrade torch above 1.8.1') from err
        if not check_kineto():
            raise ImportError('Due to Kineto support issues, please upgrade '
                              'pytorch above 1.8.1(windows users above 1.9.1)')

        assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.'
        self.by_epoch = by_epoch

        if profile_times < 1:
            # The message names the actual parameter (``profile_times``).
            raise ValueError('profile_times should be greater than 0, '
                             f'but got {profile_times}')
        if by_epoch and profile_times > 1:
            raise ValueError(
                f'Profiler will profile 0-{profile_times} epochs.\n'
                'Since profiler will slow down the training, it is recommended'
                ' to train 1 epoch with ProfilerHook and adjust your setting '
                'according to the profiler summary.\n'
                'During normal training(epoch > 1), '
                'you may disable the ProfilerHook.')
        self.profile_times = profile_times

        assert isinstance(activity_with_cpu, bool), \
            '``activity_with_cpu`` should be a boolean.'
        assert isinstance(activity_with_cuda, bool), \
            '``activity_with_cuda`` should be a boolean.'
        self.activities = []
        if activity_with_cpu:
            self.activities.append(profiler.ProfilerActivity.CPU)
        if activity_with_cuda:
            self.activities.append(profiler.ProfilerActivity.CUDA)

        if schedule is not None:
            assert isinstance(schedule, dict), '``schedule`` should be a dict.'
            self.schedule = profiler.schedule(**schedule)
        else:
            self.schedule = None

        # ``on_trace_ready`` is stored raw and resolved in ``before_run``
        # because the ``tb_trace`` preset needs ``runner.log_dir``.
        self.on_trace_ready = on_trace_ready
        self.record_shapes = record_shapes
        self.profile_memory = profile_memory
        self.with_stack = with_stack
        self.with_flops = with_flops

        self.json_trace_path = json_trace_path
        # Set to True once the profiler context has been exited.
        self._closed = False

    def before_run(self, runner):
        """Initialize the profiler.

        Through the runner parameter, the validity of the parameter is further
        determined.

        Args:
            runner: The runner; ``max_epochs`` / ``max_iters``, ``log_dir``
                and ``logger`` are read from it.

        Raises:
            ValueError: If ``profile_times`` exceeds the runner's
                ``max_epochs`` (``by_epoch=True``) or ``max_iters``
                (``by_epoch=False``).
        """
        max_times = runner.max_epochs if self.by_epoch else runner.max_iters
        if max_times < self.profile_times:
            raise ValueError(
                f'``profile_times`` should not be greater than {max_times}')

        on_trace_ready = self._parse_trace_config(runner)

        self.profiler = torch.profiler.profile(
            activities=self.activities,
            schedule=self.schedule,
            on_trace_ready=on_trace_ready,
            record_shapes=self.record_shapes,
            profile_memory=self.profile_memory,
            with_stack=self.with_stack,
            with_flops=self.with_flops)

        # The context is entered manually here and exited in
        # ``_export_chrome_trace``.
        self.profiler.__enter__()
        runner.logger.info('profiler is profiling...')

    def _parse_trace_config(self, runner):
        """Used to parse the parameter 'on_trace_ready'.

        Args:
            runner: The runner; ``log_dir`` and ``logger`` are used for the
                ``tb_trace`` preset.

        Returns:
            callable or None: The handler to pass to
            ``torch.profiler.profile(on_trace_ready=...)``.

        Raises:
            ImportError: If ``tb_trace`` is requested but
                ``torch_tb_profiler`` is not installed.
            ValueError: If ``on_trace_ready`` is neither None, a callable nor
                a dict, or if its ``type`` is not ``'log_trace'`` /
                ``'tb_trace'``.
        """
        if self.on_trace_ready is None:
            return None
        if callable(self.on_trace_ready):
            return self.on_trace_ready
        if not isinstance(self.on_trace_ready, dict):
            raise ValueError(
                '``on_trace_ready`` should be a handler, or dict, or None, '
                f'but got {self.on_trace_ready}')

        # Work on a copy so the user's config dict is not mutated.
        trace_cfg = self.on_trace_ready.copy()
        trace_type = trace_cfg.pop('type')

        if trace_type == 'log_trace':

            def _log_handler(_profile):
                print(_profile.key_averages().table(**trace_cfg))

            return _log_handler

        if trace_type == 'tb_trace':
            try:
                import torch_tb_profiler  # noqa: F401
            except ImportError as err:
                raise ImportError(
                    'please run ``pip install torch-tb-profiler``') from err

            # Relative (or missing) directories are placed under the
            # runner's log directory.
            if 'dir_name' not in trace_cfg:
                trace_cfg['dir_name'] = osp.join(runner.log_dir,
                                                 'tf_tracing_logs')
            elif not osp.isabs(trace_cfg['dir_name']):
                trace_cfg['dir_name'] = osp.join(runner.log_dir,
                                                 trace_cfg['dir_name'])
            runner.logger.info('trace_files of ProfilerHook will be '
                               f'saved to {trace_cfg["dir_name"]}.')

            if self.json_trace_path is not None:
                # ``Logger.warn`` is a deprecated alias of ``warning``.
                runner.logger.warning(
                    'When using tensorboard_trace, it is recommended to '
                    'save json files by setting ``worker_name`` instead of'
                    ' setting ``json_trace_path``')
            return torch.profiler.tensorboard_trace_handler(**trace_cfg)

        raise ValueError('trace_type should be "log_trace" or '
                         f'"tb_trace", but got {trace_type}')

    def after_train_epoch(self, runner):
        """Close the profiler and export the trace if still open."""
        # ``_closed`` is checked so the profiler context is exited only once.
        if not self._closed:
            self._export_chrome_trace(runner)

    def after_train_iter(self, runner, batch_idx, data_batch, outputs):
        """profiler will call `step` method if it is not closed.

        When profiling by iteration, the profiler is closed and exported once
        ``runner.iter`` equals ``profile_times - 1``.
        """
        if self._closed:
            return
        self.profiler.step()
        if not self.by_epoch and runner.iter == self.profile_times - 1:
            self._export_chrome_trace(runner)

    def _export_chrome_trace(self, runner):
        """Exit the profiler context and optionally export a Chrome trace.

        The trace is written only when ``json_trace_path`` is not None.
        """
        self._closed = True
        runner.logger.info('profiler may take a few minutes...')
        self.profiler.__exit__(None, None, None)
        if self.json_trace_path is not None:
            self.profiler.export_chrome_trace(self.json_trace_path)
|
| |
|
@HOOKS.register_module()
class NPUProfilerHook(Hook):
    """NPUProfiler to analyze performance during training.

    NPU Profiling is used to count the device execution time of all operators.
    The torch_npu.npu.profile interface is used to complete the profiling data
    collection at each stage of the project, and the data is analyzed by the
    msprof tool and the data can be dumped to further manually analyze the
    key performance bottlenecks. For more details on the torch_npu.npu.profile
    interface, please visit
    https://gitee.com/ascend/pytorch/blob/master/torch_npu/npu/profiler.py#profile

    Args:
        begin (int): Number of start iterations for profiling. Defaults to 0.
        end (int): Number of end iterations for profiling. Must be greater
            than ``begin``. Defaults to 1.
        result_path (str): The path to save the profiling results file.
            The directory is created if it does not exist.
            Defaults to 'cann_profiling'.
        exit_after_profiling (bool): Whether to exit the program after
            profiling. Defaults to True.
        use_e2e_profiler (bool): Turn on E2E profiling, E2E profiling combines
            performance data at the Pytorch level and the NPU level to analyze
            the bottlenecks of model performance end-to-end, and cannot show
            detailed content, and only as an auxiliary analysis.
            Defaults to False.
        ge_profiling_to_std_out (bool): Turn on GE profiling, which is used
            to collect the profiling data of the host side scheduling of the
            Ascend device. When True, the environment variable
            ``GE_PROFILING_TO_STD_OUT`` is set to ``'1'``.
            Defaults to False.

    Raises:
        ImportError: If ``torch_npu`` cannot be imported.
        ValueError: If ``begin >= end``.

    Examples:
        >>> cfg = ...
        >>> custom_hooks = [dict(type='NPUProfilerHook', end=2)]
        >>> cfg.merge_from_dict({'custom_hooks': custom_hooks})
        >>> runner = Runner.from_cfg(cfg)
        >>> runner.train()
    """
    priority = 'VERY_LOW'

    def __init__(self,
                 *,
                 begin: int = 0,
                 end: int = 1,
                 result_path: str = 'cann_profiling',
                 exit_after_profiling: bool = True,
                 use_e2e_profiler: bool = False,
                 ge_profiling_to_std_out: bool = False):

        try:
            import torch_npu
        except ImportError as err:
            raise ImportError('Failed to import torch_npu module') from err

        if begin >= end:
            # Trailing space keeps the two literals from fusing into
            # "greaterthan".
            raise ValueError(
                'The iteration to start profiling should not be greater '
                'than or equal to profile end')

        self.begin = begin
        self.end = end
        self.result_path = result_path
        self.exit_after_profiling = exit_after_profiling

        if ge_profiling_to_std_out:
            os.environ['GE_PROFILING_TO_STD_OUT'] = '1'

        if not osp.exists(self.result_path):
            os.makedirs(self.result_path, exist_ok=True)

        self.profiler = torch_npu.npu.profile(
            self.result_path, use_e2e_profiler=use_e2e_profiler)

    @master_only
    def before_run(self, runner):
        """Check that the profiling window fits into the training run.

        Raises:
            ValueError: If ``end`` is greater than ``runner.max_iters``.
        """
        if self.end > runner.max_iters:
            raise ValueError(
                'The profiling end iteration should not be greater '
                'than the max iteration')

    @master_only
    def before_train_iter(self, runner, batch_idx, data_batch=None):
        """Enter the profiler context when ``runner.iter`` equals ``begin``."""
        if runner.iter == self.begin:
            self.profiler.__enter__()
            runner.logger.info('NPUProfiler starts profiling...')

    @master_only
    def after_train_iter(self,
                         runner,
                         batch_idx,
                         data_batch=None,
                         outputs=None):
        """Exit the profiler context when ``runner.iter`` equals ``end - 1``.

        If ``exit_after_profiling`` is True, the process is terminated with
        ``sys.exit()`` right after the profiler has been closed.
        """
        if runner.iter == self.end - 1:
            runner.logger.info('profiler may take a few minutes to'
                               ' save the profiling result.')
            self.profiler.__exit__(None, None, None)
            if self.exit_after_profiling:
                sys.exit()
| |
|