| """Kernel test utils""" |
|
|
| import itertools |
| import random |
| import unittest |
| from functools import lru_cache |
| from numbers import Number |
| from typing import Any, Dict, List, NamedTuple, Optional, Sequence, Tuple, Union |
|
|
| import pytest |
| import torch |
| from torch._prims_common import TensorLikeType |
|
|
| |
| |
| DEFAULT_OPCHECK_TEST_UTILS: Tuple[str, ...] = ( |
| "test_schema", |
| "test_autograd_registration", |
| "test_faketensor", |
| ) |
|
|
# Full opcheck test suite, including the dynamic-shape AOT dispatch test.
ALL_OPCHECK_TEST_UTILS: Tuple[str, ...] = (
    "test_schema",
    "test_autograd_registration",
    "test_faketensor",
    "test_aot_dispatch_dynamic",
)


def fp8_allclose(
    a: TensorLikeType,
    b: TensorLikeType,
    rtol: float = 1e-05,
    atol: float = 1e-08,
    equal_nan: bool = False,
) -> bool:
    """
    Reference implementation of torch.allclose that compares in higher
    precision, so low-precision (e.g. FP8) inputs can be checked directly.
    """
    torch._refs._check_close_args(name="torch.allclose", a=a, b=b, rtol=rtol,
                                  atol=atol)

    # MPS does not support float64, so compare in float32 there; otherwise
    # upcast to float64 for the comparison.
    if a.device.type == "mps" or b.device.type == "mps":
        a_cmp = a.float()
        b_cmp = b.float()
    else:
        a_cmp = a.double()
        b_cmp = b.double()

    return bool(
        torch.all(
            torch.isclose(a_cmp, b_cmp, rtol=rtol, atol=atol,
                          equal_nan=equal_nan)
        ).item()
    )
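
# Illustrative usage sketch (not part of the utilities), assuming a PyTorch
# build that provides torch.float8_e4m3fn; plain torch.allclose typically
# cannot compare FP8 tensors directly, while fp8_allclose upcasts first:
#
#     a = torch.randn(16, 16).to(torch.float8_e4m3fn)
#     b = a.clone()
#     assert fp8_allclose(a, b, rtol=1e-2, atol=1e-3)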
|
|
|
|
def compute_max_diff(output, output_ref):
    """Mean absolute difference of ``output`` from ``output_ref``, normalized
    by the mean magnitude of ``output_ref``."""
    return torch.mean(torch.abs(output - output_ref)) / torch.mean(
        torch.abs(output_ref))
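
# Illustrative example of the relative-difference check (hypothetical values):
#
#     out = torch.tensor([1.01, 2.0, 2.99])
#     ref = torch.tensor([1.0, 2.0, 3.0])
#     assert compute_max_diff(out, ref) < 1e-2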
|
|
|
|
def opcheck(
    op: Union[
        torch._ops.OpOverload,
        torch._ops.OpOverloadPacket,
        torch._library.custom_ops.CustomOpDef,
    ],
    args: Tuple[Any, ...],
    kwargs: Optional[Dict[str, Any]] = None,
    *,
    test_utils: Union[str, Sequence[str]] = ALL_OPCHECK_TEST_UTILS,
    raise_exception: bool = True,
    cond: bool = True,
) -> Dict[str, str]:
    """Wrapper around torch.library.opcheck that patches torch.allclose with
    fp8_allclose so low-precision outputs can be compared. Returns an empty
    dict without running any checks when ``cond`` is False."""
    with unittest.mock.patch("torch.allclose", new=fp8_allclose):
        if not cond:
            return {}

        return torch.library.opcheck(
            op, args, kwargs, test_utils=test_utils,
            raise_exception=raise_exception)
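
# Illustrative usage sketch, assuming a custom op registered under the
# hypothetical name "mylib::my_kernel":
#
#     x = torch.randn(8, device="cuda")
#     opcheck(torch.ops.mylib.my_kernel.default, (x,),
#             test_utils=DEFAULT_OPCHECK_TEST_UTILS)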
|
|
|
|
@lru_cache(maxsize=None)
def get_max_shared_memory_bytes(gpu: int = 0) -> int:
    """Returns the maximum shared memory per thread block in bytes."""
    from paged_attention import ops

    max_shared_mem = ops.get_max_shared_memory_per_block_device_attribute(gpu)
    # Sanity-check the value reported by the device attribute query.
    assert max_shared_mem > 0, "max_shared_mem cannot be zero"
    return int(max_shared_mem)
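
# Illustrative usage sketch: requires a CUDA device and the paged_attention
# extension to be importable. Typically used to size test configurations, e.g.:
#
#     if get_max_shared_memory_bytes(gpu=0) >= 96 * 1024:
#         ...  # enable test cases that need larger shared-memory tiles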
|
|