torch_npu.npu_dynamic_quant_asymmetric(Tensor x, *, Tensor? smooth_scales=None, Tensor? group_index=None, ScalarType? dst_type=None) -> (Tensor, Tensor, Tensor)
# Example: minimal call. Only the input tensor is supplied; smooth_scales,
# group_index and dst_type are left unset.
import torch
import torch_npu

# Random half-precision input of shape (3, 8), moved onto the NPU.
x = torch.rand((3, 8), dtype=torch.half).npu()

# The operator returns three tensors; unpack them by role before printing.
quant_result = torch_npu.npu_dynamic_quant_asymmetric(x)
y, scale, offset = quant_result
print(y, scale, offset)
# Example: explicitly select the destination type of the quantized output.
import torch
import torch_npu

# Random half-precision input of shape (3, 8), moved onto the NPU.
x = torch.rand((3, 8), dtype=torch.half).npu()

# Request torch.quint4x2 as the output type instead of leaving dst_type unset.
# NOTE(review): how the quantized values are packed into `y` for this type is
# not visible here -- confirm against the operator documentation.
quant_result = torch_npu.npu_dynamic_quant_asymmetric(
    x,
    dst_type=torch.quint4x2,
)
y, scale, offset = quant_result
print(y, scale, offset)
# Example: supply a smooth_scales tensor alongside the input.
import torch
import torch_npu

# Random half-precision input of shape (3, 8), moved onto the NPU.
x = torch.rand((3, 8), dtype=torch.half).npu()

# One scale value per element of x's last dimension (length 8), same dtype
# as x.
smooth_scales = torch.rand((8,), dtype=torch.half).npu()

quant_result = torch_npu.npu_dynamic_quant_asymmetric(
    x,
    smooth_scales=smooth_scales,
)
y, scale, offset = quant_result
print(y, scale, offset)
# Example: grouped smooth scales selected through group_index.
import torch
import torch_npu

# Random half-precision input of shape (3, 8), moved onto the NPU.
x = torch.rand((3, 8), dtype=torch.half).npu()

# Two rows of smooth scales, each covering x's last dimension (length 8).
smooth_scales = torch.rand((2, 8), dtype=torch.half).npu()

# One int32 entry per smooth_scales row. Built directly with the target dtype
# rather than via the legacy float `torch.Tensor(...)` constructor plus a cast.
# NOTE(review): presumably these are cumulative row boundaries of x (rows
# [0, 1) use group 0, rows [1, 3) use group 1) -- confirm against the
# operator documentation.
group_index = torch.tensor([1, 3], dtype=torch.int32).npu()

y, scale, offset = torch_npu.npu_dynamic_quant_asymmetric(
    x,
    smooth_scales=smooth_scales,
    group_index=group_index,
)
print(y, scale, offset)
# Example: graph mode. The operator is wrapped in an nn.Module and compiled
# with torch.compile using the torchair NPU backend.
import torch
import torch_npu
import torchair as tng
from torchair.configs.compiler_config import CompilerConfig

torch_npu.npu.set_compile_mode(jit_compile=True)
config = CompilerConfig()
npu_backend = tng.get_npu_backend(compiler_config=config)

# NOTE(review): the device index 4 is hard-coded; change it to an NPU that
# exists on the host running this example.
device = torch.device('npu:4')
torch_npu.npu.set_device(device)


class DynamicQuantModel(torch.nn.Module):
    """Thin module wrapper so npu_dynamic_quant_asymmetric can be compiled."""

    def forward(self, input_tensor, smooth_scales=None, group_index=None, dst_type=None):
        """Forward every argument to the operator and return its three outputs.

        Args:
            input_tensor: Tensor to quantize.
            smooth_scales: Optional tensor passed through as ``smooth_scales``.
            group_index: Optional tensor passed through as ``group_index``.
            dst_type: Optional dtype passed through as ``dst_type``.

        Returns:
            The ``(out, scale, offset)`` tuple produced by the operator.
        """
        out, scale, offset = torch_npu.npu_dynamic_quant_asymmetric(
            input_tensor,
            smooth_scales=smooth_scales,
            group_index=group_index,
            dst_type=dst_type,
        )
        return out, scale, offset


# Tensors are created directly on the current NPU, so no extra .npu() copy is
# needed afterwards.
x = torch.randn((2, 4, 6), device='npu', dtype=torch.float16)
# One scale per element of x's last dimension; (6,) is an explicit 1-D shape.
smooth_scales = torch.randn((6,), device='npu', dtype=torch.float16)

dynamic_quant_model = DynamicQuantModel().npu()
dynamic_quant_model = torch.compile(dynamic_quant_model, backend=npu_backend, dynamic=True)

out, scale, offset = dynamic_quant_model(x, smooth_scales=smooth_scales)
print(out)
print(scale)
print(offset)