完成量化的矩阵乘计算,最小支持输入维度为2维,最大支持输入维度为3维。
npu_quant_matmul(Tensor x1, Tensor x2, Tensor scale, Tensor? offset=None, Tensor? bias=None) -> Tensor
一个Tensor类型的输出,代表npu_quant_matmul的计算结果。
Atlas A2 训练系列产品
# Single-operator (eager) invocation
import torch
import torch_npu
import logging
import os

# Inputs stay within the documented 3-dimension maximum; the batch size of x2
# (31 * 2) matches the batch size of bias so the bias can be applied per batch.
cpu_x1 = torch.randint(-5, 5, (1, 256, 768), dtype=torch.int8)
cpu_x2 = torch.randint(-5, 5, (31 * 2, 768, 16), dtype=torch.int8)
scale = torch.randn(16, dtype=torch.float32)
offset = torch.randn(16, dtype=torch.float32)
bias = torch.randint(-5, 5, (31 * 2, 1, 16), dtype=torch.int32)
npu_out = torch_npu.npu_quant_matmul(
    cpu_x1.npu(), cpu_x2.npu(), scale.npu(), offset.npu(), bias.npu()
)

# Graph mode (torchair backend used through torch.compile)
import torch
import torch_npu
import torchair as tng
from torchair.ge_concrete_graph import ge_apis as ge
from torchair.configs.compiler_config import CompilerConfig
import logging
from torchair.core.utils import logger
import os
import numpy as np

logger.setLevel(logging.DEBUG)
os.environ["ENABLE_ACLNN"] = "true"

config = CompilerConfig()
npu_backend = tng.get_npu_backend(compiler_config=config)


class MyModel(torch.nn.Module):
    """Minimal module that wraps a single npu_quant_matmul call."""

    def __init__(self):
        super().__init__()

    def forward(self, x1, x2, scale, offset, bias):
        """Run npu_quant_matmul on x1 and x2 and return its result.

        scale and offset are first passed through npu_trans_quant_param; the
        value it returns is then used as the scale argument of the matmul.
        """
        scale_1 = torch_npu.npu_trans_quant_param(scale, offset)
        return torch_npu.npu_quant_matmul(x1, x2, scale_1, offset, bias)


# The model must be instantiated before it is moved to the NPU or compiled.
cpu_model = MyModel()
cpu_x1 = torch.randint(-1, 1, (15, 1, 512), dtype=torch.int8)
cpu_x2 = torch.randint(-1, 1, (15, 512, 128), dtype=torch.int8)
scale = torch.randn(1, dtype=torch.float32)
offset = torch.randn(1, dtype=torch.float32)
bias = torch.randint(-1, 1, (15, 1, 128), dtype=torch.int32)

model = cpu_model.npu()
model = torch.compile(cpu_model, backend=npu_backend, dynamic=True)
npu_out = model(cpu_x1.npu(), cpu_x2.npu(), scale.npu(), offset.npu(), bias.npu())