The FFN operator covers both MoeFFN and FFN. When no expert grouping is supplied (expert_tokens is empty) the operator acts as a plain FFN; when expert grouping is supplied it acts as MoeFFN. Both cases are referred to collectively as FFN, and the operator belongs to the MoE family. MoE (Mixture of Experts) is a technique for training models at the trillion-parameter scale: the prediction task is decomposed into sub-tasks, an expert model is trained for each sub-task, and a gating model learns which expert to trust for a given input; the final prediction combines the results of the selected experts.
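As a rough illustration of the gating step described above (generic MoE routing only, not something npu_ffn performs itself; the helper name moe_gate and the gate_weight parameter are hypothetical):

import torch

def moe_gate(x, gate_weight, num_experts):
    # Gating model: score each token against every expert, pick the most trusted one
    logits = x @ gate_weight                       # [tokens, num_experts]
    probs = torch.softmax(logits, dim=-1)
    expert_id = probs.argmax(dim=-1)               # expert chosen for each token
    # Per-expert token counts, the kind of grouping expert_tokens describes
    expert_tokens = torch.bincount(expert_id, minlength=num_experts)
    return expert_id, expert_tokens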
npu_ffn(Tensor x, Tensor weight1, Tensor weight2, string activation, Tensor? expert_tokens=None, Tensor? bias1=None, Tensor? bias2=None, int inner_precise=0)
Returns a single Tensor holding the result of the FFN computation.
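For reference, the computation can be approximated on the CPU as follows. This is a minimal sketch, assuming expert_tokens holds per-expert token counts over consecutive rows of x, and using torch's standard gelu in place of the NPU fastgelu; ffn_reference is a hypothetical helper, not part of torch_npu.

import torch

def ffn_reference(x, weight1, weight2, activation, expert_tokens=None,
                  bias1=None, bias2=None):
    def act(t):
        # Approximation: plain gelu stands in for the NPU "fastgelu"
        return torch.nn.functional.gelu(t) if activation == "fastgelu" else t

    if expert_tokens is None:
        # Plain FFN: x @ weight1 -> activation -> @ weight2
        h = x @ weight1 + (bias1 if bias1 is not None else 0)
        return act(h) @ weight2 + (bias2 if bias2 is not None else 0)

    # MoeFFN: consecutive row groups of x are processed with per-expert weights
    outs, start = [], 0
    for i, n in enumerate(expert_tokens):
        n = int(n)
        xi = x[start:start + n]
        h = xi @ weight1[i] + (bias1[i] if bias1 is not None else 0)
        outs.append(act(h) @ weight2[i] + (bias2[i] if bias2 is not None else 0))
        start += n
    return torch.cat(outs, dim=0)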
# Single-operator (eager) invocation mode
import torch
import torch_npu

cpu_x = torch.randn((1, 1280), dtype=torch.float16)
cpu_weight1 = torch.randn(1280, 10240, dtype=torch.float16)
cpu_weight2 = torch.randn(10240, 1280, dtype=torch.float16)
activation = "fastgelu"
npu_out = torch_npu.npu_ffn(cpu_x.npu(), cpu_weight1.npu(), cpu_weight2.npu(), activation, inner_precise=1)

# Graph mode via the torch API (torch.compile with the torchair backend)
import logging
import os

import torch
import torch_npu
import torchair as tng
from torchair.configs.compiler_config import CompilerConfig
from torchair.core.utils import logger

logger.setLevel(logging.DEBUG)
os.environ["ENABLE_ACLNN"] = "true"

config = CompilerConfig()
config.debug.graph_dump.type = "pbtxt"
npu_backend = tng.get_npu_backend(compiler_config=config)

class MyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x, weight1, weight2, activation, expert):
        return torch_npu.npu_ffn(x, weight1, weight2, activation, expert, inner_precise=1)

cpu_model = MyModel()
cpu_x = torch.randn((1954, 2560), dtype=torch.float16)
cpu_weight1 = torch.randn((16, 2560, 5120), dtype=torch.float16)
cpu_weight2 = torch.randn((16, 5120, 200), dtype=torch.float16)
activation = "fastgelu"
# Per-expert token counts for the 16 experts
expert = [227, 62, 78, 126, 178, 27, 122, 1, 19, 182, 166, 118, 66, 217, 122, 243]

model = cpu_model.npu()
model = torch.compile(model, backend=npu_backend, dynamic=True)
npu_out = model(cpu_x.npu(), cpu_weight1.npu(), cpu_weight2.npu(), activation, expert)
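Note that in the graph-mode example the leading dimension of cpu_weight1 and cpu_weight2 (16) matches the number of entries in expert, and those entries sum to 1954, the number of rows in cpu_x, so every input row is assigned to exactly one expert group.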