Layer层主要基于Operation和PluginOperation实现算子层的构建，主要需要实现Param的设计（对应实际模型传入的参数）和Layer的图构建。核心实现目录为“models/xx_model/layer/xx_layer”，需要实现以下核心函数：
/*
 * Reference only: GraphParam describes the data structure of a Layer graph.
 * It is defined in include/atb/types.h.
 *
 * struct GraphParam {
 *     uint32_t inTensorNum = 0;
 *     uint32_t outTensorNum = 0;
 *     uint32_t internalTensorNum = 0;
 *     std::vector<Node> nodes;
 *     InferShapeFunc inferShapeFunc;
 * };
 */

// Parameters that have to be passed in to build this Layer.
// Every field has an in-class default, so a default-constructed instance is
// fully initialized.
struct TestLayerParam {
    double layerNormEps = 0; // parameter of the Norm layer
    int headNum = 0;         // headNum of SelfAttention
    bool transKey = false;   // whether the SelfAttention Matmul transposes by default
    int dk = 0;              // dk value of the SelfAttention layer
    // ... any further parameters the layer needs
    //     (e.g. the beginNormAxis value read by TestLayerOperation)
};
enum TestLayerTensorId { IN_HIDDENSTATES, IN_NORMWEIGHT, IN_NORMBIAS, INTERMIDATE_INPUTNORMOUT, OUT_PRESENTKEY ... }; // Layer的输入/输出/中间TensorID // 核心函数:构建整Layer的拓扑图关系 atb::Status TestLayerOperation(const TestLayerParam ¶m, atb::Operation **operation){ // 对Tensor数目/nodes数目初始化 atb::GraphParam opGraph; opGraph.inTensorNum = IN_TENSOR_COUNT; // 输入Tensor opGraph.outTensorNum = OUT_TENSOR_COUNT; // 输出Tensor opGraph.internalTensorNum = INTERMEDIATE_TENSOR_COUNT; // 中间变量Tensor opGraph.nodes.resize(NODE_COUNT); // 实际定义Node size_t nodeId = 0 atb::Node &testNode1 = opGraph_.nodes.at(nodeId++); atb::Node &testNode2 = opGraph_.nodes.at(nodeId++); ... //同样方式获取其他Node atb::infer::TestParam testParam1; testParam1.layerNormEps = param.layerNormEps; testParam1.beginNormAxis = param.beginNormAxis; CreateOp(testParam1, &testNode1.op); //基于Param构造实际Operation testNode1.inTensorIds = {IN_HIDDENSTATES, IN_NORMWEIGHT, IN_NORMBIAS}; //基于输入输出TensorID构造图 testNode1.outTensorIds = {INTERMIDATE_INPUTNORMOUT}; ... // 相同方式定义其他Node opGraph.inferShapeFunc = [=](const atb::SVector<atb::TensorDesc> &inTensorDescs, atb::SVector<atb::TensorDesc> &outTensorDescs) { outTensorDescs.at(0) = inTensorDescs.at(0); outTensorDescs.at(1) = inTensorDescs.at(0); outTensorDescs.at(1).shape.dimNum = 4; outTensorDescs.at(1).shape.dims[2] = param.headNum; outTensorDescs.at(1).shape.dims[3] = param.dk; outTensorDescs.at(2) = outTensorDescs.at(1); return atb::NO_ERROR; }; // 定义Layer层的InferShape逻辑:更新outTensorDescs atb::CreateOperation(opGraph, operation); // 基于CreateOperation接口创建Layer return atb::NO_ERROR; };
// Factory used by the operation registry: builds the TestLayer from a JSON
// parameter object.
//
// paramJson  JSON object that must contain "headNum", "rmsNormEps" and "dk".
//            at() throws nlohmann::json::out_of_range when a key is missing
//            (operator[] on a const json with a missing key is undefined behaviour).
//
// Returns the created operation, or nullptr when building the Layer fails.
static atb::Operation *CreateTestLayer(const nlohmann::json &paramJson)
{
    // Param of the Layer.
    TestLayerParam param;
    // For a plain Operation, declare the Operation's Param instead:
    // atb::infer::TestParam param;

    // Assign the required parameters.
    param.headNum = paramJson.at("headNum").get<int>();
    // TestLayerParam declares layerNormEps (the field the graph builder reads).
    // NOTE(review): the JSON key is left as "rmsNormEps" so existing callers
    // keep working — confirm the intended key name.
    param.layerNormEps = paramJson.at("rmsNormEps").get<double>();
    param.dk = paramJson.at("dk").get<int>();

    atb::Operation *op = nullptr;
    // Build the Layer; never hand back a pointer from a failed build.
    if (atb_speed::TestLayerOperation(param, &op) != atb::NO_ERROR) {
        return nullptr;
    }
    // For a plain Operation, build it directly instead:
    // CreateOp(param, &op);
    return op;
}
std::map<std::string, OperationCreateFunc> g_funcMap = { // 添加对应Operation/Layer {"TestLayer", &CreateTestLayer}, {"TestOperation", &CreateTestOperation}, ... } // PyTorch侧调用: // self.acl_test_layer = torch.classes.OperationTorch.OperationTorch("TestLayer") // self.acl_test_layer.set_param(param) // self.acl_test_layer.execute(inputs)