Model侧的开发包括两个部分,首先组合前置开发的Layer层,再使得PyTorch等框架能够直接调用Model层。
Model组图调用。将多层layer进行拼接,能够减少Python侧调用时间,具体实现目录为“models/xx_model/model/*model.cpp”,整体实现上类似Layer,核心在于BuildGraph组图。
void BuildGraph() { // 核心实现: 组图过程类似Layer构造 // 先初始化各类Tensors和Nodes const int weightTensorSize = WORDEMBEDDINGNODE_WEIGHT_COUNT + WEIGHT_COUNT_PER_LAYER * param_.layerNum + FINALNORMNODE_WEIGHT_COUNT; graph_.weightTensors.resize(weightTensorSize); graph_.inTensors.resize(IN_TENSOR_NUM); graph_.outTensors.resize(OUT_TENSOR_NUM); graph_.nodes.resize(NODE_SIZE); graph_.internalTensors.resize(INTERNAL_TENSORS_SIZE); // 定义Node,传入实际的Tensor对象,Node可以对应Operation/Layer对象 // Operation构造 int nodeId = 0; auto &TestNode1 = graph_.nodes.at(nodeId++); atb::infer::TestParam testParam1; atb::Operation *op = nullptr; atb::CreateOp(testParam1, &op); testNode1.operation.reset(op); testNode1.inTensors = {&graph_.weightTensors.at(0), &graph_.inTensors.at(0)}; testNode1.outTensors = {&graph_.internalTensors.at(0)}; // 其他Node定义同上... // Layer构造 // 此处构造 layerNum * SingleAttentionLayer 来成图 AsdOps::Tensor *firstInTensor = &graph_.inTensors.at(0); // Layer之间传递的Tensor for (int layerId = 0; layerId < param_.layerNum; ++layerId) { auto &layerNode = graph_.nodes.at(nodeId++); TestLayerParam opParam; assign_param(opParam, param_); //完成opParam的初始化赋值 // Layer构造 TestLayerOperation(opParam, &op); layerNode.operation.reset(op); layerNode.inTensors.resize(layerNode.operation->GetInputNum()); size_t inTensorId = 0; layerNode.inTensors.at(inTensorId++) = firstInTensor; for (size_t weightTensorId = 0; weightTensorId < WEIGHT_COUNT_PER_LAYER; ++weightTensorId) { layerNode.inTensors.at(inTensorId++) = &graph_.weightTensors.at( layerId * WEIGHT_COUNT_PER_LAYER + weightTensorId + WORDEMBEDDINGNODE_WEIGHT_COUNT); } layerNode.inTensors.at(inTensorId++) = &graph_.inTensors.at(IN_TENSOR_POSITIONID); // positionIdTensor ... // 其他输入输出tensor定义同上 layerNode.outTensors = {&graph_.internalTensors.at(OPERATION_COUNT_BEFORE_LAYER + layerId)}; firstInTensor = layerNode.outTensors.at(0); } // 剩余图构造流程同上 };
对于一些特定Operation,需要利用param透传机制:部分Operation在每次执行时param都会改变,需要通过tensor.hostData传递这些数据,在layer和model中,需要将这些数据透传给Operation。
// Refresh tokenOffset_/seqLen_ from the per-step JSON parameter string.
// These values change on every execution and are passed through to the
// Operations via tensor hostData (see BindParamHostTensor).
atb::Status TestModel::ParseParam(const std::string &param) // fixed: signature was garbled as `¶m`
{
    nlohmann::json paramJson = nlohmann::json::parse(param);
    tokenOffset_.clear();
    // const& avoids copying each json element (performance-for-range-copy).
    for (const auto &item : paramJson["tokenOffset"]) {
        tokenOffset_.push_back(item.get<int>());
    }
    seqLen_.clear();
    for (const auto &item : paramJson["seqLen"]) {
        seqLen_.push_back(item.get<int>());
    }
    return atb::NO_ERROR;
}

// Pass-through of the three runtime values (layerId / tokenOffset / seqLen)
// to the node's Operation via hostData pointers. The pointed-to storage
// (tokenOffset_, seqLen_, layerId_) must stay alive until execution finishes.
atb::Status TestModel::BindParamHostTensor(uint32_t nodeId)
{
    layerId_ = nodeId - OPERATION_COUNT_BEFORE_LAYER;
    auto &node = graph_.nodes.at(nodeId);
    // Fixed tensor slots within the layer's input list for the host-side data.
    const uint32_t tokenOffsetTensorId = 19;
    const uint32_t seqLenTensorId = 20;
    const uint32_t layerIdTensorId = 21;
    node.variantPack.inTensors.at(tokenOffsetTensorId).hostData = tokenOffset_.data();
    node.variantPack.inTensors.at(seqLenTensorId).hostData = seqLen_.data();
    node.variantPack.inTensors.at(layerIdTensorId).hostData = &layerId_;
    return atb::NO_ERROR;
}