本用例为ATB单算子示例。
该用例整体包括:
atb-linear-demo 示例工程
|-- demo_util.h
|-- linear_demo.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 | #include <iostream> #include <vector> #include <numeric> #include "acl/acl.h" #include "atb/operation.h" #include "atb/types.h" #include "atb/atb_infer.h" #define CHECK_STATUS(status) \ do { \ if ((status) != 0) { \ std::cout << __FILE__ << ":" << __LINE__ << " [error]: " << (status) << std::endl; \ exit(1); \ } \ } while (0) #define CHECK_STATUS_EXPR(status, expr) \ do { \ if ((status) != 0) { \ std::cout << __FILE__ << ":" << __LINE__ << " [error]: " << (status) << std::endl; \ expr; \ } \ } while (0) /** * @brief 创建一个Tensor对象 * @param dataType 数据类型 * @param format 数据格式 * @param shape 数据shape * @return atb::Tensor 返回创建的Tensor对象 */ atb::Tensor CreateTensor(const aclDataType dataType, const aclFormat format, std::vector<int64_t> shape) { atb::Tensor tensor; tensor.desc.dtype = dataType; tensor.desc.format = format; tensor.desc.shape.dimNum = shape.size(); // tensor的dim依次设置为shape中元素 for (size_t i = 0; i < shape.size(); i++) { tensor.desc.shape.dims[i] = shape.at(i); } tensor.dataSize = atb::Utils::GetTensorSize(tensor); // 计算Tensor的数据大小 CHECK_STATUS(aclrtMalloc(&tensor.deviceData, tensor.dataSize, aclrtMemMallocPolicy::ACL_MEM_MALLOC_HUGE_FIRST)); return tensor; } /** * @brief 进行数据类型转换,调用Elewise的cast Op * @param contextPtr context指针 * @param stream stream * @param inTensor 输入tensor * @param outTensorType 输出tensor的数据类型 * @param shape 输出tensor的shape * @return atb::Tensor 转换后的tensor */ atb::Tensor CastOp(atb::Context *contextPtr, aclrtStream stream, const atb::Tensor inTensor, const aclDataType outTensorType, std::vector<int64_t> shape) { uint64_t workspaceSize = 0; void *workspace = 
nullptr; // 创建Elewise的ELEWISE_CAST atb::infer::ElewiseParam castParam; castParam.elewiseType = atb::infer::ElewiseParam::ELEWISE_CAST; castParam.outTensorType = outTensorType; atb::Operation *castOp = nullptr; CHECK_STATUS(CreateOperation(castParam, &castOp)); atb::Tensor outTensor = CreateTensor(outTensorType, aclFormat::ACL_FORMAT_ND, shape); // cast输出tensor atb::VariantPack castVariantPack; // 参数包 castVariantPack.inTensors = {inTensor}; castVariantPack.outTensors = {outTensor}; // 在Setup接口调用时对输入tensor和输出tensor进行校验。 CHECK_STATUS(castOp->Setup(castVariantPack, workspaceSize, contextPtr)); if (workspaceSize > 0) { CHECK_STATUS(aclrtMalloc(&workspace, workspaceSize, aclrtMemMallocPolicy::ACL_MEM_MALLOC_HUGE_FIRST)); } // ELEWISE_CAST执行 CHECK_STATUS(castOp->Execute(castVariantPack, (uint8_t *)workspace, workspaceSize, contextPtr)); CHECK_STATUS(aclrtSynchronizeStream(stream)); // 流同步,等待device侧任务计算完成 if (workspaceSize > 0) { CHECK_STATUS(aclrtFree(workspace)); // 清理工作空间 } return outTensor; } /** * @brief 简单封装,拷贝vector data中数据以创建tensor * @details 用于创建outTensorType类型的tensor * @param contextPtr context指针 * @param stream stream * @param data 输入vector数据 * @param outTensorType 期望输出tensor数据类型 * @param format 输出tensor的格式,即NZ,ND等 * @param shape 输出tensor的shape * @return atb::Tensor 返回创建的tensor */ template <typename T> atb::Tensor CreateTensorFromVector(atb::Context *contextPtr, aclrtStream stream, std::vector<T> data, const aclDataType outTensorType, const aclFormat format, std::vector<int64_t> shape, const aclDataType inTensorType = ACL_DT_UNDEFINED) { atb::Tensor tensor; aclDataType intermediateType; switch (outTensorType) { case aclDataType::ACL_FLOAT: case aclDataType::ACL_FLOAT16: case aclDataType::ACL_BF16: intermediateType = aclDataType::ACL_FLOAT; break; default: intermediateType = outTensorType; } if (inTensorType == outTensorType && inTensorType != ACL_DT_UNDEFINED) { intermediateType = outTensorType; } tensor = CreateTensor(intermediateType, format, shape); 
CHECK_STATUS(aclrtMemcpy( tensor.deviceData, tensor.dataSize, data.data(), sizeof(T) * data.size(), ACL_MEMCPY_HOST_TO_DEVICE)); if (intermediateType == outTensorType) { // 原始创建的tensor类型,不需要转换 return tensor; } return CastOp(contextPtr, stream, tensor, outTensorType, shape); } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | #include <iostream> #include <vector> #include <numeric> #include "acl/acl.h" #include "atb/operation.h" #include "atb/types.h" #include <atb/atb_infer.h> #include "demo_util.h" const int32_t DEVICE_ID = 0; const uint32_t X_DIM_0 = 2; const uint32_t X_DIM_1 = 3; const uint32_t WEIGHT_DIM_0 = 3; const uint32_t WEIGHT_DIM_1 = 2; const uint32_t BIAS_DIM_0 = 2; /** * @brief 准备atb::VariantPack * @param contextPtr context指针 * @param stream stream * @return atb::SVector<atb::Tensor> atb::VariantPack * @note 需要传入所有host侧tensor */ atb::SVector<atb::Tensor> PrepareInTensor(atb::Context *contextPtr, aclrtStream stream) { // 创建shape为[2, 3]的输入x tensor atb::Tensor xFloat = CreateTensorFromVector(contextPtr, stream, std::vector<float>{1, 2, 3, 4, 5, 6}, ACL_FLOAT16, aclFormat::ACL_FORMAT_ND, {X_DIM_0, X_DIM_1}); // 创建shape为[3, 2]的输入weight tensor atb::Tensor weightFloat = CreateTensorFromVector(contextPtr, stream, std::vector<float>{1, 2, 3, 4, 5, 6}, ACL_FLOAT16, aclFormat::ACL_FORMAT_ND, {WEIGHT_DIM_0, WEIGHT_DIM_1}); // 创建shape为[2]的输入bias tensor atb::Tensor biasFloat = CreateTensorFromVector( contextPtr, stream, std::vector<float>(BIAS_DIM_0, 1.0), ACL_FLOAT16, aclFormat::ACL_FORMAT_ND, {1, BIAS_DIM_0}); atb::SVector<atb::Tensor> inTensors = {xFloat, weightFloat, biasFloat}; return inTensors; } /** * @brief 创建一个linear operation * @return atb::Operation * 返回一个Operation指针 */ atb::Operation *CreateLinearOperation() { atb::infer::LinearParam param; param.transposeA = false; param.transposeB = false; param.hasBias = true; param.outDataType = aclDataType::ACL_DT_UNDEFINED; param.enAccum = false; param.matmulType = 
atb::infer::LinearParam::MATMUL_UNDEFINED; atb::Operation *LinearOp = nullptr; CHECK_STATUS(atb::CreateOperation(param, &LinearOp)); return LinearOp; } int main(int argc, char **argv) { // 设置卡号、创建context、设置stream atb::Context *context = nullptr; void *stream = nullptr; CHECK_STATUS(aclInit(nullptr)); CHECK_STATUS(aclrtSetDevice(DEVICE_ID)); CHECK_STATUS(atb::CreateContext(&context)); CHECK_STATUS(aclrtCreateStream(&stream)); context->SetExecuteStream(stream); // 创建op atb::Operation *linearOp = CreateLinearOperation(); // 准备输入tensor atb::VariantPack variantPack; variantPack.inTensors = PrepareInTensor(context, stream); // 放入输入tensor // 准备输出tensor atb::Tensor output = CreateTensor(ACL_FLOAT16, aclFormat::ACL_FORMAT_ND, {X_DIM_0, WEIGHT_DIM_1}); variantPack.outTensors = {output}; // 放入输出tensor uint64_t workspaceSize = 0; // 计算workspaceSize大小 CHECK_STATUS(linearOp->Setup(variantPack, workspaceSize, context)); uint8_t *workspacePtr = nullptr; if (workspaceSize > 0) { CHECK_STATUS(aclrtMalloc((void **)(&workspacePtr), workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST)); } // linear执行 linearOp->Execute(variantPack, workspacePtr, workspaceSize, context); CHECK_STATUS(aclrtSynchronizeStream(stream)); // 流同步,等待device侧任务计算完成 // 释放资源 for (atb::Tensor &inTensor : variantPack.inTensors) { CHECK_STATUS(aclrtFree(inTensor.deviceData)); } for (atb::Tensor &outTensor : variantPack.outTensors) { CHECK_STATUS(aclrtFree(outTensor.deviceData)); } if (workspaceSize > 0) { CHECK_STATUS(aclrtFree(workspacePtr)); } CHECK_STATUS(atb::DestroyOperation(linearOp)); // operation,对象概念,先释放 CHECK_STATUS(aclrtDestroyStream(stream)); CHECK_STATUS(DestroyContext(context)); // context,全局资源,后释放 CHECK_STATUS(aclFinalize()); std::cout << "Linear demo success!" << std::endl; return 0; } |
# Load the CANN toolkit environment variables.
source ${toolkit安装目录}/set_env.sh   # e.g. source /usr/local/Ascend/ascend-toolkit/set_env.sh
# Load the ATB (NNAL) environment variables.
source ${nnal安装目录}/atb/set_env.sh  # e.g. source /usr/local/Ascend/nnal/atb/set_env.sh
# Compile the demo. demo_util.h is already #include'd by the .cpp, so it must
# NOT be passed to g++ as a translation unit — a bare .h argument makes g++
# precompile it into a .gch instead of compiling/linking it.
g++ -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" xxxx.cpp -l atb -l ascendcl -o xxxx
# Run the compiled executable (replace xxxx with the output name used above).
./xxxx
# NOTE(review): alternative link line that passes the ATB shared library file
# directly instead of `-l atb` — presumably libatb_train.so is the
# training-scenario build of ATB; confirm which .so your NNAL package ships
# before using this form.
g++ ... demo_util.h "${ATB_HOME_PATH}/lib/libatb_train.so" ...