昇腾社区首页
中文
注册

算子调用示例

算子调用示例请参考源码

用例整体介绍

本用例为ATB单算子示例。

该用例整体包括:

  • 准备atb::VariantPack的输入tensor部分。
  • 准备算子参数并创建算子。
  • 主函数:通用设置卡号、创建context、设置stream部分;算子Setup,执行;清理内存,释放资源。

目录结构介绍

atb-linear-demo 示例工程
|-- demo_util.h
|-- linear_demo.cpp

Linear算子调用示例

  • demo_util.h
      1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
     81
     82
     83
     84
     85
     86
     87
     88
     89
     90
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    #include <iostream>
    #include <vector>
    #include <numeric>
    #include "acl/acl.h"
    #include "atb/operation.h"
    #include "atb/types.h"
    #include "atb/atb_infer.h"
    
    // Abort on a non-zero ACL/ATB status: logs file, line and the status code to
    // stdout, then terminates the whole process with exit(1).
    // The do { ... } while (0) wrapper makes the macro expand to a single
    // statement, so it composes safely with unbraced if/else.
    #define CHECK_STATUS(status)                                                                   \
        do {                                                                                       \
            if ((status) != 0) {                                                                   \
                std::cout << __FILE__ << ":" << __LINE__ << " [error]: " << (status) << std::endl; \
                exit(1);                                                                           \
            }                                                                                      \
        } while (0)
    
    // Like CHECK_STATUS, but instead of exiting the process, evaluates the
    // caller-supplied expression `expr` (e.g. a cleanup call, `break`, or
    // `return code`) when the status is non-zero.
    #define CHECK_STATUS_EXPR(status, expr)                                                        \
        do {                                                                                       \
            if ((status) != 0) {                                                                   \
                std::cout << __FILE__ << ":" << __LINE__ << " [error]: " << (status) << std::endl; \
                expr;                                                                              \
            }                                                                                      \
        } while (0)
    
    /**
     * @brief Allocate a device-resident tensor with the given type, format and shape.
     * @param dataType element data type
     * @param format memory format (e.g. ND)
     * @param shape dimension sizes, copied into the tensor descriptor
     * @return atb::Tensor tensor whose deviceData points at freshly allocated
     *         device memory of dataSize bytes; the caller owns and must free it
     *         with aclrtFree
     */
    atb::Tensor CreateTensor(const aclDataType dataType, const aclFormat format, std::vector<int64_t> shape)
    {
        atb::Tensor result;
        result.desc.dtype = dataType;
        result.desc.format = format;
        result.desc.shape.dimNum = shape.size();
        // Copy every dimension size from the shape vector into the descriptor.
        size_t dimIndex = 0;
        for (const int64_t dimSize : shape) {
            result.desc.shape.dims[dimIndex++] = dimSize;
        }
        // Total byte size is derived from dtype and shape by the ATB utility.
        result.dataSize = atb::Utils::GetTensorSize(result);
        CHECK_STATUS(aclrtMalloc(&result.deviceData, result.dataSize, aclrtMemMallocPolicy::ACL_MEM_MALLOC_HUGE_FIRST));
        return result;
    }
    
    /**
     * @brief Convert a tensor to another data type by running an Elewise
     *        ELEWISE_CAST operation.
     * @param contextPtr context pointer
     * @param stream stream to synchronize on after execution
     * @param inTensor input tensor (device-resident)
     * @param outTensorType output tensor data type
     * @param shape output tensor shape
     * @return atb::Tensor the converted tensor; caller owns its deviceData
     * @note the input tensor's device memory is NOT freed here
     */
    atb::Tensor CastOp(atb::Context *contextPtr, aclrtStream stream, const atb::Tensor inTensor,
        const aclDataType outTensorType, std::vector<int64_t> shape)
    {
        uint64_t workspaceSize = 0;
        void *workspace = nullptr;
        // Build the Elewise ELEWISE_CAST operation.
        atb::infer::ElewiseParam castParam;
        castParam.elewiseType = atb::infer::ElewiseParam::ELEWISE_CAST;
        castParam.outTensorType = outTensorType;
        atb::Operation *castOp = nullptr;
        CHECK_STATUS(CreateOperation(castParam, &castOp));
        atb::Tensor outTensor = CreateTensor(outTensorType, aclFormat::ACL_FORMAT_ND, shape);  // cast output tensor
        atb::VariantPack castVariantPack;                                                      // in/out tensor pack
        castVariantPack.inTensors = {inTensor};
        castVariantPack.outTensors = {outTensor};
        // Setup validates the input/output tensors and reports the workspace size.
        CHECK_STATUS(castOp->Setup(castVariantPack, workspaceSize, contextPtr));
        if (workspaceSize > 0) {
            CHECK_STATUS(aclrtMalloc(&workspace, workspaceSize, aclrtMemMallocPolicy::ACL_MEM_MALLOC_HUGE_FIRST));
        }
        // Launch ELEWISE_CAST on the device.
        CHECK_STATUS(castOp->Execute(castVariantPack, (uint8_t *)workspace, workspaceSize, contextPtr));
        CHECK_STATUS(aclrtSynchronizeStream(stream));  // wait for device-side computation to finish
        if (workspaceSize > 0) {
            CHECK_STATUS(aclrtFree(workspace));  // release the workspace
        }
        // BUGFIX: the operation object was previously leaked on every call;
        // destroy it now that execution has completed (mirrors main()'s
        // atb::DestroyOperation handling of linearOp).
        CHECK_STATUS(atb::DestroyOperation(castOp));
        return outTensor;
    }
    
    /**
     * @brief Create a device tensor initialized from host vector data.
     * @details Host data is first uploaded into a tensor of an intermediate
     *          type (32-bit float for the floating-point family, otherwise
     *          outTensorType itself), then cast to outTensorType if the two
     *          types differ.
     * @param contextPtr context pointer
     * @param stream stream used by the intermediate cast op
     * @param data host-side values to copy to the device
     * @param outTensorType desired tensor data type
     * @param format tensor format, e.g. NZ or ND
     * @param shape tensor shape
     * @param inTensorType optional: when it equals outTensorType (and is not
     *        ACL_DT_UNDEFINED) the data is uploaded directly with no cast
     * @return atb::Tensor the created tensor; caller owns its deviceData
     */
    template <typename T>
    atb::Tensor CreateTensorFromVector(atb::Context *contextPtr, aclrtStream stream, std::vector<T> data,
        const aclDataType outTensorType, const aclFormat format, std::vector<int64_t> shape,
        const aclDataType inTensorType = ACL_DT_UNDEFINED)
    {
        aclDataType intermediateType;
        switch (outTensorType) {
            case aclDataType::ACL_FLOAT:
            case aclDataType::ACL_FLOAT16:
            case aclDataType::ACL_BF16:
                // Float-family targets are staged as 32-bit float on upload.
                intermediateType = aclDataType::ACL_FLOAT;
                break;
            default:
                intermediateType = outTensorType;
        }
        if (inTensorType == outTensorType && inTensorType != ACL_DT_UNDEFINED) {
            // Caller asserts the host data already matches the target type.
            intermediateType = outTensorType;
        }
        atb::Tensor tensor = CreateTensor(intermediateType, format, shape);
        CHECK_STATUS(aclrtMemcpy(
            tensor.deviceData, tensor.dataSize, data.data(), sizeof(T) * data.size(), ACL_MEMCPY_HOST_TO_DEVICE));
        if (intermediateType == outTensorType) {
            // Already the requested type; no cast needed.
            return tensor;
        }
        atb::Tensor castTensor = CastOp(contextPtr, stream, tensor, outTensorType, shape);
        // BUGFIX: the intermediate staging tensor's device memory was
        // previously leaked. CastOp synchronizes the stream before returning,
        // so the staging buffer is no longer in use and can be freed here.
        CHECK_STATUS(aclrtFree(tensor.deviceData));
        return castTensor;
    }
    
  • linear_demo.cpp
      1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
     81
     82
     83
     84
     85
     86
     87
     88
     89
     90
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    #include <iostream>
    #include <vector>
    #include <numeric>
    #include "acl/acl.h"
    #include "atb/operation.h"
    #include "atb/types.h"
    #include <atb/atb_infer.h>
    
    #include "demo_util.h"
    const int32_t DEVICE_ID = 0;        // NPU device index used by the demo
    const uint32_t X_DIM_0 = 2;         // x rows: x is [2, 3]
    const uint32_t X_DIM_1 = 3;         // x cols (must equal weight rows)
    const uint32_t WEIGHT_DIM_0 = 3;    // weight rows: weight is [3, 2]
    const uint32_t WEIGHT_DIM_1 = 2;    // weight cols (= output cols)
    const uint32_t BIAS_DIM_0 = 2;      // bias length (= weight cols)
    
    /**
     * @brief Build the input tensor list (x, weight, bias) for the Linear op.
     * @param contextPtr context pointer
     * @param stream stream used by the intermediate cast ops
     * @return atb::SVector<atb::Tensor> input tensors in operator order:
     *         x, weight, bias; the caller owns their device memory
     */
    atb::SVector<atb::Tensor> PrepareInTensor(atb::Context *contextPtr, aclrtStream stream)
    {
        // x: FLOAT16 tensor of shape [2, 3].
        atb::Tensor x = CreateTensorFromVector(
            contextPtr, stream, std::vector<float>{1, 2, 3, 4, 5, 6}, ACL_FLOAT16,
            aclFormat::ACL_FORMAT_ND, {X_DIM_0, X_DIM_1});
        // weight: FLOAT16 tensor of shape [3, 2].
        atb::Tensor weight = CreateTensorFromVector(
            contextPtr, stream, std::vector<float>{1, 2, 3, 4, 5, 6}, ACL_FLOAT16,
            aclFormat::ACL_FORMAT_ND, {WEIGHT_DIM_0, WEIGHT_DIM_1});
        // bias: FLOAT16 tensor of shape [1, 2], every element 1.0.
        atb::Tensor bias = CreateTensorFromVector(
            contextPtr, stream, std::vector<float>(BIAS_DIM_0, 1.0), ACL_FLOAT16,
            aclFormat::ACL_FORMAT_ND, {1, BIAS_DIM_0});
        atb::SVector<atb::Tensor> inputs = {x, weight, bias};
        return inputs;
    }
    
    /**
     * @brief Create a Linear operation (matmul of x and weight plus bias).
     * @return atb::Operation* the created operation; caller releases it with
     *         atb::DestroyOperation
     */
    atb::Operation *CreateLinearOperation()
    {
        atb::infer::LinearParam linearParam;
        linearParam.transposeA = false;  // x is used as-is
        linearParam.transposeB = false;  // weight is used as-is
        linearParam.hasBias = true;      // a bias tensor is supplied
        // ACL_DT_UNDEFINED: output dtype follows the operator's default
        // behavior — confirm against the ATB LinearParam documentation.
        linearParam.outDataType = aclDataType::ACL_DT_UNDEFINED;
        linearParam.enAccum = false;
        linearParam.matmulType = atb::infer::LinearParam::MATMUL_UNDEFINED;
        atb::Operation *operation = nullptr;
        CHECK_STATUS(atb::CreateOperation(linearParam, &operation));
        return operation;
    }
    
    int main(int argc, char **argv)
    {
        // 设置卡号、创建context、设置stream
        atb::Context *context = nullptr;
        void *stream = nullptr;
    
        CHECK_STATUS(aclInit(nullptr));
        CHECK_STATUS(aclrtSetDevice(DEVICE_ID));
        CHECK_STATUS(atb::CreateContext(&context));
        CHECK_STATUS(aclrtCreateStream(&stream));
        context->SetExecuteStream(stream);
    
        // 创建op
        atb::Operation *linearOp = CreateLinearOperation();
        // 准备输入tensor
        atb::VariantPack variantPack;
        variantPack.inTensors = PrepareInTensor(context, stream);  // 放入输入tensor
        // 准备输出tensor
        atb::Tensor output = CreateTensor(ACL_FLOAT16, aclFormat::ACL_FORMAT_ND, {X_DIM_0, WEIGHT_DIM_1});
        variantPack.outTensors = {output};  // 放入输出tensor
    
        uint64_t workspaceSize = 0;
        // 计算workspaceSize大小
        CHECK_STATUS(linearOp->Setup(variantPack, workspaceSize, context));
        uint8_t *workspacePtr = nullptr;
        if (workspaceSize > 0) {
            CHECK_STATUS(aclrtMalloc((void **)(&workspacePtr), workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST));
        }
        // linear执行
        linearOp->Execute(variantPack, workspacePtr, workspaceSize, context);
        CHECK_STATUS(aclrtSynchronizeStream(stream));  // 流同步,等待device侧任务计算完成
    
        // 释放资源
        for (atb::Tensor &inTensor : variantPack.inTensors) {
            CHECK_STATUS(aclrtFree(inTensor.deviceData));
        }
        for (atb::Tensor &outTensor : variantPack.outTensors) {
            CHECK_STATUS(aclrtFree(outTensor.deviceData));
        }
        if (workspaceSize > 0) {
            CHECK_STATUS(aclrtFree(workspacePtr));
        }
        CHECK_STATUS(atb::DestroyOperation(linearOp));  // operation,对象概念,先释放
        CHECK_STATUS(aclrtDestroyStream(stream));
        CHECK_STATUS(DestroyContext(context));  // context,全局资源,后释放
        CHECK_STATUS(aclFinalize());
        std::cout << "Linear demo success!" << std::endl;
        return 0;
    }
    

用例运行

  1. 调用前需设置ascend-toolkit、atb环境变量:
    1
    2
    source ${toolkit安装目录}/set_env.sh # 如source /usr/local/Ascend/ascend-toolkit/set_env.sh
    source ${nnal安装目录}/atb/set_env.sh # 如source /usr/local/Ascend/nnal/atb/set_env.sh
    
  2. 按以下步骤编译:
    1
    g++ -I "${ATB_HOME_PATH}/include" -I "${ASCEND_HOME_PATH}/include" -L "${ATB_HOME_PATH}/lib" -L "${ASCEND_HOME_PATH}/lib64" xxxx.cpp demo_util.h -l atb -l ascendcl -o xxxx
    
  3. 执行用例:
    ./xxxx
  • 使用hccl算子时,需要额外增加编译选项:-D_GLIBCXX_USE_CXX11_ABI=0。
  • 使用反向等训练算子时,需要动态链接:"${ATB_HOME_PATH}/lib/libatb_train.so",即更新编译命令为:
    g++ ... demo_util.h "${ATB_HOME_PATH}/lib/libatb_train.so" ...