TransposeOperation
功能
改变输入Tensor的排列顺序,在多个维度上进行转置。
图1 TransposeOperation


定义
struct TransposeParam {
    SVector<int32_t> perm;
    uint8_t rsv[8] = {0};
};
参数列表
成员名称 |
类型 |
默认值 |
描述 |
|---|---|---|---|
perm |
SVector<int32_t> |
- |
指示输入维度的重排结果:输出的第i维对应输入的第perm[i]维(维度索引从0开始)。 例如:输入tensor的shape是[a,b,c],perm是[0,2,1],表明交换第1维和第2维(即最后两个维度),输出tensor的shape是[a,c,b]。 perm的取值需由调用方保证正确,且元素个数须与输入x的维度数一致。 |
rsv[8] |
uint8_t |
{0} |
预留参数。 |
输入
参数 |
维度 |
数据类型 |
格式 |
|---|---|---|---|
x |
[dim_0, dim_1, ..., dim_n] |
float16/bf16/int64/int8/int32 |
ND |
输出
参数 |
维度 |
数据类型 |
格式 |
|---|---|---|---|
output |
维度由输入x的shape和参数perm确定 |
float16/bf16/int64/int8/int32,数据类型与x保持一致。 |
ND |
规格约束
算子调用示例(C++)
前置条件和编译命令请参见算子调用示例。
场景:基础场景。
#include <iostream>
#include <vector>
#include <numeric>
#include "acl/acl.h"
#include "atb/operation.h"
#include "atb/types.h"
#include "atb/atb_infer.h"
#include "demo_util.h"
// Sizes of the first and second dimension of the demo input tensor x
// (PrepareInTensor builds its shape as {DIM1, DIM2}).
constexpr uint32_t DIM1 = 2;
constexpr uint32_t DIM2 = 3;
/**
 * @brief Build every input tensor that goes into the atb::VariantPack.
 * @param contextPtr context pointer, forwarded to CreateTensorFromVector
 * @param stream stream, forwarded to CreateTensorFromVector
 * @return atb::SVector<atb::Tensor> the input tensors for the atb::VariantPack (only x here)
 * @note All host-side tensors have to be passed in.
 */
atb::SVector<atb::Tensor> PrepareInTensor(atb::Context *contextPtr, aclrtStream stream)
{
    // Input x: shape {DIM1, DIM2}, every host value is 1.0; the tensor is
    // requested as ACL_FLOAT16 in ND format.
    std::vector<int64_t> shapeOfX = {DIM1, DIM2};
    std::vector<float> hostValues(DIM1 * DIM2, 1.0f);
    atb::Tensor tensorX = CreateTensorFromVector(
        contextPtr, stream, hostValues, ACL_FLOAT16, aclFormat::ACL_FORMAT_ND, shapeOfX);

    return {tensorX};
}
/**
 * @brief Create a Transpose Operation and set its parameters.
 * @return atb::Operation * pointer to the created Operation
 */
atb::Operation *PrepareOperation()
{
    // perm = {1, 0}: exchange the two axes of the 2-D input.
    atb::SVector<int32_t> axisOrder = {1, 0};
    atb::infer::TransposeParam param;
    param.perm = axisOrder;

    atb::Operation *op = nullptr;
    CHECK_STATUS(atb::CreateOperation(param, &op));
    return op;
}
int main(int argc, char **argv)
{
// 设置卡号、创建context、设置stream
CHECK_STATUS(aclInit(nullptr));
int32_t deviceId = 0;
CHECK_STATUS(aclrtSetDevice(deviceId));
atb::Context *context = nullptr;
CHECK_STATUS(atb::CreateContext(&context));
void *stream = nullptr;
CHECK_STATUS(aclrtCreateStream(&stream));
context->SetExecuteStream(stream);
// Transpose示例
atb::Operation *transposeOp = PrepareOperation();
// 准备输入tensor
atb::VariantPack transposeVariantPack;
transposeVariantPack.inTensors = PrepareInTensor(context, stream); // 放入输入tensor
atb::Tensor tensorOut = CreateTensor(ACL_FLOAT16, aclFormat::ACL_FORMAT_ND, {DIM2, DIM1});
transposeVariantPack.outTensors = {tensorOut}; // 放入输出tensor
uint64_t workspaceSize = 0;
// 计算workspace大小
CHECK_STATUS(transposeOp->Setup(transposeVariantPack, workspaceSize, context));
uint8_t *workspacePtr = nullptr;
if (workspaceSize > 0) {
CHECK_STATUS(aclrtMalloc((void **)(&workspacePtr), workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST));
}
// reduce执行
transposeOp->Execute(transposeVariantPack, workspacePtr, workspaceSize, context);
CHECK_STATUS(aclrtSynchronizeStream(stream)); // 流同步,等待device侧任务计算完成
for (atb::Tensor &inTensor : transposeVariantPack.inTensors) {
CHECK_STATUS(aclrtFree(inTensor.deviceData));
}
if (workspaceSize > 0) {
CHECK_STATUS(aclrtFree(workspacePtr));
}
CHECK_STATUS(atb::DestroyOperation(transposeOp)); // operation,对象概念,先释放
CHECK_STATUS(aclrtDestroyStream(stream));
CHECK_STATUS(DestroyContext(context)); // context,全局资源,后释放
CHECK_STATUS(aclrtResetDevice());
CHECK_STATUS(aclFinalize());
std::cout << "Transpose demo success!" << std::endl;
return 0;
}