预留接口

本章节列出的接口均为预留接口，后续有可能变更或废弃，不建议开发者使用，开发者无需关注。

Matmul

__aicore__ inline void SetSubBlockIdx(uint8_t subBlockIdx);
__aicore__ inline uint8_t GetSubBlockIdx();
__aicore__ inline void SetTensorAWithCopy(const GlobalTensor<SrcAT>& gm, const LocalTensor<SrcAT> &leftMatrix, bool isTransposeA = false);
__aicore__ inline void SetTensorBWithCopy(const GlobalTensor<SrcBT>& gm, const LocalTensor<SrcBT> &rightMatrix, bool isTransposeB = false);

ConfusionTranspose

void GetConfusionTransposeOnlyTilingInfo(const ge::Shape &srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, optiling::ConfusionTransposeTiling &tiling);

内存管理和同步控制

TPipe
- GetAbsAddr

__aicore__ constexpr Hardware GetPhyType(TPosition pos);
template <typename T, TPosition pos> __aicore__ inline bool PopStackBuffer(LocalTensor<T>& popLocal);

矢量计算

template <typename T, bool isSetMask = true> __aicore__ inline void RepeatReduceSum(const LocalTensor<T>& dstLocal, const LocalTensor<T>& srcLocal, const int32_t repeat, const int32_t elemsInOneRepeate, const int32_t dstBlkStride, const int32_t srcBlkStride, const int32_t dstRepStride, const int32_t srcRepStride);
template <typename T, typename U> __aicore__ inline void Select(const LocalTensor<T>& dstLocal, const LocalTensor<U>& selMask, const LocalTensor<T>& src0Local, uint8_t repeatTimes, const BinaryRepeatParams& repeatParams);
template <typename T, SELMODE selMode> __aicore__ inline void Select(const LocalTensor<T>& dstLocal, const LocalTensor<T>& src0Local, const LocalTensor<T>& src1Local, uint8_t repeatTimes, const BinaryRepeatParams& repeatParams);

矩阵计算

template <typename DstT, typename SrcT, const FixpipeConfig& config = CFG_ROW_MAJOR> void Fixpipe(const LocalTensor<DstT>& dstLocal, const LocalTensor<SrcT>& srcLocal, const FixpipeParamsV220& intriParams);
template <typename DstT, typename SrcT, const FixpipeConfig& config = CFG_ROW_MAJOR> void Fixpipe(const LocalTensor<DstT>& dstLocal, const LocalTensor<SrcT>& srcLocal, const LocalTensor<uint64_t>& cbufWorkspace, const FixpipeParamsV220& intriParams);
template <typename T, typename U> __aicore__ inline __inout_pipe__(V) void BroadCastVecToMM(const LocalTensor<T> &dstLocal, const LocalTensor<U> &srcLocal, const int32_t blockCount, const uint8_t blockLen, const uint8_t srcGap, const uint8_t dstGap);

其他

__aicore__ void SetSysWorkSpacePtr(__gm__ uint8_t* workspace)
template <typename T> __aicore__ inline __in_pipe__(V) __out_pipe__(MTE3) void InitOutput(GlobalTensor<T> gmWorkspaceAddr, uint32_t size, T value = 0);

#define GET_TILING_DATA_MEMBER(tiling_type, member, var, tiling) tiling_type point##var
template <typename T> __aicore__ inline void SetCmpMask(const LocalTensor<T>& src);

父主题： Ascend C Kernel API