预留接口
本章节列出的接口均为预留接口，后续有可能变更或废弃，不建议开发者使用，开发者无需关注。
Matmul
- __aicore__ inline void SetSubBlockIdx(uint8_t subBlockIdx);
- __aicore__ inline uint8_t GetSubBlockIdx();
- __aicore__ inline void SetTensorAWithCopy(const GlobalTensor<SrcAT>& gm, const LocalTensor<SrcAT> &leftMatrix, bool isTransposeA = false);
- __aicore__ inline void SetTensorBWithCopy(const GlobalTensor<SrcBT>& gm, const LocalTensor<SrcBT> &rightMatrix, bool isTransposeB = false);
ConfusionTranspose
- void GetConfusionTransposeOnlyTilingInfo(const ge::Shape &srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, optiling::ConfusionTransposeTiling &tiling);
内存管理和同步控制
- TPipe
- GetAbsAddr
- __aicore__ constexpr Hardware GetPhyType(TPosition pos);
- template <typename T, TPosition pos> __aicore__ inline bool PopStackBuffer(LocalTensor<T>& popLocal);
矢量计算
- template <typename T, bool isSetMask = true> __aicore__ inline void RepeatReduceSum(const LocalTensor<T>& dstLocal, const LocalTensor<T>& srcLocal, const int32_t repeat, const int32_t elemsInOneRepeate, const int32_t dstBlkStride, const int32_t srcBlkStride, const int32_t dstRepStride, const int32_t srcRepStride);
- template <typename T, typename U> __aicore__ inline void Select(const LocalTensor<T>& dstLocal, const LocalTensor<U>& selMask, const LocalTensor<T>& src0Local, uint8_t repeatTimes, const BinaryRepeatParams& repeatParams);
- template <typename T, SELMODE selMode> __aicore__ inline void Select(const LocalTensor<T>& dstLocal, const LocalTensor<T>& src0Local, const LocalTensor<T>& src1Local, uint8_t repeatTimes, const BinaryRepeatParams& repeatParams);
矩阵计算
- template <typename DstT, typename SrcT, const FixpipeConfig& config = CFG_ROW_MAJOR> void Fixpipe(const LocalTensor<DstT>& dstLocal, const LocalTensor<SrcT>& srcLocal, const FixpipeParamsV220& intriParams);
- template <typename DstT, typename SrcT, const FixpipeConfig& config = CFG_ROW_MAJOR> void Fixpipe(const LocalTensor<DstT>& dstLocal, const LocalTensor<SrcT>& srcLocal, const LocalTensor<uint64_t>& cbufWorkspace, const FixpipeParamsV220& intriParams);
- template <typename T, typename U> __aicore__ inline __inout_pipe__(V) void BroadCastVecToMM(const LocalTensor<T> &dstLocal, const LocalTensor<U> &srcLocal, const int32_t blockCount, const uint8_t blockLen, const uint8_t srcGap, const uint8_t dstGap);
其他
- __aicore__ void SetSysWorkSpacePtr(__gm__ uint8_t* workspace);
- template <typename T> __aicore__ inline __in_pipe__(V) __out_pipe__(MTE3) void InitOutput(GlobalTensor<T> gmWorkspaceAddr, uint32_t size, T value = 0);
- #define GET_TILING_DATA_MEMBER(tiling_type, member, var, tiling) tiling_type point##var
- template <typename T> __aicore__ inline void SetCmpMask(const LocalTensor<T>& src);
父主题: Ascend C Kernel API