Reserved Interfaces

All interfaces listed in this chapter are reserved interfaces. They may be changed or deprecated later; developers are advised not to use them and do not need to pay attention to them.

Matmul

  • __aicore__ inline void SetSubBlockIdx(uint8_t subBlockIdx);
  • __aicore__ inline const handle SyncGroupJoin(uint32_t groupID);
  • __aicore__ inline void SetAntiQuantScalar(const SrcT offsetScalar, const SrcT scaleScalar);
  • __aicore__ inline void SetAntiQuantVector(const LocalTensor<SrcT> &offsetTensor, const LocalTensor<SrcT> &scaleTensor);
  • __aicore__ inline void SetTensorAWithCopy(const GlobalTensor<SrcAT>& gm, const LocalTensor<SrcAT> &leftMatrix, bool isTransposeA = false);
  • __aicore__ inline void SetTensorBWithCopy(const GlobalTensor<SrcBT>& gm, const LocalTensor<SrcBT> &rightMatrix, bool isTransposeB = false);
  • __aicore__ inline void SetUserDefInfo(const uint64_t tilingPtr);
  • __aicore__ inline void SetSelfDefineData(const uint64_t dataPtr);
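
The setters above are member functions of a matmul object. As a purely illustrative sketch (these are reserved interfaces, so none of this belongs in production code), the calls would look as follows; the object mm, its half element type, and the offsetTensor/scaleTensor/userTilingGm operands are assumptions, not taken from this list:

    // Hedged sketch only: mm is assumed to be an initialized matmul API
    // object whose source element type SrcT is half.
    half offset = static_cast<half>(0.0f);             // illustrative values
    half scale = static_cast<half>(0.01f);
    mm.SetAntiQuantScalar(offset, scale);               // per-tensor anti-quant params
    mm.SetAntiQuantVector(offsetTensor, scaleTensor);   // per-channel LocalTensor<half> variant
    mm.SetUserDefInfo(reinterpret_cast<uint64_t>(userTilingGm)); // user-defined tiling address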

LayerNorm

  • template <typename T, bool isReuseSource = false> __aicore__ inline void LayerNorm(const LocalTensor<T>& output, const LocalTensor<T>& outputMean, const LocalTensor<T>& outputVariance, const LocalTensor<T>& inputX, const LocalTensor<T>& gamma, const LocalTensor<T>& beta, const T epsilon, LayerNormTiling& tiling)
  • template <typename T, bool isReuseSource = false> __aicore__ inline void LayerNorm(const LocalTensor<T>& output, const LocalTensor<T>& outputMean, const LocalTensor<T>& outputVariance, const LocalTensor<T>& inputX, const LocalTensor<T>& gamma, const LocalTensor<T>& beta, const LocalTensor<uint8_t>& sharedTmpBuffer, const T epsilon, LayerNormTiling& tiling)
  • template <typename T, bool isReuseSource = false> __aicore__ inline void LayerNormGrad(const LocalTensor<T> &outputPdX, const LocalTensor<T> &resForGamma, const LocalTensor<T> &inputDy, const LocalTensor<T> &inputX, const LocalTensor<T> &inputVariance, const LocalTensor<T> &inputMean, const LocalTensor<T> &inputGamma, T epsilon, LayerNormGradTiling &tiling)
  • template <typename T, bool isReuseSource = false> __aicore__ inline void LayerNormGrad(const LocalTensor<T> &outputPdX, const LocalTensor<T> &resForGamma, const LocalTensor<T> &inputDy, const LocalTensor<T> &inputX, const LocalTensor<T> &inputVariance, const LocalTensor<T> &inputMean, const LocalTensor<T> &inputGamma, LocalTensor<uint8_t> &sharedTmpBuffer, T epsilon, LayerNormGradTiling &tiling)

  • void GetLayerNormMaxMinTmpSize(const ge::Shape& srcShape, const uint32_t typeSize, const bool isReuseSource, uint32_t& maxValue, uint32_t& minValue)
  • void GetLayerNormRedNDTillingInfo(const uint32_t stackBufferSize, const uint32_t typeSize, optiling::LayerNormRedTiling& tilling, optiling::LayerNormRedParams& redParams)
  • inline void GetLayerNormGradMaxMinTmpSize(const ge::Shape &srcShape, const uint32_t typeSize, const bool isReuseSource, uint32_t &maxValue, uint32_t &minValue)
  • inline void GetLayerNormGradNDTilingInfo(const ge::Shape srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, const bool isReuseSource, optiling::LayerNormGradTiling &tiling)
  • inline void GetLayerNormGradReduceTilingInfo(const uint32_t stackBufferSize, const uint32_t typeSize, optiling::LayerNormGradReduceTiling &tiling, optiling::LayerNormGradReduceParams &reduceParams, const bool isReuseSource = false)
  • void GetLayerNormGradBetaMaxMinTmpSize(const ge::Shape& srcShape, const uint32_t typeSize, const bool isReuseSource, uint32_t& maxValue, uint32_t& minValue)
  • void GetLayerNormGradBetaNDTilingInfo(const ge::Shape srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, const bool isReuseSource, optiling::LayerNormGradBetaTiling& tiling)
  • void GetLayerNormGradBetaReduceTilingInfo(const uint32_t stackBufferSize, const uint32_t typeSize, optiling::LayerNormGradBetaReduceTiling& tiling, optiling::LayerNormGradBetaReduceParams& params)
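
The Get*TmpSize queries run on the host and bound the scratch space expected by the kernel-side overloads that take a sharedTmpBuffer. A minimal host-side sketch, with the shape and the 2-byte half element size as illustrative assumptions:

    // Host-side sketch: query tmp-buffer bounds before generating tiling.
    ge::Shape srcShape({2, 32, 64});     // assumed [B, S, H] input shape
    uint32_t maxBytes = 0, minBytes = 0;
    GetLayerNormMaxMinTmpSize(srcShape, 2u /* sizeof(half) */, false, maxBytes, minBytes);
    // A sharedTmpBuffer of at least minBytes (ideally maxBytes) bytes can then
    // be handed to the kernel-side LayerNorm overload listed above.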

ConfusionTranspose

  • void GetConfusionTransposeOnlyTilingInfo(const ge::Shape &srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, optiling::ConfusionTransposeTiling &tiling);
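
A corresponding host-side sketch for this tiling query; the shape, the stack buffer size, and the half element size are illustrative assumptions:

    // Host-side sketch: fill the ConfusionTranspose tiling structure.
    ge::Shape srcShape({16, 64});                      // assumed 2-D shape
    optiling::ConfusionTransposeTiling tiling;
    GetConfusionTransposeOnlyTilingInfo(srcShape, 16 * 1024 /* assumed stack bytes */, 2u, tiling);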

AscendAntiQuant

  • inline uint32_t GetAscendAntiQuantMaxTmpSize(const ge::Shape &srcShape, const ge::Shape &scaleShape, bool isTranspose, ge::DataType inputDataType, ge::DataType outputDataType)
  • inline uint32_t GetAscendAntiQuantMinTmpSize(const ge::Shape &srcShape, const ge::Shape &scaleShape, bool isTranspose, ge::DataType inputDataType, ge::DataType outputDataType)
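
Both queries return a byte count directly. A hedged sketch for an int8-to-float16 anti-quantization, with the shapes chosen purely for illustration:

    // Host-side sketch: bound the tmp buffer for int8 -> float16 anti-quant.
    ge::Shape srcShape({32, 128});
    ge::Shape scaleShape({1, 128});                    // assumed per-channel scale
    uint32_t maxBytes = GetAscendAntiQuantMaxTmpSize(srcShape, scaleShape, false,
                                                     ge::DT_INT8, ge::DT_FLOAT16);
    uint32_t minBytes = GetAscendAntiQuantMinTmpSize(srcShape, scaleShape, false,
                                                     ge::DT_INT8, ge::DT_FLOAT16);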

Math Library

  • inline uint32_t GetSignMaxTmpSize(const ge::Shape srcShape, const uint32_t typeSize, const bool isReuseSource)
  • inline uint32_t GetSignMinTmpSize(const ge::Shape srcShape, const uint32_t typeSize, const bool isReuseSource)
  • inline uint32_t GetXorMaxTmpSize(const ge::Shape srcShape, const uint32_t typeSize, const bool isReuseSource)
  • inline uint32_t GetXorMinTmpSize(const ge::Shape srcShape, const uint32_t typeSize, const bool isReuseSource)
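
These follow the same host-side query pattern; a brief sketch with an assumed shape and 2-byte element size:

    // Host-side sketch: tmp-buffer bounds for Sign on a half-typed tensor.
    ge::Shape srcShape({8, 256});                      // assumed shape
    uint32_t signMax = GetSignMaxTmpSize(srcShape, 2u, false);
    uint32_t signMin = GetSignMinTmpSize(srcShape, 2u, false);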

Data Type Definitions

  • LocalTensor
    • SetAddrWithOffset
    • Print [CPU ONLY]
    • ToFile [CPU ONLY]
    • GetBufferHandle
    • GetPosition
    • GetLength
    • SetBufferLen
    • SetShapeInfo
    • GetShapeInfo
    • SetAddr
    • GetPhyAddr
    • operator()
  • GetShapeSize
  • GlobalTensor
    • SetAddr
    • GetValue
    • SetValue
    • SetShapeInfo
    • GetShapeInfo
    • operator()
  • TensorDesc class
  • ListTensorDesc class
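
Most members above are reserved, but GetValue/SetValue can be sketched in isolation. The public SetGlobalBuffer call used to bind the address, and the gmAddr pointer, are assumptions for illustration:

    // Kernel-side sketch: scalar element access on a GlobalTensor<float>.
    AscendC::GlobalTensor<float> gt;
    gt.SetGlobalBuffer(reinterpret_cast<__gm__ float*>(gmAddr), 128); // gmAddr assumed
    float v = gt.GetValue(0);   // scalar read from global memory
    gt.SetValue(0, v + 1.0f);   // scalar write back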

Memory Management and Synchronization Control

  • TPipe
    • Init
    • GetAbsAddr
    • InitShareBufStart
    • InitShareBufEnd
    • GetQueueEndAddress
    • Reset
    • Destroy
    • GetBaseAddr
    • ReleaseEvent
    • IsAivTscm
    • GetBaseAddr [CPU ONLY]
  • TBuf
    • GetWithOffset
    • SetTpipeBuf
  • __aicore__ constexpr Hardware GetPhyType(TPosition pos);
  • template <typename T, TPosition pos> __aicore__ inline bool PopStackBuffer(LocalTensor<T>& popLocal);
  • template <TPosition pos> __aicore__ inline bool PopStackBuffer(TBuf<pos>& popBuffer, TBufType& bufStart);
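
The tensor-form PopStackBuffer overload lends out the currently unused stack area as scratch space. A minimal sketch, with the half element type and the VECCALC position as assumptions:

    // Kernel-side sketch: borrow the free stack area as a LocalTensor<half>.
    AscendC::LocalTensor<half> tmp;
    if (AscendC::PopStackBuffer<half, AscendC::TPosition::VECCALC>(tmp)) {
        // tmp now aliases the stack buffer and may be used as temporary space
    }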

Vector Computation

  • template <typename T, bool isSetMask = true> __aicore__ inline void RepeatReduceSum(const LocalTensor<T>& dstLocal, const LocalTensor<T>& srcLocal, const int32_t repeat, const int32_t elemsInOneRepeate, const int32_t dstBlkStride, const int32_t srcBlkStride, const int32_t dstRepStride, const int32_t srcRepStride);
  • template <typename T, typename U> __aicore__ inline void Select(const LocalTensor<T>& dstLocal, const LocalTensor<U>& selMask, const LocalTensor<T>& src0Local, uint8_t repeatTimes, const BinaryRepeatParams& repeatParams);
  • template <typename T, SELMODE selMode> __aicore__ inline void Select(const LocalTensor<T>& dstLocal, const LocalTensor<T>& src0Local, const LocalTensor<T>& src1Local, uint8_t repeatTimes, const BinaryRepeatParams& repeatParams);
  • template <typename T> __aicore__ inline void GatherMask(const LocalTensor<T>& dstLocal, const LocalTensor<T>& src0Local, const LocalTensor<T>& src1Local, const uint8_t patternMode, const GatherMaskParams& gatherMaskParams);
  • template <typename T> __aicore__ inline void GatherMask(const LocalTensor<T>& dstLocal, const LocalTensor<T>& src0Local, const uint8_t patternMode, const GatherMaskParams& gatherMaskParams);
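
As a sketch of the first entry, RepeatReduceSum sums elemsInOneRepeate elements per repeat iteration. The tensors dst and src are assumed to be LocalTensor<half> buffers prepared elsewhere, and the stride values are illustrative:

    // Kernel-side sketch: per-repeat sum over 128 half elements, 4 repeats.
    AscendC::RepeatReduceSum<half>(dst, src, 4 /* repeat */, 128 /* elemsInOneRepeate */,
                                   1 /* dstBlkStride */, 1 /* srcBlkStride */,
                                   1 /* dstRepStride */, 8 /* srcRepStride */);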

Matrix Computation

  • template <typename T> __aicore__ inline void InitConstValue(const LocalTensor<T> &dstLocal, const InitConstValueParams<T> &initConstValueParams);
  • template <typename T> __aicore__ inline void LoadDataWithTranspose(const LocalTensor<T>& dstLocal, const LocalTensor<T>& srcLocal, const LoadData2dTransposeParams& loadDataParams);
  • __aicore__ inline void SetFmatrix(uint16_t l1H, uint16_t l1W, const uint8_t padList[4], const FmatrixMode &fmatrixMode);
  • __aicore__ inline void SetLoadDataRepeat(const LoadDataRepeatParam& repeatParams);
  • template <typename T> __aicore__ inline void SetLoadDataPaddingValue(const T padValue);
  • __aicore__ inline void SetLoadDataBoundary(uint32_t boundaryValue);
  • template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const LocalTensor<dst_T>& dstLocal, const LocalTensor<src_T>& srcLocal, const FixpipeParams<src_T>& intriParams);
  • template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const LocalTensor<dst_T>& dstLocal, const LocalTensor<src_T>& srcLocal, const LocalTensor<uint64_t>& cbufWorkspace, const FixpipeParams<src_T>& intriParams);
  • template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const GlobalTensor<dst_T>& dstGlobal, const LocalTensor<src_T>& srcLocal, const FixpipeParams<src_T>& intriParams);
  • template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const GlobalTensor<dst_T>& dstGlobal, const LocalTensor<src_T>& srcLocal, const LocalTensor<uint64_t>& cbufWorkspace, const FixpipeParams<src_T>& intriParams);
  • template <typename DstT, typename SrcT, const FixpipeConfig& config = CFG_ROW_MAJOR> void Fixpipe(const LocalTensor<DstT>& dstLocal, const LocalTensor<SrcT>& srcLocal, const FixpipeParamsV220& intriParams);
  • template <typename DstT, typename SrcT, const FixpipeConfig& config = CFG_ROW_MAJOR> void Fixpipe(const LocalTensor<DstT>& dstLocal, const LocalTensor<SrcT>& srcLocal, const LocalTensor<uint64_t>& cbufWorkspace, const FixpipeParamsV220& intriParams);
  • template <typename T> __aicore__ inline void SetFixPipeConfig(const LocalTensor<T> &reluPre, const LocalTensor<T> &quantPre, bool isUnitFlag = false);
  • template <typename T, bool setRelu = false> __aicore__ inline void SetFixPipeConfig(const LocalTensor<T> &preTensor, bool isUnitFlag = false);
  • __aicore__ inline void SetFixpipeNz2ndFlag(uint16_t ndNum, uint16_t srcNdStride, uint16_t dstNdStride);
  • __aicore__ inline void SetFixpipePreQuantFlag(uint64_t config);
  • template<typename T> __aicore__ inline void SetLeakyReluAlpha(T scaleValue);
  • template <typename T, typename U> __aicore__ inline __inout_pipe__(V) void BroadCastVecToMM(const LocalTensor<T> &dstLocal, const LocalTensor<U> &srcLocal, const int32_t blockCount, const uint8_t blockLen, const uint8_t srcGap, const uint8_t dstGap);
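
A sketch of the first entry; dstLocal is assumed to live in an L1 (A1/B1) buffer, and the field names of InitConstValueParams are assumptions based on the non-reserved variant of the structure:

    // Kernel-side sketch: fill dstLocal with a constant via InitConstValue.
    AscendC::InitConstValueParams<half> params;
    params.repeatTimes = 1;                   // assumed field name
    params.initValue = static_cast<half>(0);  // assumed field name
    AscendC::InitConstValue(dstLocal, params);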

Others

  • __aicore__ inline __gm__ uint8_t* __gm__ SetDumpWorkSpacePtr(__gm__ uint8_t* workspace)
  • __aicore__ inline __gm__ uint8_t* __gm__ GetDumpWorkSpacePtr()
  • __aicore__ void SetSysWorkSpacePtr(__gm__ uint8_t* workspace)
  • __aicore__ inline AscendC::RpcCommClient* GetRpcClient()
  • __aicore__ inline void ResetMask()
  • __aicore__ inline void SetLoadDataBoundary(uint32_t boundaryValue)
  • template <MemDsbT arg0> __aicore__ inline void DataSyncBarrier()
  • template <HardEvent event, MemoryT memT, bool isVirtual> __aicore__ inline void HSetFlag(int32_t eventID)
  • template <HardEvent event, MemoryT memT, bool isVirtual> __aicore__ inline void WaitFlag(int32_t eventID)
  • __aicore__ inline void PreLoad(const int64_t prefetchLen)
  • template <typename T> __aicore__ inline __inout_pipe__(S) void InitSyncID(GlobalTensor<T> gmWorkspace);
  • template <typename T> __aicore__ inline __in_pipe__(V) __out_pipe__(MTE3) void InitOutput(GlobalTensor<T> gmWorkspaceAddr, uint32_t size, T value = 0);
  • __aicore__ inline void InitDetermineComputeWorkspace(GlobalTensor<int32_t> &gmWorkspace, LocalTensor<int32_t> &ubWorkspace);
  • __aicore__ inline void NotifyNextBlock(GlobalTensor<int32_t> &gmWorkspace, LocalTensor<int32_t> &ubWorkspace);
  • __aicore__ inline void WaitPreBlock(GlobalTensor<int32_t> &gmWorkspace, LocalTensor<int32_t> &ubWorkspace);
  • template <typename T> __aicore__ inline void Concat(LocalTensor<T> &concatLocal, const LocalTensor<T> &srcLocal, const LocalTensor<T> &tmpLocal, const int32_t repeatTimes);
  • template <typename T> __aicore__ inline void Extract(const LocalTensor<T> &dstValueLocal, const LocalTensor<uint32_t> &dstIndexLocal, const LocalTensor<T> &sortedLocal, const int32_t repeatTimes);
  • template <typename T, bool isFullSort> __aicore__ inline void Sort(const LocalTensor<T> &dstLocal, const LocalTensor<T> &concatLocal, const LocalTensor<uint32_t> &indexLocal, LocalTensor<T> &tmpLocal, const int32_t repeatTimes);
  • template <typename T> __aicore__ inline void MrgSort(const LocalTensor<T>& dstLocal, const MrgSortSrcList<T>& srcLocal, const MrgSort4Info& params);
  • template <typename T, bool isExhaustedSuspension = false> __aicore__ inline void MrgSort(const LocalTensor<T> &dstLocal, const MrgSortSrcList<T> &sortList, const uint16_t elementCountList[4], uint32_t sortedNum[4], uint16_t validBit, const int32_t repeatTimes);
  • template <typename T> __aicore__ inline uint32_t GetSortLen(const uint32_t elemCount);
  • template <typename T> __aicore__ inline uint32_t GetSortOffset(const uint32_t elemOffset);
  • inline uint32_t GetSortTmpSize(const platform_ascendc::PlatformAscendC &ascendcPlatform, const uint32_t elemCount, const uint32_t dataTypeSize);
  • inline uint32_t GetConcatTmpSize(const platform_ascendc::PlatformAscendC &ascendcPlatform, const uint32_t elemCount, const uint32_t dataTypeSize);
  • __aicore__ inline void WaitEvent(uint16_t flagId);
  • template<pipe_t pipe> __aicore__ inline void NotifyEvent(uint16_t flagId);
  • __aicore__ inline int64_t GetStoreAtomicConfig();
  • template <AtomicDtype type, AtomicOp op> __aicore__ inline void SetStoreAtomicConfig();
  • __aicore__ inline int64_t GetAccVal();
  • __aicore__ inline int64_t GetReduceMaxMinCount();
  • __aicore__ inline void InitDump(uint32_t gmLen);
  • __aicore__ inline void InitDump(GM_ADDR dumpStartAddr, uint32_t gmLen);
  • template <typename T> __aicore__ inline void DumpAccChkPoint(LocalTensor<T> &tensor, uint32_t index, uint32_t countOff, uint32_t dumpSize);
  • template <typename T> __aicore__ inline void DumpAccChkPoint(GlobalTensor<T> &tensor, uint32_t index, uint32_t countOff, uint32_t dumpSize);
  • #define GET_TILING_DATA_WITH_STRUCT(tiling_struct, tiling_data, tiling_arg) tiling_struct tiling_data
  • #define GET_TILING_DATA_MEMBER(tiling_type, member, var, tiling) tiling_type point##var
  • #define KERNEL_TASK_TYPE(key, value) ENABLE_FEATURE_FOR_COMPILE(key, value)
  • template <typename T> void TransDataTo5HD(uint64_t dstLocalList[16], uint64_t srcLocalList[16], const TransDataTo5HDParams& transDataParams);
  • template <typename T> __aicore__ inline void SetCmpMask(const LocalTensor<T>& src);
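
Among the entries above, Concat, Sort, and Extract compose into the usual full-sort flow. A hedged sketch, assuming all tensors are adequately sized LocalTensor buffers prepared elsewhere (indexLocal and indexOut being LocalTensor<uint32_t>):

    // Kernel-side sketch of the full-sort flow built from the listed entries.
    AscendC::Concat(concatLocal, srcLocal, tmpLocal, repeatTimes);   // pack values into sort regions
    AscendC::Sort<half, true>(sortedLocal, concatLocal, indexLocal, tmpLocal, repeatTimes);
    AscendC::Extract(valueOut, indexOut, sortedLocal, repeatTimes);  // split values and indices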