昇腾社区首页
中文
注册
开发者
下载

load_cbuf_to_ca_transpose

功能说明

将数据从L1搬运到L0A,并在搬运过程中同时完成简单的分形矩阵转置。与load_cbuf_to_ca相比,区别在于:本接口始终执行转置操作,而load_cbuf_to_ca是否转置由参数transpose控制;此外,本接口支持的数据类型范围更广(支持{b4, b8, b16, b32}),而load_cbuf_to_ca在实现分形转置时仅支持b16数据类型。

接口原型

// 同一接口的各个原型仅在源地址和目的地址的数据类型上有所不同
void load_cbuf_to_ca_transpose(__ca__ bfloat16_t *dst, __cbuf__ bfloat16_t *src, uint16_t indexID, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, bool addrmode, uint16_t dstFracGap);

void load_cbuf_to_ca_transpose(__ca__ half *dst, __cbuf__ half *src, uint16_t indexID, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, bool addrmode, uint16_t dstFracGap);

void load_cbuf_to_ca_transpose(__ca__ float *dst, __cbuf__ float *src, uint16_t indexID, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, bool addrmode, uint16_t dstFracGap);

void load_cbuf_to_ca_transpose(__ca__ int32_t *dst, __cbuf__ int32_t *src, uint16_t indexID, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, bool addrmode, uint16_t dstFracGap);

void load_cbuf_to_ca_transpose(__ca__ int8_t *dst, __cbuf__ int8_t *src, uint16_t indexID, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, bool addrmode, uint16_t dstFracGap);

void load_cbuf_to_ca_transpose(__ca__ uint32_t *dst, __cbuf__ uint32_t *src, uint16_t indexID, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, bool addrmode, uint16_t dstFracGap);

void load_cbuf_to_ca_transpose(__ca__ uint8_t *dst, __cbuf__ uint8_t *src, uint16_t indexID, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, bool addrmode, uint16_t dstFracGap);

流水类型

PIPE_MTE1