load_cbuf_to_cb
功能说明
实现数据从L1搬运到L0B。该接口在搬运过程中可以通过transpose参数实现简单的分形矩阵转置，但转置仅对b16类型有效。如果b4、b8、b32类型的数据需要转置，请参考load_cbuf_to_cb_transpose接口。
接口原型
// 相同接口的不同原型区别在于源地址和目的地址的数据类型不同
void load_cbuf_to_cb(__cb__ bfloat16_t *dst, __cbuf__ bfloat16_t *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, bool transpose, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_cbuf_to_cb(__cb__ half *dst, __cbuf__ half *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, bool transpose, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_cbuf_to_cb(__cb__ float *dst, __cbuf__ float *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, bool transpose, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_cbuf_to_cb(__cb__ int32_t *dst, __cbuf__ int32_t *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, bool transpose, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_cbuf_to_cb(__cb__ int8_t *dst, __cbuf__ int8_t *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, bool transpose, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_cbuf_to_cb(__cb__ uint32_t *dst, __cbuf__ uint32_t *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, bool transpose, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_cbuf_to_cb(__cb__ uint8_t *dst, __cbuf__ uint8_t *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, bool transpose, __cce_scalar::addr_cal_mode_t addr_cal_mode);
// void *为s4
void load_cbuf_to_cb_s4(__cb__ void *dst, __cbuf__ void *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, bool transpose, __cce_scalar::addr_cal_mode_t addr_cal_mode);
参数说明
参数含义见表1 矩阵输入搬运参数说明。
流水类型
PIPE_MTE1
父主题: 矩阵输入搬运