load_gm_to_cb
功能说明
实现数据从GM搬运到L0B。该接口不具备分形矩阵转置的能力,仅做简单搬运。
接口原型
// 相同接口的不同原型区别在于源地址和目的地址的数据类型不同
void load_gm_to_cb(__cb__ bfloat16_t *dst, __gm__ bfloat16_t *src,
                   uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap,
                   uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_gm_to_cb(__cb__ half *dst, __gm__ half *src,
                   uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap,
                   uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_gm_to_cb(__cb__ float *dst, __gm__ float *src,
                   uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap,
                   uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_gm_to_cb(__cb__ int32_t *dst, __gm__ int32_t *src,
                   uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap,
                   uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_gm_to_cb(__cb__ int8_t *dst, __gm__ int8_t *src,
                   uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap,
                   uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_gm_to_cb(__cb__ uint32_t *dst, __gm__ uint32_t *src,
                   uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap,
                   uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);
void load_gm_to_cb(__cb__ uint8_t *dst, __gm__ uint8_t *src,
                   uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap,
                   uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);

// 以下原型中,void *指向的数据类型为s4
void load_gm_to_cb_s4(__cb__ void *dst, __gm__ void *src,
                      uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap,
                      uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);
参数说明
参数含义见表1 矩阵输入搬运参数说明。
流水类型
PIPE_MTE2
父主题: 矩阵输入搬运