昇腾社区首页
中文
注册
开发者
下载

load_gm_to_cb

功能说明

实现数据从GM搬运到L0B。该接口不具备分形矩阵转置的能力,仅做简单搬运。

接口原型

// 同一接口的各个原型仅在源地址和目的地址的数据类型上有所不同
void load_gm_to_cb(__cb__ bfloat16_t *dst, __gm__ bfloat16_t *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);

void load_gm_to_cb(__cb__ half *dst, __gm__ half *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);

void load_gm_to_cb(__cb__ float *dst, __gm__ float *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);

void load_gm_to_cb(__cb__ int32_t *dst, __gm__ int32_t *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);

void load_gm_to_cb(__cb__ int8_t *dst, __gm__ int8_t *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);

void load_gm_to_cb(__cb__ uint32_t *dst, __gm__ uint32_t *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);

void load_gm_to_cb(__cb__ uint8_t *dst, __gm__ uint8_t *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);

// 该原型用于s4(4位有符号整数)类型数据,dst和src以void *形式传入
void load_gm_to_cb_s4(__cb__ void *dst, __gm__ void *src, uint16_t baseIdx, uint8_t repeat, uint16_t srcStride, uint16_t dstGap, uint8_t sid, __cce_scalar::addr_cal_mode_t addr_cal_mode);

参数说明

参数含义见“表1 矩阵输入搬运参数说明”。

流水类型

PIPE_MTE2