mad
功能说明
矩阵乘运算,c = a * b + c。
接口原型
void mad(__cc__ float *c, __ca__ float *a, __cb__ float *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlag, bool kDirectionAlign, bool cmatrixSource, bool cmatrixInitVal);
void mad(__cc__ float *dst, __ca__ bfloat16_t *src0, __cb__ bfloat16_t *src1, uint16_t m, uint16_t k, uint16_t n, uint8_t featOffset, uint8_t smaskOffset, uint8_t unitFlag, bool kDirectionAlign, bool isWeightOffset, bool cmatrixSource, bool cmatrixInitVal);
void mad(__cc__ int32_t *c, __ca__ int8_t *a, __cb__ int8_t *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlag, bool kDirectionAlign, bool cmatrixSource, bool cmatrixInitVal);
void mad(__cc__ float *c, __ca__ bfloat16_t *a, __cb__ bfloat16_t *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlag, bool kDirectionAlign, bool cmatrixSource, bool cmatrixInitVal);
void mad(__cc__ float *c, __ca__ half *a, __cb__ half *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlag, bool kDirectionAlign, bool cmatrixSource, bool cmatrixInitVal);
void mad(__cc__ float *c, __ca__ half *a, __cb__ half *b, uint16_t m, uint16_t k, uint16_t n, bool init_val_controlC);
void mad(__cc__ int32_t *c, __ca__ int8_t *a, __cb__ int8_t *b, uint16_t m, uint16_t k, uint16_t n, bool init_val_controlC);
void mad(__cc__ int32_t *dst, __ca__ int8_t *src0, __cb__ int8_t *src1, uint16_t m, uint16_t k, uint16_t n, uint8_t featOffset, uint8_t smaskOffset, uint8_t unitFlag, bool kDirectionAlign, bool isWeightOffset, bool cmatrixSource, bool cmatrixInitVal);
void mad(__cc__ float *dst, __ca__ float *src0, __cb__ float *src1, uint16_t m, uint16_t k, uint16_t n, uint8_t featOffset, uint8_t smaskOffset, uint8_t unitFlag, bool kDirectionAlign, bool isWeightOffset, bool cmatrixSource, bool cmatrixInitVal);
void mad(__cc__ float *dst, __ca__ half *src0, __cb__ half *src1, uint16_t m, uint16_t k, uint16_t n, uint8_t featOffset, uint8_t smaskOffset, uint8_t unitFlag, bool kDirectionAlign, bool isWeightOffset, bool cmatrixSource, bool cmatrixInitVal);
void mad_s4(__cc__ int32_t *c, __ca__ void *a, __cb__ void *b, uint16_t m, uint16_t k, uint16_t n, bool init_val_controlC);
void mad_s4(__cc__ int32_t *c, __ca__ void *a, __cb__ void *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlag, bool kDirectionAlign, bool cmatrixSource, bool cmatrixInitVal);
void mad_s4(__cc__ int32_t *dst, __ca__ void *src0, __cb__ void *src1, uint16_t m, uint16_t k, uint16_t n, uint8_t featOffset, uint8_t smaskOffset, uint8_t unitFlag, bool kDirectionAlign, bool isWeightOffset, bool cmatrixSource, bool cmatrixInitVal);
void mad_sp(__cc__ int32_t *c, __ca__ int8_t *a, __cb__ int8_t *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlagMode, bool cmatrixSource, bool cmatrixInitVal);
参数说明
参数名 |
说明 |
取值范围 |
单位 |
---|---|---|---|
c |
目的地址,矩阵c地址,存放在L0C上(部分接口原型中该参数名为dst) |
/ |
/ |
a |
左矩阵源地址,存放在L0A上(部分接口原型中该参数名为src0) |
/ |
/ |
b |
右矩阵源地址,存放在L0B上(部分接口原型中该参数名为src1) |
/ |
/ |
m |
左矩阵的高 |
[0, 2^12-1] |
elem |
k |
左矩阵的宽,右矩阵的高 |
[0, 2^12-1] |
elem |
n |
右矩阵的宽 |
[0, 2^12-1] |
elem |
unitFlag |
预留参数,设置为0即可 |
/ |
/ |
kDirectionAlign |
预留参数,设置为False即可 |
[0, 1] |
/ |
cmatrixSource |
True表示初始c矩阵在bias table buffer上,False表示初始c矩阵在L0C上 |
[0, 1] |
/ |
cmatrixInitVal |
True表示c矩阵初始值为0,False表示使用c矩阵中已有的数据 |
[0, 1] |
/ |
featOffset |
特征图矩阵偏移。如果数据类型为s8,则偏移量被视为s8;如果数据类型为u8,则偏移量被视为u8。如果不需要偏移量,则设置为0 |
[0, 2^8-1] |
elem |
smaskOffset |
加载权重偏移量时使用的SMASK缓冲区读取地址,以2字节为单位。例如,设置为1表示SMASK缓冲区读取地址为2B |
[0, 2^7-1] |
2B |
isWeightOffset |
权重矩阵偏移使能位 |
[0, 1] |
/ |
流水类型
PIPE_M
父主题: 矩阵计算接口