昇腾社区首页
中文
注册

mad

功能说明

矩阵乘加运算:c = a * b + c。其中a为m×k的左矩阵,b为k×n的右矩阵,c为m×n的结果矩阵。

接口原型

void mad(__cc__ float *c, __ca__ float *a, __cb__ float *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlag, bool kDirectionAlign, bool cmatrixSource, bool cmatrixInitVal);

void mad(__cc__ float *dst, __ca__ bfloat16_t *src0, __cb__ bfloat16_t *src1, uint16_t m, uint16_t k, uint16_t n, uint8_t featOffset, uint8_t smaskOffset, uint8_t unitFlag, bool kDirectionAlign, bool isWeightOffset, bool cmatrixSource, bool cmatrixInitVal);


void mad(__cc__ int32_t *c, __ca__ int8_t *a, __cb__ int8_t *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlag, bool kDirectionAlign, bool cmatrixSource, bool cmatrixInitVal);

void mad(__cc__ float *c, __ca__ bfloat16_t *a, __cb__ bfloat16_t *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlag, bool kDirectionAlign, bool cmatrixSource, bool cmatrixInitVal);

void mad(__cc__ float *c, __ca__ half *a, __cb__ half *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlag, bool kDirectionAlign, bool cmatrixSource, bool cmatrixInitVal);

void mad(__cc__ float *c, __ca__ half *a, __cb__ half *b, uint16_t m, uint16_t k, uint16_t n, bool init_val_controlC);

void mad(__cc__ int32_t *c, __ca__ int8_t *a, __cb__ int8_t *b, uint16_t m, uint16_t k, uint16_t n, bool init_val_controlC);

void mad(__cc__ int32_t *dst, __ca__ int8_t *src0, __cb__ int8_t *src1, uint16_t m, uint16_t k, uint16_t n, uint8_t featOffset, uint8_t smaskOffset, uint8_t unitFlag, bool kDirectionAlign, bool isWeightOffset, bool cmatrixSource, bool cmatrixInitVal);

void mad(__cc__ float *dst, __ca__ float *src0, __cb__ float *src1, uint16_t m, uint16_t k, uint16_t n, uint8_t featOffset, uint8_t smaskOffset, uint8_t unitFlag, bool kDirectionAlign, bool isWeightOffset, bool cmatrixSource, bool cmatrixInitVal);

void mad(__cc__ float *dst, __ca__ half *src0, __cb__ half *src1, uint16_t m, uint16_t k, uint16_t n, uint8_t featOffset, uint8_t smaskOffset, uint8_t unitFlag, bool kDirectionAlign, bool isWeightOffset, bool cmatrixSource, bool cmatrixInitVal);
 
void mad_s4(__cc__ int32_t *c, __ca__ void *a, __cb__ void *b, uint16_t m, uint16_t k, uint16_t n, bool init_val_controlC); 

void mad_s4(__cc__ int32_t *c, __ca__ void *a, __cb__ void *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlag, bool kDirectionAlign, bool cmatrixSource, bool cmatrixInitVal);

void mad_s4(__cc__ int32_t *dst, __ca__ void *src0, __cb__ void *src1, uint16_t m, uint16_t k, uint16_t n, uint8_t featOffset, uint8_t smaskOffset, uint8_t unitFlag, bool kDirectionAlign, bool isWeightOffset, bool cmatrixSource, bool cmatrixInitVal); 

void mad_sp(__cc__ int32_t *c, __ca__ int8_t *a, __cb__ int8_t *b, uint16_t m, uint16_t k, uint16_t n, uint8_t unitFlagMode, bool cmatrixSource, bool cmatrixInitVal);

参数说明

表1 mad参数说明

参数名

说明

取值范围

单位

c

目的地址,矩阵c的地址(部分接口原型中参数名为dst),存放在L0C上

/

/

a

左矩阵源地址(部分接口原型中参数名为src0),存放在L0A上

/

/

b

右矩阵源地址(部分接口原型中参数名为src1),存放在L0B上

/

/

m

左矩阵的高

[0, 2^12-1]

elem

k

左矩阵的宽,右矩阵的高

[0, 2^12-1]

elem

n

右矩阵的宽

[0, 2^12-1]

elem

unitFlag

预留参数,设置为0即可

/

/

kDirectionAlign

预留参数,设置为False即可

[0, 1]

/

cmatrixSource

True表示初始c矩阵在bias table buffer上,False表示初始c矩阵在L0C上

[0, 1]

/

cmatrixInitVal

True表示c矩阵初始值为0,False表示使用c矩阵中已有的数据作为初始值

[0, 1]

/

featOffset

特征图矩阵偏移量。如果数据类型为s8,则偏移量被视为s8;如果数据类型为u8,则偏移量被视为u8。如果不需要偏移量,则设置为0。

[0, 2^8-1]

elem

smaskOffset

用于加载权重偏移量的SMASK缓冲区地址,以2字节为单位。例如,设置为1表示SMASK缓冲区读取地址为2B。

[0, 2^7-1]

2B

isWeightOffset

权重矩阵偏移使能位

[0, 1]

/

流水类型

PIPE_M