vadds/vmuls
功能说明
对源数据逐元素加上或乘以标量 a,计算公式如下:
// vadds
each_element_of([dst]) = a + each_element_of([src])
// vmuls
each_element_of([dst]) = a * each_element_of([src])
以 block(32Byte)为单位完成计算,一次完成8个 block 的计算。
上述接口均支持通过MASK控制哪些元素参与计算。
接口原型
// 同一接口的不同原型,区别在于源地址和目的地址的数据类型不同。
// vadds
void vadds(__ubuf__ half *dst, __ubuf__ half *src, half a, uint8_t repeat, uint16_t dstBlockStride, uint16_t srcBlockStride, uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vadds(__ubuf__ float *dst, __ubuf__ float *src, float a, uint8_t repeat, uint16_t dstBlockStride, uint16_t srcBlockStride, uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vadds(__ubuf__ int16_t *dst, __ubuf__ int16_t *src, int16_t a, uint8_t repeat, uint16_t dstBlockStride, uint16_t srcBlockStride, uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vadds(__ubuf__ int32_t *dst, __ubuf__ int32_t *src, int32_t a, uint8_t repeat, uint16_t dstBlockStride, uint16_t srcBlockStride, uint16_t dstRepeatStride, uint16_t srcRepeatStride);
// vmuls(原型中的 src0 对应公式中的 src,src1 对应公式中的标量 a)
void vmuls(__ubuf__ half *dst, __ubuf__ half *src0, half src1, uint8_t repeat, uint16_t dstBlockStride, uint16_t srcBlockStride, uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vmuls(__ubuf__ float *dst, __ubuf__ float *src0, float src1, uint8_t repeat, uint16_t dstBlockStride, uint16_t srcBlockStride, uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vmuls(__ubuf__ int16_t *dst, __ubuf__ int16_t *src0, int16_t src1, uint8_t repeat, uint16_t dstBlockStride, uint16_t srcBlockStride, uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vmuls(__ubuf__ int32_t *dst, __ubuf__ int32_t *src0, int32_t src1, uint8_t repeat, uint16_t dstBlockStride, uint16_t srcBlockStride, uint16_t dstRepeatStride, uint16_t srcRepeatStride);
参数说明
参数含义见 表2 双目运算参数说明。
流水类型
PIPE_V
父主题: 双目运算