vadds/vmuls
功能说明
以 block(32 Byte)为单位完成以下计算,一次完成 8 个 block 的计算。
each_element_of([dst]) = a + each_element_of([src])
函数原型
void vadds(__ubuf__ half *dst, __ubuf__ half *src, half a, uint8_t repeat,
           uint16_t dstBlockStride, uint16_t srcBlockStride,
           uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vadds(__ubuf__ float *dst, __ubuf__ float *src, float a, uint8_t repeat,
           uint16_t dstBlockStride, uint16_t srcBlockStride,
           uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vadds(__ubuf__ int16_t *dst, __ubuf__ int16_t *src, int16_t a, uint8_t repeat,
           uint16_t dstBlockStride, uint16_t srcBlockStride,
           uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vadds(__ubuf__ int32_t *dst, __ubuf__ int32_t *src, int32_t a, uint8_t repeat,
           uint16_t dstBlockStride, uint16_t srcBlockStride,
           uint16_t dstRepeatStride, uint16_t srcRepeatStride);
流水类型
PIPE_V
同类型接口
// vmuls:each_element_of([dst]) = src1 * each_element_of([src0])
void vmuls(__ubuf__ half *dst, __ubuf__ half *src0, half src1, uint8_t repeat,
           uint16_t dstBlockStride, uint16_t srcBlockStride,
           uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vmuls(__ubuf__ float *dst, __ubuf__ float *src0, float src1, uint8_t repeat,
           uint16_t dstBlockStride, uint16_t srcBlockStride,
           uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vmuls(__ubuf__ int16_t *dst, __ubuf__ int16_t *src0, int16_t src1, uint8_t repeat,
           uint16_t dstBlockStride, uint16_t srcBlockStride,
           uint16_t dstRepeatStride, uint16_t srcRepeatStride);
void vmuls(__ubuf__ int32_t *dst, __ubuf__ int32_t *src0, int32_t src1, uint8_t repeat,
           uint16_t dstBlockStride, uint16_t srcBlockStride,
           uint16_t dstRepeatStride, uint16_t srcRepeatStride);
父主题: 双目运算