vcmpv
功能说明
逐元素比较向量 src0 与向量 src1 的每个元素,如果比较后的结果为真,则输出结果的对应比特位为1,否则为0。
元素类型为f16时,向量中元素个数为128,因此结果是一个连续的128bit,写入dst中,重复计算时,新的dst = dst + 16Bytes。
元素类型为f32时,向量中元素个数为64,因此结果是一个连续的64bit,写入dst中,重复计算时,新的dst = dst + 8Bytes。
支持以下六种比较方式,分别对应接口 vcmpv_eq、vcmpv_ne、vcmpv_lt、vcmpv_gt、vcmpv_le、vcmpv_ge:
- EQ:src0等于src1(equal to)
- NE:src0不等于src1(not equal to)
- LT:src0小于src1(less than)
- GT:src0大于src1(greater than)
- LE:src0小于或等于src1(less than or equal to)
- GE:src0大于或等于src1(greater than or equal to)
int32类型只支持eq接口(即仅vcmpv_eq提供int32_t类型的原型)。
该接口无MASK参数。
接口原型
// 相同接口的不同原型区别在于源地址和目的地址的数据类型不同

// vcmpv_eq
void vcmpv_eq(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);
void vcmpv_eq(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);
void vcmpv_eq(__ubuf__ uint8_t *dst, __ubuf__ int32_t *src0, __ubuf__ int32_t *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

// vcmpv_ne
void vcmpv_ne(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);
void vcmpv_ne(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

// vcmpv_lt
void vcmpv_lt(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);
void vcmpv_lt(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

// vcmpv_gt
void vcmpv_gt(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);
void vcmpv_gt(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

// vcmpv_le
void vcmpv_le(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);
void vcmpv_le(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

// vcmpv_ge
void vcmpv_ge(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);
void vcmpv_ge(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat,
              uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride,
              uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);
参数说明
参数含义见 表2 双目运算参数说明。
流水类型
PIPE_V
父主题: 比较运算