昇腾社区首页
中文
注册
开发者
下载

vcmpv

功能说明

逐元素比较向量src0与向量src1:若某个元素的比较结果为真,则输出结果中对应的比特位为1,否则为0。

元素类型为f16时,向量中元素个数为128,因此每次计算的结果是连续的128bit(即16Bytes),写入dst中;重复计算时,每次迭代后新的dst = dst + 16Bytes。

元素类型为f32时,向量中元素个数为64,因此每次计算的结果是连续的64bit(即8Bytes),写入dst中;重复计算时,每次迭代后新的dst = dst + 8Bytes。

支持多种比较接口:

  • EQ:src0等于src1(equal to)
  • NE:src0不等于src1(not equal to)
  • LT:src0小于src1(less than)
  • GT:src0大于src1(greater than)
  • LE:src0小于或等于src1(less than or equal to)
  • GE:src0大于或等于src1(greater than or equal to)

int32类型只支持EQ接口(即仅vcmpv_eq提供int32_t原型),其余比较接口仅支持half和float类型。

该接口无MASK参数。

接口原型

// 相同接口的不同原型区别仅在于源操作数(src0、src1)的数据类型不同,目的地址dst的类型均为uint8_t
// vcmpv_eq
void vcmpv_eq(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

void vcmpv_eq(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

void vcmpv_eq(__ubuf__ uint8_t *dst, __ubuf__ int32_t *src0, __ubuf__ int32_t *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

// vcmpv_ne
void vcmpv_ne(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

void vcmpv_ne(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

// vcmpv_lt
void vcmpv_lt(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

void vcmpv_lt(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

// vcmpv_gt
void vcmpv_gt(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

void vcmpv_gt(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

// vcmpv_le
void vcmpv_le(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

void vcmpv_le(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

// vcmpv_ge
void vcmpv_ge(__ubuf__ uint8_t *dst, __ubuf__ half *src0, __ubuf__ half *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

void vcmpv_ge(__ubuf__ uint8_t *dst, __ubuf__ float *src0, __ubuf__ float *src1, uint8_t repeat, uint8_t dstBlockStride, uint8_t src0BlockStride, uint8_t src1BlockStride, uint8_t dstRepeatStride, uint8_t src0RepeatStride, uint8_t src1RepeatStride);

参数说明

参数含义见 表2 双目运算参数说明

流水类型

PIPE_V