SetCmpMask (ISASI)

Function Usage

Sets the comparison register used by the Select APIs that do not take a mask parameter. The tensor passed to SetCmpMask depends on the selMode value used by Select:

  • Mode 0 (SELMODE::VSEL_CMPMASK_SPR)

    Pass the selMask LocalTensor to SetCmpMask.

  • Mode 1 (SELMODE::VSEL_TENSOR_SCALAR_MODE)

    Pass the src1Local LocalTensor to SetCmpMask.

  • Mode 2 (SELMODE::VSEL_TENSOR_TENSOR_MODE)

    Pass to SetCmpMask a LocalTensor that stores the address of selMask.

Prototype

template <typename T>
__aicore__ inline void SetCmpMask(const LocalTensor<T>& src)

Parameters

Table 1 Parameters

Parameter

Input/Output

Description

src

Input

The type is LocalTensor, and the supported TPosition is VECIN, VECCALC, or VECOUT.

The start address of the LocalTensor must be 32-byte aligned.

Returns

None

Availability

Constraints

None

Example

  • When selMode is set to mode 0 or mode 2:
    AscendC::LocalTensor<float> dst;
    AscendC::LocalTensor<uint8_t> sel;
    AscendC::LocalTensor<float> src0;
    AscendC::LocalTensor<float> src1;
    uint8_t repeat = 4;
    uint32_t mask = 64;
    AscendC::BinaryRepeatParams repeatParams = { 1, 1, 1, 8, 8, 8 };
    
    // Set selMode to mode 0 (SELMODE::VSEL_CMPMASK_SPR).
    AscendC::SetCmpMask(sel);
    AscendC::PipeBarrier<PIPE_V>();
    AscendC::SetVectorMask<float>(mask);
    AscendC::Select<float, AscendC::SELMODE::VSEL_CMPMASK_SPR>(dst, src0, src1, repeat, repeatParams);
    
    // Set selMode to mode 2 (SELMODE::VSEL_TENSOR_TENSOR_MODE).
    AscendC::LocalTensor<int32_t> tempBuf;
    #if defined(ASCENDC_CPU_DEBUG) && (ASCENDC_CPU_DEBUG == 1) // CPU debugging
    tempBuf.ReinterpretCast<int64_t>().SetValue(0, reinterpret_cast<int64_t>(reinterpret_cast<__ubuf__ int64_t*>(sel.GetPhyAddr())));
    event_t eventIdSToV = static_cast<event_t>(AscendC::GetTPipePtr()->FetchEventID(AscendC::HardEvent::S_V));
    AscendC::SetFlag<AscendC::HardEvent::S_V>(eventIdSToV);
    AscendC::WaitFlag<AscendC::HardEvent::S_V>(eventIdSToV);
    #else // NPU debugging
    uint32_t selAddr = static_cast<uint32_t>(reinterpret_cast<int64_t>(reinterpret_cast<__ubuf__ int64_t*>(sel.GetPhyAddr())));
    AscendC::SetVectorMask<uint32_t>(32);
    AscendC::Duplicate<uint32_t, false>(tempBuf.ReinterpretCast<uint32_t>(), selAddr, AscendC::MASK_PLACEHOLDER, 1, 1, 8);
    AscendC::PipeBarrier<PIPE_V>();
    #endif
    AscendC::SetCmpMask<int64_t>(tempBuf.ReinterpretCast<int64_t>());
    AscendC::PipeBarrier<PIPE_V>();
    AscendC::SetVectorMask<float>(mask);
    AscendC::Select<float, AscendC::SELMODE::VSEL_TENSOR_TENSOR_MODE>(dst, src0, src1, repeat, repeatParams);
  • When selMode is set to mode 1:
    AscendC::LocalTensor<float> dst;
    AscendC::LocalTensor<uint8_t> sel;
    AscendC::LocalTensor<float> src0;
    AscendC::LocalTensor<float> tmpScalar;
    uint8_t repeat = 4;
    uint32_t mask = 64;
    AscendC::BinaryRepeatParams repeatParams = { 1, 1, 1, 8, 8, 8 };
    
    // Set selMode to mode 1 (SELMODE::VSEL_TENSOR_SCALAR_MODE).
    AscendC::SetVectorMask<uint32_t>(32);
    AscendC::Duplicate<float, false>(tmpScalar, static_cast<float>(1.0), MASK_PLACEHOLDER, 1, 1, 8);
    AscendC::PipeBarrier<PIPE_V>();
    AscendC::SetCmpMask(tmpScalar);
    AscendC::PipeBarrier<PIPE_V>();
    AscendC::SetVectorMask<float>(mask);
    AscendC::Select<float, uint8_t>(dst, sel, src0, repeat, repeatParams);