SetCmpMask (ISASI)

Applicability

Product

Supported/Unsupported

Atlas A3 training products/Atlas A3 inference products

√

Atlas A2 training products/Atlas A2 inference products

√

Atlas 200I/500 A2 inference products

x

Atlas inference product's AI Core

√

Atlas inference product's Vector Core

x

Atlas training products

x

Function Usage

Sets the comparison register used by the Select APIs that do not take a mask parameter. The data passed to SetCmpMask depends on the selMode value used by Select:

  • Mode 0 (SELMODE::VSEL_CMPMASK_SPR)

    Pass the selMask LocalTensor to SetCmpMask.

  • Mode 1 (SELMODE::VSEL_TENSOR_SCALAR_MODE)

    Pass the src1 LocalTensor to SetCmpMask.

  • Mode 2 (SELMODE::VSEL_TENSOR_TENSOR_MODE)

    Pass to SetCmpMask a LocalTensor that stores the address of selMask.

Prototype

1
2
template <typename T>
__aicore__ inline void SetCmpMask(const LocalTensor<T>& src)

Parameters

Table 1 Template parameters

Parameter

Description

T

Data type of the operand.

Table 2 Parameters

Parameter

Input/Output

Description

src

Input

The type is LocalTensor, and the supported TPosition is VECIN, VECCALC, or VECOUT.

The start address of the LocalTensor must be 16-byte aligned.

Returns

None

Constraints

None

Examples

  • When selMode is set to mode 0 or mode 2:
     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    // Example: use SetCmpMask together with Select in mode 0, then in mode 2.
    // 256 float elements are processed as 4 repeats of 64 elements each
    // (repeat = 4, mask = 64).
    uint32_t dataSize = 256;
    uint32_t selDataSize = 8;
    TPipe pipe;
    TQue<TPosition::VECIN, 1> inQueueX;
    TQue<TPosition::VECIN, 1> inQueueY;
    TQue<TPosition::VECIN, 1> inQueueSel;
    TQue<TPosition::VECOUT, 1> outQueue;
    pipe.InitBuffer(inQueueX, 1, dataSize * sizeof(float));
    pipe.InitBuffer(inQueueY, 1, dataSize * sizeof(float));
    pipe.InitBuffer(inQueueSel, 1, selDataSize * sizeof(uint8_t));
    pipe.InitBuffer(outQueue, 1, dataSize * sizeof(float));
    AscendC::LocalTensor<float> dst = outQueue.AllocTensor<float>();
    AscendC::LocalTensor<uint8_t> sel = inQueueSel.AllocTensor<uint8_t>();
    AscendC::LocalTensor<float> src0 = inQueueX.AllocTensor<float>();
    AscendC::LocalTensor<float> src1 = inQueueY.AllocTensor<float>();
    uint8_t repeat = 4;
    uint32_t mask = 64;
    AscendC::BinaryRepeatParams repeatParams = { 1, 1, 1, 8, 8, 8 };
    
    // Mode 0 (SELMODE::VSEL_CMPMASK_SPR): pass the selMask tensor itself to SetCmpMask.
    AscendC::SetCmpMask(sel);
    AscendC::PipeBarrier<PIPE_V>();
    AscendC::SetVectorMask<float>(mask);
    AscendC::Select<float, AscendC::SELMODE::VSEL_CMPMASK_SPR>(dst, src0, src1, repeat, repeatParams);
    
    // Mode 2 (SELMODE::VSEL_TENSOR_TENSOR_MODE): pass a tensor that stores the address of selMask.
    // NOTE(review): tempBuf is declared without a visible allocation in this snippet;
    // it presumably has to be bound to a local buffer before it is written - confirm.
    AscendC::LocalTensor<int32_t> tempBuf;
    #if defined(ASCENDC_CPU_DEBUG) && (ASCENDC_CPU_DEBUG == 1) // CPU debugging
    // Store the 64-bit address of sel into element 0 of tempBuf with a scalar write,
    // then set/wait an S_V event before the vector instructions below read it.
    tempBuf.ReinterpretCast<int64_t>().SetValue(0, reinterpret_cast<int64_t>(reinterpret_cast<__ubuf__ int64_t*>(sel.GetPhyAddr())));
    event_t eventIdSToV = static_cast<event_t>(AscendC::GetTPipePtr()->FetchEventID(AscendC::HardEvent::S_V));
    AscendC::SetFlag<AscendC::HardEvent::S_V>(eventIdSToV);
    AscendC::WaitFlag<AscendC::HardEvent::S_V>(eventIdSToV);
    #else // Running on the NPU
    // Truncate the address of sel to 32 bits and write it into tempBuf with Duplicate,
    // using a vector mask of 32 elements.
    uint32_t selAddr = static_cast<uint32_t>(reinterpret_cast<int64_t>(reinterpret_cast<__ubuf__ int64_t*>(sel.GetPhyAddr())));
    AscendC::SetVectorMask<uint32_t>(32);
    AscendC::Duplicate<uint32_t, false>(tempBuf.ReinterpretCast<uint32_t>(), selAddr, AscendC::MASK_PLACEHOLDER, 1, 1, 8);
    AscendC::PipeBarrier<PIPE_V>();
    #endif
    AscendC::SetCmpMask<int64_t>(tempBuf.ReinterpretCast<int64_t>());
    AscendC::PipeBarrier<PIPE_V>();
    // Restore the 64-element mask used by Select.
    AscendC::SetVectorMask<float>(mask);
    AscendC::Select<float, AscendC::SELMODE::VSEL_TENSOR_TENSOR_MODE>(dst, src0, src1, repeat, repeatParams);
    
  • When selMode is set to mode 1:
     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    // Example: use SetCmpMask together with Select in mode 1.
    // 256 float elements are processed as 4 repeats of 64 elements each
    // (repeat = 4, mask = 64).
    uint32_t dataSize = 256;
    uint32_t selDataSize = 8;
    TPipe pipe;
    TQue<TPosition::VECIN, 1> inQueueX;
    TQue<TPosition::VECIN, 1> inQueueY;
    TQue<TPosition::VECIN, 1> inQueueSel;
    TQue<TPosition::VECOUT, 1> outQueue;
    pipe.InitBuffer(inQueueX, 1, dataSize * sizeof(float));
    pipe.InitBuffer(inQueueY, 1, dataSize * sizeof(float));
    pipe.InitBuffer(inQueueSel, 1, selDataSize * sizeof(uint8_t));
    pipe.InitBuffer(outQueue, 1, dataSize * sizeof(float));
    AscendC::LocalTensor<float> dst = outQueue.AllocTensor<float>();
    AscendC::LocalTensor<uint8_t> sel = inQueueSel.AllocTensor<uint8_t>();
    AscendC::LocalTensor<float> src0 = inQueueX.AllocTensor<float>();
    // Tensor that carries the src1 value handed to SetCmpMask in mode 1.
    AscendC::LocalTensor<float> tmpScalar = inQueueY.AllocTensor<float>();
    
    uint8_t repeat = 4;
    uint32_t mask = 64;
    AscendC::BinaryRepeatParams repeatParams = { 1, 1, 1, 8, 8, 8 };
    
    // Mode 1 (SELMODE::VSEL_TENSOR_SCALAR_MODE): pass the src1 tensor to SetCmpMask.
    // Fill tmpScalar with 1.0 under a vector mask of 32 elements.
    AscendC::SetVectorMask<uint32_t>(32);
    AscendC::Duplicate<float, false>(tmpScalar, static_cast<float>(1.0), AscendC::MASK_PLACEHOLDER, 1, 1, 8);
    AscendC::PipeBarrier<PIPE_V>();
    AscendC::SetCmpMask(tmpScalar);
    AscendC::PipeBarrier<PIPE_V>();
    // Restore the 64-element mask used by Select.
    AscendC::SetVectorMask<float>(mask);
    AscendC::Select(dst, sel, src0, repeat, repeatParams);