RpSort16

Function Usage

Sorts the Region Proposals based on their score fields in descending order. 16 Region Proposals are sorted in each iteration.

Prototype

1
2
// Sorts Region Proposals by their score field in descending order, 16 proposals per iteration.
//   dstLocal    - output tensor that receives the sorted Region Proposals.
//   srcLocal    - input tensor that holds the unsorted Region Proposals.
//   repeatTimes - number of iterations, in the range [0, 255].
template <typename T>
__aicore__ inline void RpSort16(const LocalTensor<T>& dstLocal, const LocalTensor<T>& srcLocal, const int32_t repeatTimes)

Parameters

Table 1 Parameters

Parameter

Input/Output

Meaning

dstLocal

Output

Destination operand, which stores sorted Region Proposals.

The type is LocalTensor, and the supported TPosition is VECIN, VECCALC, or VECOUT.

The start address of the LocalTensor must be 32-byte aligned.

For the Atlas Training Series Product, the supported data type is half.

srcLocal

Input

Source operand, which stores unsorted Region Proposals.

The type is LocalTensor, and the supported TPosition is VECIN, VECCALC, or VECOUT.

The start address of the LocalTensor must be 32-byte aligned.

For the Atlas Training Series Product, the supported data type is half.

repeatTimes

Input

Number of iteration repeats. The value is of the int32_t type. 16 proposals are sorted in each iteration. Value range: repeatTimes ∈ [0,255]

Availability

Atlas Training Series Product

Precautions

  • Ensure that srcLocal and dstLocal each have room for at least the number of Region Proposals being processed (repeatTimes × 16). Otherwise, a tensor access violation occurs.
  • If proposal [i] and proposal [j] have the same score and i is greater than j, proposal [j] is placed first (that is, ties keep the lower-index proposal ahead).
  • For details about the alignment requirements of the operand address offset, see General Restrictions.

Example

  • API usage example
    1
    2
    // repeatTimes = 2: sort 32 Region Proposals in two iterations of 16 proposals each (each group of 16 is sorted on its own).
    AscendC::RpSort16(dstLocal, dstLocal, 2);
    
  • Complete example
     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    #include "kernel_operator.h"
    
    // Sample kernel: copies 32 half scores in, expands them into Region Proposals with
    // ProposalConcat, sorts the proposals with RpSort16 and copies the 256 resulting half values out.
    class KernelVecProposal {
    public:
        __aicore__ inline KernelVecProposal() {}

        // Attaches the two global-memory buffers and reserves one local buffer for each queue.
        __aicore__ inline void Init(__gm__ uint8_t* src, __gm__ uint8_t* dstGm)
        {
            scoresGm.SetGlobalBuffer((__gm__ half*)src);
            proposalsGm.SetGlobalBuffer((__gm__ half*)dstGm);

            pipe.InitBuffer(scoreQueue, 1, scoreCount * sizeof(half));
            pipe.InitBuffer(proposalQueue, 1, proposalElemCount * sizeof(half));
        }

        // Runs the four stages once, in order: load, build proposals, sort, store.
        __aicore__ inline void Process()
        {
            CopyIn();
            PreProcess();
            Compute();
            CopyOut();
        }

    private:
        // Stage 1: load the raw scores from global memory into the input queue.
        __aicore__ inline void CopyIn()
        {
            AscendC::LocalTensor<half> scores = scoreQueue.AllocTensor<half>();
            AscendC::DataCopy(scores, scoresGm, scoreCount);
            scoreQueue.EnQue(scores);
        }

        // Stage 2: build the Region Proposals that the sort operates on.
        __aicore__ inline void PreProcess()
        {
            AscendC::LocalTensor<half> scores = scoreQueue.DeQue<half>();
            AscendC::LocalTensor<half> proposals = proposalQueue.AllocTensor<half>();
            // The sort key is the score, so only the score field is filled in here.
            // The remaining fields of each proposal are not initialized and may hold random values.
            AscendC::ProposalConcat(proposals, scores, sortRepeat, scoreSlot);
            proposalQueue.EnQue<half>(proposals);
            scoreQueue.FreeTensor(scores);
        }

        // Stage 3: sort in place (same tensor as source and destination), then hand it to CopyOut.
        __aicore__ inline void Compute()
        {
            AscendC::LocalTensor<half> proposals = proposalQueue.DeQue<half>();
            AscendC::RpSort16(proposals, proposals, sortRepeat);
            proposalQueue.EnQue<half>(proposals);
        }

        // Stage 4: write the sorted proposals back to global memory and release the buffer.
        __aicore__ inline void CopyOut()
        {
            AscendC::LocalTensor<half> proposals = proposalQueue.DeQue<half>();
            AscendC::DataCopy(proposalsGm, proposals, proposalElemCount);
            proposalQueue.FreeTensor(proposals);
        }

    private:
        AscendC::TPipe pipe;
        AscendC::TQue<AscendC::QuePosition::VECIN, 1> scoreQueue;
        AscendC::TQue<AscendC::QuePosition::VECOUT, 1> proposalQueue;
        AscendC::GlobalTensor<half> scoresGm, proposalsGm;
        int scoreCount = 32;                    // number of input scores, one per proposal
        int proposalElemCount = scoreCount * 8; // 8 half elements per Region Proposal -> 256
        int sortRepeat = scoreCount / 16;       // 16 proposals are handled per iteration -> 2
        int scoreSlot = 4;                      // ProposalConcat mode; the sample output shows the score at index 4 of each proposal
    };
    
    // Kernel entry point: builds the sample operator on the given global buffers and runs it once.
    extern "C" __global__ __aicore__ void vec_proposal_kernel(__gm__ uint8_t* src, __gm__ uint8_t* dstGm)
    {
        KernelVecProposal proposalSorter;
        proposalSorter.Init(src, dstGm);
        proposalSorter.Process();
    }
    
    Result example:
    Input (src_gm):
    [ -1.624 -42.3   -54.12   91.25  -99.4    36.72   67.44  -66.3   -52.53
       3.377 -62.47  -15.85  -31.47    3.143  58.47  -83.75 21.58   63.47    
       7.234  35.16  -39.72   37.8    73.06  -98.7    44.1 -77.2    67.2    
       19.62  -87.9   -14.875  15.86  -77.75]
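    Output (dst_gm), 32 Region Proposals of 8 elements each with the score at index 4; each group of 16 proposals is sorted separately in descending order of score: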
    [  0.      0.      0.      0.     91.25    0.      0.      0.      0.
       0.      0.      0.     67.44    0.      0.      0.      0.      0.
       0.      0.     58.47    0.      0.      0.      0.      0.      0.
       0.     36.72    0.      0.      0.      0.      0.      0.      0.
       3.377   0.      0.      0.      0.      0.      0.      0.      3.143
       0.      0.      0.      0.      0.      0.      0.     -1.624   0.
       0.      0.      0.      0.      0.      0.    -15.85    0.      0.
       0.      0.      0.      0.      0.    -31.47    0.      0.      0.
       0.      0.      0.      0.    -42.3     0.      0.      0.      0.
       0.      0.      0.    -52.53    0.      0.      0.      0.      0.
       0.      0.    -54.12    0.      0.      0.      0.      0.      0.
       0.    -62.47    0.      0.      0.      0.      0.      0.      0.
     -66.3     0.      0.      0.      0.      0.      0.      0.    -83.75
       0.      0.      0.      0.      0.      0.      0.    -99.4     0.
       0.      0.      0.      0.      0.      0.     73.06    0.      0.      
       0.      0.      0.      0.      0.     67.2     0.      0.      0.      
       0.      0.      0.      0.     63.47    0.      0.      0.      0.      
       0.      0.      0.     44.1     0.      0.      0.      0.      0.      
       0.      0.     37.8     0.      0.      0.      0.      0.      0.      
       0.     35.16    0.      0.      0.      0.      0.      0.      0.     
      21.58    0.      0.      0.      0.      0.      0.      0.     19.62    
       0.      0.      0.      0.      0.      0.      0.     15.86    0.      
       0.      0.      0.      0.      0.      0.      7.234   0.      0.      
       0.      0.      0.      0.      0.    -14.875   0.      0.      0.      
       0.      0.      0.      0.    -39.72    0.      0.      0.      0.      
       0.      0.      0.    -77.2     0.      0.      0.      0.      0.      
       0.      0.    -77.75    0.      0.      0.      0.      0.      0.      
       0.    -87.9     0.      0.      0.      0.      0.      0.      0.    
     -98.7     0.      0.      0.   ]