定义

// Parameter bundle for a paged-attention operation (purpose inferred from the
// type name; the code that consumes this struct is not visible here).
//
// The nested enum types are declared up front, followed by the data members.
// Data-member declaration order determines object layout and must not be
// changed without checking every consumer of this struct.
struct PagedAttentionParam {
    // ---- Nested option types -------------------------------------------
    // NOTE(review): the meaning of each enumerator is not defined in this
    // file; confirm against the operator documentation before relying on
    // any interpretation beyond the numeric values spelled out below.

    // Attention-mask selector.
    enum MaskType : int {
        UNDEFINED = 0,
        MASK_TYPE_NORM = 1,
        MASK_TYPE_ALIBI = 2,
        MASK_TYPE_SPEC = 3
    };

    // Quantization selector. TYPE_QUANT_UNDEFINED and TYPE_QUANT_UNQUANT
    // share the value 0, so the two names are interchangeable.
    enum QuantType : int {
        TYPE_QUANT_UNDEFINED = 0,
        TYPE_QUANT_UNQUANT = 0,
        TYPE_DEQUANT_FUSION = 1,
        TYPE_QUANT_QKV_OFFLINE = 2,
        TYPE_QUANT_QKV_ONLINE = 3
    };

    // Compression selector. COMPRESS_TYPE_MAX is the last enumerator
    // (presumably an upper-bound sentinel rather than a real mode; confirm).
    enum CompressType : int {
        COMPRESS_TYPE_UNDEFINED = 0,
        COMPRESS_TYPE_KVHEAD = 1,
        COMPRESS_TYPE_KVHEAD_ROPE = 2,
        COMPRESS_TYPE_MAX = 3
    };

    // Calculation-mode selector.
    enum CalcType : int {
        CALC_TYPE_UNDEFINED = 0,
        CALC_TYPE_SPEC = 1
    };

    // Scaling selector. SCALE_TYPE_MAX is the last enumerator (presumably an
    // upper-bound sentinel rather than a real mode; confirm).
    enum ScaleType : int {
        SCALE_TYPE_TOR = 0,
        SCALE_TYPE_LOGN = 1,
        SCALE_TYPE_MAX = 2
    };

    // ---- Data members (order is layout-significant) --------------------

    // Head count; defaults to 0. Presumably the number of query heads; confirm.
    int32_t headNum = 0;
    // Scale factor; defaults to 1.0. Presumably applied to the Q*K product; confirm.
    float qkScale = 1.0f;
    // Key/value head count (per the name); defaults to 0.
    int32_t kvHeadNum = 0;
    // Selected mask type; defaults to UNDEFINED (0).
    MaskType maskType = UNDEFINED;
    // Off by default. Presumably enables a per-batch run-status input; confirm.
    bool batchRunStatusEnable = false;
    // Selected quantization type; defaults to TYPE_QUANT_UNQUANT (0).
    QuantType quantType = TYPE_QUANT_UNQUANT;
    // Output data type; defaults to ACL_DT_UNDEFINED.
    aclDataType outDataType = ACL_DT_UNDEFINED;
    // Off by default. Presumably indicates that a quantization offset is supplied; confirm.
    bool hasQuantOffset = false;
    // Selected compression type; defaults to COMPRESS_TYPE_UNDEFINED (0).
    CompressType compressType = COMPRESS_TYPE_UNDEFINED;
    // Selected calculation type; defaults to CALC_TYPE_UNDEFINED (0).
    CalcType calcType = CALC_TYPE_UNDEFINED;
    // Selected scale type; defaults to SCALE_TYPE_TOR (0).
    ScaleType scaleType = SCALE_TYPE_TOR;
    // Input tensor layout; defaults to TYPE_BSND (declared outside this struct).
    InputLayout inputLayout = TYPE_BSND;
    // Defaults to 0. Presumably the V head size used in an MLA configuration; confirm.
    uint32_t mlaVHeadSize = 0;
    // 68 zero-initialized bytes (name suggests reserved space; confirm).
    uint8_t rsv[68] = {};
};