定义

// Parameter bundle for a paged-attention operation.
//
// Every field has an in-class default, so a default-constructed instance has
// both head counts at 0, qkScale at 1.0, each nested-enum selector at its
// zero-valued enumerator, outDataType at ACL_DT_UNDEFINED, both flags off and
// inputLayout at TYPE_BSND.
//
// The nested enum types are declared first; the data members follow in their
// original order, which must be kept because it defines the object layout
// and the aggregate-initialization order.
//
// NOTE(review): the per-field meanings below are inferred from identifier
// names only -- confirm them against the operator's documentation.
struct PagedAttentionParam {
    // ------------------------------------------------------------------
    // Nested selector types
    // ------------------------------------------------------------------

    // Selector for the mask variant (presumably the attention mask -- confirm).
    enum MaskType : int {
        UNDEFINED = 0,        // default value of maskType
        MASK_TYPE_NORM = 1,   // presumably a regular mask -- confirm
        MASK_TYPE_ALIBI = 2,  // presumably an ALiBi mask -- confirm
        MASK_TYPE_SPEC = 3    // meaning not visible here -- TODO confirm
    };

    // Selector for the quantization mode.
    enum QuantType : int {
        TYPE_QUANT_UNDEFINED = 0,    // default value of quantType
        TYPE_DEQUANT_FUSION = 1,     // presumably fused de-quantization -- confirm
        TYPE_QUANT_QKV_OFFLINE = 2,  // presumably offline Q/K/V quantization -- confirm
        TYPE_QUANT_QKV_ONLINE = 3    // presumably online Q/K/V quantization -- confirm
    };

    // Selector for the compression mode.
    enum CompressType : int {
        COMPRESS_TYPE_UNDEFINED = 0,    // default value of compressType
        COMPRESS_TYPE_KVHEAD = 1,       // presumably compression over KV heads -- confirm
        COMPRESS_TYPE_KVHEAD_ROPE = 2,  // presumably the RoPE flavour of the above -- confirm
        COMPRESS_TYPE_MAX = 3           // last enumerator; looks like an upper-bound sentinel -- confirm
    };

    // Selector for the calculation mode.
    enum CalcType : int {
        CALC_TYPE_UNDEFINED = 0,  // default value of calcType
        CALC_TYPE_SPEC = 1        // meaning not visible here -- TODO confirm
    };

    // Selector for the scaling mode.
    enum ScaleType : int {
        SCALE_TYPE_TOR = 0,   // default value of scaleType
        SCALE_TYPE_LOGN = 1,  // presumably log-N scaling -- confirm
        SCALE_TYPE_MAX = 2    // last enumerator; looks like an upper-bound sentinel -- confirm
    };

    // ------------------------------------------------------------------
    // Data members (declaration order is significant -- do not reorder)
    // ------------------------------------------------------------------

    // Head count; presumably the number of query heads -- confirm.
    int32_t headNum = 0;
    // Scale factor, 1.0 by default; presumably applied to the Q*K product -- confirm.
    float qkScale = 1.0f;
    // Head count for K/V; presumably the number of key/value heads -- confirm.
    int32_t kvHeadNum = 0;
    // Selected mask variant.
    MaskType maskType = UNDEFINED;
    // Off by default; presumably toggles a per-batch run-status input -- confirm.
    bool batchRunStatusEnable = false;
    // Selected quantization mode.
    QuantType quantType = TYPE_QUANT_UNDEFINED;
    // Output data type; aclDataType and ACL_DT_UNDEFINED are declared outside this block.
    aclDataType outDataType = ACL_DT_UNDEFINED;
    // Off by default; presumably indicates that a quantization offset is supplied -- confirm.
    bool hasQuantOffset = false;
    // Selected compression mode.
    CompressType compressType = COMPRESS_TYPE_UNDEFINED;
    // Selected calculation mode.
    CalcType calcType = CALC_TYPE_UNDEFINED;
    // Selected scaling mode.
    ScaleType scaleType = SCALE_TYPE_TOR;
    // Input layout selector; InputLayout and TYPE_BSND are declared outside this block.
    InputLayout inputLayout = TYPE_BSND;
};