定义

/**
 * Parameter bundle describing a self-attention operation (per the struct name).
 *
 * Every member has a default initializer, so a default-constructed instance is
 * fully initialized. The meaning of each option is defined by the code that
 * consumes this struct, which is not visible here; remarks marked "presumably"
 * are inferred from identifier names only and should be confirmed.
 *
 * Enumerator values are written out explicitly; they are the same values the
 * compiler would assign implicitly (sequential from 0).
 *
 * NOTE: member declaration order is part of the layout of this struct; keep it
 * unchanged when editing.
 */
struct SelfAttentionParam {
    /** Calculation type selector. */
    enum CalcType : int {
        UNDEFINED = 0,  // default value of `calcType`
        ENCODER = 1,
        DECODER = 2,
        PA_ENCODER = 3
    };

    /** Kernel variant selector. */
    enum KernelType : int {
        KERNELTYPE_DEFAULT = 0,  // default value of `kernelType`
        KERNELTYPE_HIGH_PRECISION = 1
    };

    /** Clamp mode selector. */
    enum ClampType : int {
        CLAMP_TYPE_UNDEFINED = 0,  // default value of `clampType`
        CLAMP_TYPE_MIN_MAX = 1     // presumably uses `clampMin`/`clampMax` - confirm
    };

    /** Attention-mask variant selector. */
    enum MaskType : int {
        MASK_TYPE_UNDEFINED = 0,  // default value of `maskType`
        MASK_TYPE_NORM = 1,
        MASK_TYPE_ALIBI = 2,
        MASK_TYPE_NORM_COMPRESS = 3,
        MASK_TYPE_ALIBI_COMPRESS = 4,
        MASK_TYPE_ALIBI_COMPRESS_SQRT = 5,
        MASK_TYPE_ALIBI_COMPRESS_LEFT_ALIGN = 6
    };

    /** Key/value cache configuration selector. */
    enum KvCacheCfg : int {
        K_CACHE_V_CACHE = 0,  // default value of `kvcacheCfg`
        K_BYPASS_V_BYPASS = 1
    };

    /** Scaling mode selector. */
    enum ScaleType : int {
        SCALE_TYPE_TOR = 0,  // default value of `scaleType`
        SCALE_TYPE_LOGN = 1,
        SCALE_TYPE_MAX = 2   // presumably an upper-bound sentinel, not a real mode - confirm
    };

    /** Quantization mode selector. */
    enum QuantType : int {
        TYPE_QUANT_UNDEFINED = 0,  // default value of `quantType`
        TYPE_DEQUANT_FUSION = 1,
        TYPE_QUANT_QKV_OFFLINE = 2,
        TYPE_QUANT_QKV_ONLINE = 3
    };

    // --- Quantization / output type ---
    QuantType quantType = TYPE_QUANT_UNDEFINED;
    aclDataType outDataType = ACL_DT_UNDEFINED;  // type and constant are declared outside this struct

    // --- Head configuration (both default to 0) ---
    int32_t headNum = 0;
    int32_t kvHeadNum = 0;

    // --- Scaling factors (both default to 1) ---
    float qScale = 1.0f;
    float qkScale = 1.0f;

    // --- Feature switches ---
    bool batchRunStatusEnable = false;
    uint32_t isTriuMask = 0U;  // stored as an unsigned integer, not a bool; 0 by default

    // --- Mode selectors ---
    CalcType calcType = UNDEFINED;
    KernelType kernelType = KERNELTYPE_DEFAULT;

    // --- Clamping (bounds default to 0) ---
    ClampType clampType = CLAMP_TYPE_UNDEFINED;
    float clampMin = 0.0f;
    float clampMax = 0.0f;

    // --- Mask, cache, scale and layout selection ---
    MaskType maskType = MASK_TYPE_UNDEFINED;
    KvCacheCfg kvcacheCfg = K_CACHE_V_CACHE;
    ScaleType scaleType = SCALE_TYPE_TOR;
    InputLayout inputLayout = TYPE_BSND;  // type and constant are declared outside this struct
};