// Parameter bundle for a self-attention operation.
//
// Every member carries a default initializer, so a default-constructed
// instance is fully defined. Member declaration order is part of the layout
// and must be kept as-is.
// NOTE(review): the meaning of individual enumerators and fields is not
// visible from this declaration alone; the per-member notes below only
// restate what the declaration itself shows.
struct SelfAttentionParam {
    // Possible values for `calcType`.
    enum CalcType : int {
        UNDEFINED = 0,
        ENCODER = 1,
        DECODER = 2,
        PA_ENCODER = 3
    };

    // Possible values for `kernelType`.
    enum KernelType : int {
        KERNELTYPE_DEFAULT = 0,
        KERNELTYPE_HIGH_PRECISION = 1
    };

    // Possible values for `clampType`.
    enum ClampType : int {
        CLAMP_TYPE_UNDEFINED = 0,
        CLAMP_TYPE_MIN_MAX = 1
    };

    // Possible values for `maskType`.
    enum MaskType : int {
        MASK_TYPE_UNDEFINED = 0,
        MASK_TYPE_NORM = 1,
        MASK_TYPE_ALIBI = 2,
        MASK_TYPE_NORM_COMPRESS = 3,
        MASK_TYPE_ALIBI_COMPRESS = 4,
        MASK_TYPE_ALIBI_COMPRESS_SQRT = 5,
        MASK_TYPE_ALIBI_COMPRESS_LEFT_ALIGN = 6
    };

    // Possible values for `kvcacheCfg`.
    enum KvCacheCfg : int {
        K_CACHE_V_CACHE = 0,
        K_BYPASS_V_BYPASS = 1
    };

    // Possible values for `scaleType`.
    // NOTE(review): SCALE_TYPE_MAX looks like an end-of-range sentinel rather
    // than a selectable mode -- confirm against the consumers of this struct.
    enum ScaleType : int {
        SCALE_TYPE_TOR = 0,
        SCALE_TYPE_LOGN = 1,
        SCALE_TYPE_MAX = 2
    };

    // Possible values for `quantType`.
    enum QuantType : int {
        TYPE_QUANT_UNDEFINED = 0,
        TYPE_DEQUANT_FUSION = 1,
        TYPE_QUANT_QKV_OFFLINE = 2,
        TYPE_QUANT_QKV_ONLINE = 3
    };

    // Quantization mode; defaults to TYPE_QUANT_UNDEFINED.
    QuantType quantType = TYPE_QUANT_UNDEFINED;
    // Output data type; defaults to ACL_DT_UNDEFINED.
    aclDataType outDataType = ACL_DT_UNDEFINED;
    // Head count; defaults to 0.
    int32_t headNum = 0;
    // KV head count; defaults to 0.
    int32_t kvHeadNum = 0;
    // Scale factors; both default to 1.
    float qScale = 1.0f;
    float qkScale = 1.0f;
    // Disabled by default.
    bool batchRunStatusEnable = false;
    // Stored as an unsigned integer rather than a bool; defaults to 0.
    uint32_t isTriuMask = 0U;
    // Calculation mode; defaults to UNDEFINED.
    CalcType calcType = UNDEFINED;
    // Kernel variant; defaults to KERNELTYPE_DEFAULT.
    KernelType kernelType = KERNELTYPE_DEFAULT;
    // Clamp mode and its bounds; the bounds default to 0.
    // NOTE(review): presumably the bounds only matter for CLAMP_TYPE_MIN_MAX -- confirm.
    ClampType clampType = CLAMP_TYPE_UNDEFINED;
    float clampMin = 0.0f;
    float clampMax = 0.0f;
    // Mask mode; defaults to MASK_TYPE_UNDEFINED.
    MaskType maskType = MASK_TYPE_UNDEFINED;
    // KV-cache configuration; defaults to K_CACHE_V_CACHE.
    KvCacheCfg kvcacheCfg = K_CACHE_V_CACHE;
    // Scale mode; defaults to SCALE_TYPE_TOR.
    ScaleType scaleType = SCALE_TYPE_TOR;
    // Input layout; defaults to TYPE_BSND (InputLayout is declared elsewhere).
    InputLayout inputLayout = TYPE_BSND;
};