// Plain parameter bundle for a paged-attention operation.
//
// Every field carries a default member initializer, so a default-constructed
// instance is fully initialized. The nested enum types are declared first and
// the data members follow; the data members keep their original relative
// order so the object layout is unchanged.
// NOTE(review): field meanings below are inferred from names only -- confirm
// against the operator documentation before relying on them.
struct PagedAttentionParam {
    // ---- Nested option types ------------------------------------------------

    // Mask selection; UNDEFINED (0) is the default.
    enum MaskType : int {
        UNDEFINED = 0,
        MASK_TYPE_NORM = 1,
        MASK_TYPE_ALIBI = 2,
        MASK_TYPE_SPEC = 3
    };

    // Quantization selection. TYPE_QUANT_UNDEFINED and TYPE_QUANT_UNQUANT both
    // have the value 0, i.e. they are aliases of each other.
    enum QuantType : int {
        TYPE_QUANT_UNDEFINED = 0,
        TYPE_QUANT_UNQUANT = 0,
        TYPE_DEQUANT_FUSION = 1,
        TYPE_QUANT_QKV_OFFLINE = 2,
        TYPE_QUANT_QKV_ONLINE = 3
    };

    // Compression selection. COMPRESS_TYPE_MAX is the last enumerator
    // (presumably an upper-bound sentinel rather than a usable mode -- confirm).
    enum CompressType : int {
        COMPRESS_TYPE_UNDEFINED = 0,
        COMPRESS_TYPE_KVHEAD = 1,
        COMPRESS_TYPE_KVHEAD_ROPE = 2,
        COMPRESS_TYPE_MAX = 3
    };

    // Calculation-mode selection; CALC_TYPE_UNDEFINED (0) is the default.
    enum CalcType : int {
        CALC_TYPE_UNDEFINED = 0,
        CALC_TYPE_SPEC = 1
    };

    // Scaling selection. SCALE_TYPE_MAX is the last enumerator
    // (presumably an upper-bound sentinel rather than a usable mode -- confirm).
    enum ScaleType : int {
        SCALE_TYPE_TOR = 0,
        SCALE_TYPE_LOGN = 1,
        SCALE_TYPE_MAX = 2
    };

    // ---- Data members (order is part of the layout; do not reorder) ---------

    int32_t headNum{0};                           // presumably the number of attention heads
    float qkScale{1.0f};                          // presumably a scale applied to Q*K^T; defaults to 1
    int32_t kvHeadNum{0};                         // presumably the number of K/V heads
    MaskType maskType{UNDEFINED};
    bool batchRunStatusEnable{false};
    QuantType quantType{TYPE_QUANT_UNQUANT};
    aclDataType outDataType{ACL_DT_UNDEFINED};    // type and constant are declared outside this struct
    bool hasQuantOffset{false};
    CompressType compressType{COMPRESS_TYPE_UNDEFINED};
    CalcType calcType{CALC_TYPE_UNDEFINED};
    ScaleType scaleType{SCALE_TYPE_TOR};
    InputLayout inputLayout{TYPE_BSND};           // type and constant are declared outside this struct
    uint32_t mlaVHeadSize{0};
    uint8_t rsv[68]{};                            // zero-filled; presumably reserved space for future fields
};