定义
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | struct PagedAttentionParam { int32_t headNum = 0; float qkScale = 1.0; int32_t kvHeadNum = 0; enum MaskType : int { UNDEFINED = 0, MASK_TYPE_NORM, MASK_TYPE_ALIBI, MASK_TYPE_SPEC, MASK_TYPE_MASK_FREE }; MaskType maskType = UNDEFINED; bool batchRunStatusEnable = false; enum QuantType : int { TYPE_QUANT_UNDEFINED = 0, TYPE_QUANT_UNQUANT = 0, TYPE_DEQUANT_FUSION, TYPE_QUANT_QKV_OFFLINE, TYPE_QUANT_QKV_ONLINE }; QuantType quantType = TYPE_QUANT_UNQUANT; aclDataType outDataType = ACL_DT_UNDEFINED; bool hasQuantOffset = false; enum CompressType : int { COMPRESS_TYPE_UNDEFINED = 0, COMPRESS_TYPE_KVHEAD, COMPRESS_TYPE_KVHEAD_ROPE, COMPRESS_TYPE_MAX }; CompressType compressType = COMPRESS_TYPE_UNDEFINED; enum CalcType : int { CALC_TYPE_UNDEFINED = 0, CALC_TYPE_SPEC }; CalcType calcType = CALC_TYPE_UNDEFINED; enum ScaleType : int { SCALE_TYPE_TOR = 0, SCALE_TYPE_LOGN, SCALE_TYPE_MAX }; ScaleType scaleType = SCALE_TYPE_TOR; InputLayout inputLayout = TYPE_BSND; uint32_t mlaVHeadSize = 0; uint8_t rsv[68] = {0}; }; |