定义
/// Parameter block for the multi-latent-attention operation.
///
/// Every member carries an in-class default, so a default-constructed
/// instance is fully initialised. Object layout follows the declaration
/// order below, so members must not be reordered or resized casually.
///
/// NOTE(review): the semantics of the individual fields and enumerators are
/// not visible in this definition; the hedged remarks below are assumptions
/// to be confirmed against the operator specification.
struct MultiLatentAttentionParam {
    /// Selects the masking behaviour. Defaults to UNDEFINED.
    enum MaskType : int {
        UNDEFINED = 0,
        MASK_TYPE_SPEC = 1,
        MASK_TYPE_MASK_FREE = 2,
        MASK_TYPE_CAUSAL_MASK = 3
    };

    /// Selects the computation variant. Defaults to CALC_TYPE_UNDEFINED.
    enum CalcType : int {
        CALC_TYPE_UNDEFINED = 0,
        CALC_TYPE_SPEC = 1,
        CALC_TYPE_RING = 2,
        CALC_TYPE_SPEC_AND_RING = 3,
        CALC_TYPE_PREFILL = 4
    };

    /// Selects the cache representation; stored in a single byte.
    /// Defaults to KVCACHE.
    enum CacheMode : uint8_t {
        KVCACHE = 0,
        KROPE_CTKV = 1,
        INT8_NZCACHE = 2,
        NZCACHE = 3
    };

    // Data members: keep this order, it defines the memory layout.

    /// Head count (presumably query heads -- TODO confirm). Defaults to 0.
    int32_t headNum = 0;

    /// Scale factor (presumably applied to the Q*K product -- TODO confirm).
    /// Defaults to 1.0.
    float qkScale = 1.0f;

    /// KV head count (presumably key/value heads -- TODO confirm). Defaults to 0.
    int32_t kvHeadNum = 0;

    MaskType maskType = UNDEFINED;
    CalcType calcType = CALC_TYPE_UNDEFINED;
    CacheMode cacheMode = KVCACHE;

    /// Zero-filled trailing bytes (presumably reserved for future fields).
    /// NOTE(review): with 4-byte int/float the preceding members occupy 21
    /// bytes, so 43 brings the struct to 64 bytes -- confirm that is intended.
    uint8_t rsv[43] = {};
};