昇腾社区首页
中文
注册

定义

// Configuration record for a multi-latent attention operation.
//
// NOTE(review): the meaning of each field below is inferred from its
// identifier only; confirm against the operator's reference documentation.
//
// Layout: on ABIs where int32_t, float and int-backed enums are 4 bytes,
// the named fields occupy 21 bytes and `rsv` brings the struct to 64 bytes.
// Member order and types must therefore not be changed.
struct MultiLatentAttentionParam {
    // Presumably the number of query attention heads - TODO confirm.
    int32_t headNum{0};

    // Presumably the scale factor applied to the Q*K product - TODO confirm.
    float qkScale{1.0f};

    // Presumably the number of key/value heads - TODO confirm.
    int32_t kvHeadNum{0};

    // Selects the attention-mask variant. Numeric values are spelled out
    // because they are part of the interface.
    enum MaskType : int {
        UNDEFINED = 0,
        MASK_TYPE_SPEC = 1,
        MASK_TYPE_MASK_FREE = 2,
        MASK_TYPE_CAUSAL_MASK = 3,
    };
    MaskType maskType{UNDEFINED};

    // Selects the computation variant.
    enum CalcType : int {
        CALC_TYPE_UNDEFINED = 0,
        CALC_TYPE_SPEC = 1,
        CALC_TYPE_RING = 2,
        CALC_TYPE_SPEC_AND_RING = 3,
        CALC_TYPE_PREFILL = 4,
    };
    CalcType calcType{CALC_TYPE_UNDEFINED};

    // Selects the cache layout/format; stored in a single byte.
    enum CacheMode : uint8_t {
        KVCACHE = 0,
        KROPE_CTKV = 1,
        INT8_NZCACHE = 2,
        NZCACHE = 3,
    };
    CacheMode cacheMode{KVCACHE};

    // Zero-filled bytes; presumably reserved for future fields - TODO confirm.
    uint8_t rsv[43]{};
};