使用样例

限制与约束

操作步骤

lora_adapter.json文件配置方式已废弃(日落),新的配置方式是在MindIE Service的config.json文件中添加LoraModules字段以开启Multi LoRA特性,详细操作步骤如下所示。

本章节以LLaMA3.1 70B模型为例,简单介绍Multi LoRA如何使用。

  1. 在MindIE Service的config.json文件中添加LoraModules字段(即下方示例中的LoraModules部分),LoraModules字段解释请参见LoraModules参数说明,config.json文件部分参数如下所示。

    {    
        "BackendConfig": {
            "backendName" : "mindieservice_llm_engine",
            "modelInstanceNumber" : 1,
            "npuDeviceIds" : [[0,1,2,3,4,5,6,7]],
            "tokenizerProcessNumber" : 8,
            "multiNodesInferEnabled": false,
            "multiNodesInferPort": 1120,
            "interNodeTLSEnabled": true,
            "interNodeTlsCaPath": "security/grpc/ca/",
            "interNodeTlsCaFiles": ["ca.pem"],
            "interNodeTlsCert": "security/grpc/certs/server.pem",
            "interNodeTlsPk": "security/grpc/keys/server.key.pem",
            "interNodeTlsPkPwd": "security/grpc/pass/mindie_server_key_pwd.txt",
            "interNodeTlsCrlPath" : "security/grpc/certs/",
            "interNodeTlsCrlfiles" : ["server_crl.pem"],
            "interNodeKmcKsfMaster": "tools/pmt/master/ksfa",
            "interNodeKmcKsfStandby": "tools/pmt/standby/ksfb",
            "ModelDeployConfig":
            {
                "maxSeqLen" : 2560,
                "maxInputTokenLen" : 2048,
                "truncation" : false,
                "ModelConfig" : [
                    {
                        "modelInstanceType": "Standard",
                        "modelName" : "llama3.1-70b",
                        "modelWeightPath" : "/data/weights/llama3.1-70b-safetensors",
                        "worldSize" : 8,
                        "cpuMemSize" : 5,
                        "npuMemSize" : -1,
                        "backendType": "atb",
                        "trustRemoteCode": false
                    }
                ],
                "LoraModules" :[{
                "name" : "adapter1",
                "path" : "/data/lora_model_weights/Meta-Llama-3.1-70B-Chat-Uncensored",
                "baseModelName" : "llama3.1-70b"
                }]
            }
        }
    }

  2. 在LoRA权重路径/data/lora_model_weights/Meta-Llama-3.1-70B-Chat-Uncensored下的adapter_config.json文件中配置基础权重的路径base_model_name_or_path(须与config.json配置文件中"ModelConfig"字段下的"modelWeightPath"参数保持一致)。

    {
      "alpha_pattern": {},
      "auto_mapping": null,
      "base_model_name_or_path": "/data/weights/llama3.1-70b-safetensors",
      "bias": "none",
      "fan_in_fan_out": null,
      "inference_mode": true,
      "init_lora_weights": true,
      "layer_replication": null,
      "layers_pattern": null,
      "layers_to_transform": null,
      "loftq_config": {},
      "lora_alpha": 16,
      "lora_dropout": 0.05,
      "megatron_config": null,
      "megatron_core": "megatron.core",
      "modules_to_save": null,
      "peft_type": "LORA",
      "r":8,
      "rank_pattern": {}, 
      "revision":null,
      "target_modules": [
        "k_proj",
        "q_proj",
        "gate_proj",
        "v_proj",
        "o_proj",
        "up_proj",
        "down_proj"
      ],
      "task_type": "CAUSAL_LM",
      "use_dora": false,
      "use_rslora": false
    }

  3. 配置服务化参数并启动,服务化参数说明请参见配置参数说明章节。

    cd {MindIE安装目录}/latest/mindie-service/
    vi conf/config.json
    ./bin/mindieservice_daemon

  4. 使用以下指令发送请求。

    其中"model"参数可以设置为基础模型名称(config.json配置文件中"ModelConfig"字段下的"modelName"参数的值)或LoRA ID(config.json配置文件中"LoraModules"字段下"name"参数的值)。当"model"参数为基础模型名称时,不使用LoRA权重进行推理。当"model"参数为LoRA ID时,启用基础模型权重和指定的LoRA权重进行推理。以下第一条请求使用基础模型进行推理(请将${基础模型名称}替换为实际的基础模型名称,本例中为llama3.1-70b),第二条请求使用名为adapter1的LoRA权重进行推理。

    curl https://127.0.0.1:1025/generate \
    -H "Content-Type: application/json" \
    --cacert ca.pem --cert client.pem  --key client.key.pem \
    -X POST \
    -d '{
    "model": "${基础模型名称}",
    "prompt": "Taxation in Puerto Rico -- The Commonwealth government has its own tax laws and Puerto Ricans are also required to pay some US federal taxes, although most residents do not have to pay the federal personal income tax. In 2009, Puerto Rico paid $3.742 billion into the US Treasury. Residents of Puerto Rico pay into Social Security, and are thus eligible for Social Security benefits upon retirement. However, they are excluded from the Supplemental Security Income.\nQuestion: is federal income tax the same as social security?\nAnswer:",
    "max_tokens": 20,
    "temperature": 0
    }'
    
    curl https://127.0.0.1:1025/generate \
    -H "Content-Type: application/json" \
    --cacert ca.pem --cert client.pem  --key client.key.pem \
    -X POST \
    -d '{
    "model": "adapter1",
    "prompt": "Taxation in Puerto Rico -- The Commonwealth government has its own tax laws and Puerto Ricans are also required to pay some US federal taxes, although most residents do not have to pay the federal personal income tax. In 2009, Puerto Rico paid $3.742 billion into the US Treasury. Residents of Puerto Rico pay into Social Security, and are thus eligible for Social Security benefits upon retirement. However, they are excluded from the Supplemental Security Income.\nQuestion: is federal income tax the same as social security?\nAnswer:",
    "max_tokens": 20,
    "temperature": 0
    }'