The lora_adapter.json configuration method has been deprecated. The Multi LoRA feature is now enabled by adding the LoraModules field to the MindIE Service config.json file; the detailed steps are described below.
This section uses the LLaMA3.1 70B model as an example to briefly introduce how to use Multi LoRA.
{ "BackendConfig": { "backendName" : "mindieservice_llm_engine", "modelInstanceNumber" : 1, "npuDeviceIds" : [[0,1,2,3,4,5,6,7]], "tokenizerProcessNumber" : 8, "multiNodesInferEnabled": false, "multiNodesInferPort": 1120, "interNodeTLSEnabled": true, "interNodeTlsCaPath": "security/grpc/ca/", "interNodeTlsCaFiles": ["ca.pem"], "interNodeTlsCert": "security/grpc/certs/server.pem", "interNodeTlsPk": "security/grpc/keys/server.key.pem", "interNodeTlsPkPwd": "security/grpc/pass/mindie_server_key_pwd.txt", "interNodeTlsCrlPath" : "security/grpc/certs/", "interNodeTlsCrlfiles" : ["server_crl.pem"], "interNodeKmcKsfMaster": "tools/pmt/master/ksfa", "interNodeKmcKsfStandby": "tools/pmt/standby/ksfb", "ModelDeployConfig": { "maxSeqLen" : 2560, "maxInputTokenLen" : 2048, "truncation" : false, "ModelConfig" : [ { "modelInstanceType": "Standard", "modelName" : "llama3.1-70b", "modelWeightPath" : "/data/weights/llama3.1-70b-safetensors", "worldSize" : 8, "cpuMemSize" : 5, "npuMemSize" : -1, "backendType": "atb", "trustRemoteCode": false } ], "LoraModules" :[{ "name" : "adapter1", "path" : "/data/lora_model_weights/Meta-Llama-3.1-70B-Chat-Uncensored", "baseModelName" : "llama3.1-70b" }] }, } }
{ "alpha_pattern": {}, "auto_mapping": null, "base_model_name_or_path": "/data/weights/llama3.1-70b-safetensors", "bias": "none", "fan_in_fan_out": null, "inference_mode": true, "init_lora_weights": true, "layer_replication": null, "layers_pattern": null, "layers_to_transform": null, "loftq_config": {}, "lora_alpha": 16, "lora_dropout": 0.05, "megatron_config": null, "megatron_core": "megatron.core", "modules_to_save": null, "peft_type": "LORA", "r":8, "rank_pattern": {}, "revision":null, "target_modules": [ "k_proj", "q_proj", "gate_proj", "v_proj", "o_proj", "up_proj", "down_proj" ], "task_type": "CAUSAL_LM", "use_dora": false, "use_rslora": false }
Go to the MindIE Service directory, edit conf/config.json as described above, and then start the service:

cd {MindIE installation directory}/latest/mindie-service/
vi conf/config.json
./bin/mindieservice_daemon
其中"model"参数可以设置为基础模型名称(config.json配置文件中"ModelConfig"字段下的"modelName"参数的值)或lora ID(config.json配置文件中"LoraModules"字段下"name"参数的值)。当"model"参数为基础模型名称时,不使用Lora权重进行推理。当"model"参数为lora ID时,启用基础模型权重和指定的Lora权重进行推理。
curl https://127.0.0.1:1025/generate \
-H "Content-Type: application/json" \
--cacert ca.pem --cert client.pem --key client.key.pem \
-X POST \
-d '{
    "model": "${base model name}",
    "prompt": "Taxation in Puerto Rico -- The Commonwealth government has its own tax laws and Puerto Ricans are also required to pay some US federal taxes, although most residents do not have to pay the federal personal income tax. In 2009, Puerto Rico paid $3.742 billion into the US Treasury. Residents of Puerto Rico pay into Social Security, and are thus eligible for Social Security benefits upon retirement. However, they are excluded from the Supplemental Security Income.\nQuestion: is federal income tax the same as social security?\nAnswer:",
    "max_tokens": 20,
    "temperature": 0
}'

curl https://127.0.0.1:1025/generate \
-H "Content-Type: application/json" \
--cacert ca.pem --cert client.pem --key client.key.pem \
-X POST \
-d '{
    "model": "adapter1",
    "prompt": "Taxation in Puerto Rico -- The Commonwealth government has its own tax laws and Puerto Ricans are also required to pay some US federal taxes, although most residents do not have to pay the federal personal income tax. In 2009, Puerto Rico paid $3.742 billion into the US Treasury. Residents of Puerto Rico pay into Social Security, and are thus eligible for Social Security benefits upon retirement. However, they are excluded from the Supplemental Security Income.\nQuestion: is federal income tax the same as social security?\nAnswer:",
    "max_tokens": 20,
    "temperature": 0
}'
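The same two requests can also be issued from Python. The sketch below mirrors the curl examples above (endpoint, TLS files, and payload fields are taken from them); the generate() helper name and the shortened prompt are illustrative, not part of the MindIE API.

import requests

MINDIE_URL = "https://127.0.0.1:1025/generate"  # same endpoint as the curl examples

def generate(model: str, prompt: str) -> str:
    # "model" is either the base model name or a LoRA ID from LoraModules.
    payload = {"model": model, "prompt": prompt, "max_tokens": 20, "temperature": 0}
    resp = requests.post(
        MINDIE_URL,
        json=payload,
        verify="ca.pem",                        # server CA certificate
        cert=("client.pem", "client.key.pem"),  # client certificate and key
        timeout=60,
    )
    resp.raise_for_status()
    return resp.text

question = "Question: is federal income tax the same as social security?\nAnswer:"
print(generate("llama3.1-70b", question))  # base model weights only
print(generate("adapter1", question))      # base model plus the adapter1 LoRA weights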