# Install the evaluation harness (lm-eval) and the helper packages it is used with here.
pip install tenacity datasets lm-eval

# Point Hugging Face downloads at the mirror endpoint.
# This must be its own command: if the lm_eval invocation follows on the same
# line, `export` treats the remaining words as its own arguments and fails.
export HF_ENDPOINT=https://hf-mirror.com

# Run the gsm8k task with lm_eval's vllm backend.
# {模型路径} is a placeholder for the local model directory; substitute it before running.
lm_eval --model vllm \
  --model_args "pretrained={模型路径}/Qwen2.5-7B-Instruct,max_model_len=4096,block_size=4,tensor_parallel_size=1" \
  --tasks gsm8k \
  --batch_size 8

# Start the OpenAI-compatible vLLM API server on port 8000.
# The server stays in the foreground, so run the lm_eval command below from a
# second shell once the server is ready.
python -m vllm.entrypoints.openai.api_server \
  --model=/{模型路径}/Qwen2.5-7B-Instruct \
  --enforce-eager \
  -tp 4 \
  --port 8000 \
  --block-size=128

# Run gsm8k against the server through the completions endpoint and write the
# results to the current directory.
# Keep --model_args free of spaces: the value is split on ',' into key=value
# pairs, so a space after a comma would end up inside the next key.
lm_eval --model local-completions \
  --model_args "model=/{模型路径}/Qwen2.5-7B-Instruct,base_url=http://127.0.0.1:8000/v1/completions,tokenized_requests=False,max_tokens=256" \
  --tasks gsm8k \
  --output_path ./
精度测试结果如下所示:
|Tasks|Version|     Filter     |n-shot|  Metric   |   |Value |   |Stderr|
|-----|------:|----------------|-----:|-----------|---|-----:|---|-----:|
|gsm8k|      3|flexible-extract|     5|exact_match|↑  |0.2798|±  |0.0124|
|     |       |strict-match    |     5|exact_match|↑  |0.2259|±  |0.0115|