模型准备完毕后,即可运行TEI框架(请确认已运行过昇腾环境必需组件的set_env.sh),以下提供一个启动TEI服务和发送请求的简单样例:
# Make cargo-installed binaries (text-embeddings-router) reachable on PATH.
export PATH=$PATH:~/.cargo/bin/

# Select the NPU compute-card id and the model backend used by TEI.
export TEI_NPU_DEVICE=0      # set the compute-card id as needed
export TEI_NPU_BACKEND=atb   # choose "mindietorch" or "atb" as needed

# Local model weight paths, or model ids from the Hugging Face hub.
model_path_embedding=/home/data/models/bge-large-zh-v1.5
model_path_reranker=/home/data/models/bge-reranker-large

# The launch commands and flag names below are identical to native TEI.
# Start either the Embedding or the Reranker model as needed — both
# examples bind port 12347, so run only one of them at a time.

# Embedding model
text-embeddings-router \
    --model-id "$model_path_embedding" \
    --dtype float16 \
    --pooling cls \
    --max-concurrent-requests 2048 \
    --max-batch-requests 2048 \
    --max-batch-tokens 1100000 \
    --max-client-batch-size 256 \
    --port 12347

# Reranker model
text-embeddings-router \
    --model-id "$model_path_reranker" \
    --dtype float16 \
    --max-client-batch-size 192 \
    --max-concurrent-requests 2048 \
    --max-batch-tokens 163840 \
    --max-batch-requests 128 \
    --port 12347
# NOTE(review): this path points at a gte-Qwen embedding model directory,
# while the surrounding examples use bge models and $model_path_* variables —
# confirm which example this export belongs to and whether it is still needed.
export model_name_or_path=/path/to/your/gte/qwen/embedding/directory
# /embed endpoint: returns one embedding vector per input string.
curl 127.0.0.1:12347/embed \
    -X POST \
    -d '{"inputs": ["What is Deep Learning?"]}' \
    -H 'Content-Type: application/json'

# /embed_all endpoint: same request shape as /embed.
curl 127.0.0.1:12347/embed_all \
    -X POST \
    -d '{"inputs": ["What is Deep Learning?"]}' \
    -H 'Content-Type: application/json'

# /rerank endpoint: scores each candidate text in "texts" against "query".
curl 127.0.0.1:12347/rerank \
    -X POST \
    -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is a sub-field of Machine Learning.", "Deep learning is a country."]}' \
    -H 'Content-Type: application/json'