Using the Triton-Compatible Interfaces
- Token inference interface (a Python equivalent is sketched after this list):
curl -H "Accept: application/json" -H "Content-type: application/json" --cacert ca.pem --cert client.pem --key client.key.pem -X POST -d '{ "id": "42", "inputs": [{ "name": "input0", "shape": [ 1, 10 ], "datatype": "UINT32", "data": [ 396, 319, 13996, 29877, 29901, 29907, 3333, 20718, 316, 23924 ], "parameters": { "temperature": 0.5, "top_k": 10, "top_p": 0.95, "do_sample": true, "seed": null, "repetition_penalty": 1.03, "max_new_tokens": 512 } }], "outputs": [{ "name": "output0" }] }' https://127.0.0.1:1025/v2/models/llama_65b/infer
- Text inference interface:
curl -H "Accept: application/json" -H "Content-type: application/json" --cacert ca.pem --cert client.pem --key client.key.pem -X POST -d '{ "id":"a123", "text_input": "My name is Olivier and I", "parameters": { "details": true, "do_sample": true, "max_new_tokens":200, "repetition_penalty": 1.1, "seed": 123, "temperature": 1, "top_k": 2147483647, "top_p": 0.99, "batch_size":100 } }' https://127.0.0.1:1025/v2/models/llama_65b/generate
- Streaming inference interface:
curl -H "Accept: application/json" -H "Content-type: application/json" --cacert ca.pem --cert client.pem --key client.key.pem -X POST -d '{ "id":"a123", "text_input": "My name is Olivier and I", "parameters": { "details": true, "do_sample": true, "max_new_tokens":200, "repetition_penalty": 1.1, "seed": 123, "temperature": 1, "top_k": 2147483647, "top_p": 0.99, "batch_size":100 } }' https://127.0.0.1:1025/v2/models/llama_65b/generate_stream
For other interfaces, see the OpenAI-compatible interfaces chapter.
You can also run inference through the MindIE Client Python interface, for example text inference.
First, create a MindIE Client. Save the following code as utils.py; the create_client method defined there can be reused later.
import argparse

from mindieclient.python.httpclient import MindIEHTTPClient


def create_client(request_count=1):
    # parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-u",
        "--url",
        required=False,
        default="https://127.0.0.1:1025",
        help="MindIE-Server URL.",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=True,
        help="Enable detailed information output.",
    )
    parser.add_argument(
        "-s",
        "--ssl",
        action="store_true",
        required=False,
        default=False,
        help="Enable encrypted link with https.",
    )
    parser.add_argument(
        "-ca",
        "--ca_certs",
        required=False,
        default="ca.pem",
        help="Provide https CA certificate.",
    )
    parser.add_argument(
        "-key",
        "--key_file",
        required=False,
        default="client.key.pem",
        help="Provide https client key file.",
    )
    parser.add_argument(
        "-cert",
        "--cert_file",
        required=False,
        default="client.pem",
        help="Provide https client certificate.",
    )
    args = parser.parse_args()
    # create the client, with SSL options if --ssl is set
    try:
        if args.ssl:
            ssl_options = {}
            if args.ca_certs is not None:
                ssl_options["ca_certs"] = args.ca_certs
            if args.key_file is not None:
                ssl_options["keyfile"] = args.key_file
            if args.cert_file is not None:
                ssl_options["certfile"] = args.cert_file
            mindie_client = MindIEHTTPClient(
                url=args.url,
                verbose=args.verbose,
                enable_ssl=True,
                ssl_options=ssl_options,
                concurrency=request_count,
            )
        else:
            mindie_client = MindIEHTTPClient(
                url=args.url, verbose=args.verbose, concurrency=request_count
            )
    except Exception as e:
        raise e
    return mindie_client
Then create another file that calls the create_client method above to invoke the text inference interface.
import sys

from utils import create_client

if __name__ == "__main__":
    # parse arguments and create the client
    try:
        mindie_client = create_client()
    except Exception:
        print("Client creation failed!")
        sys.exit(1)
    # build the input
    prompt = "My name is Olivier and I"
    model_name = "llama_65b"
    parameters = {
        "do_sample": True,
        "temperature": 0.5,
        "top_k": 10,
        "top_p": 0.9,
        "truncate": 5,
        "typical_p": 0.9,
        "seed": 1,
        "repetition_penalty": 1,
        "watermark": True,
        "details": True,
    }
    # send the inference request
    result = mindie_client.generate(
        model_name,
        prompt,
        request_id="1",
        parameters=parameters,
    )
    print(result.get_response())
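For example, assuming the script above is saved as text_infer.py (a hypothetical file name), an HTTPS-enabled run could pass the certificate options defined in create_client:
python text_infer.py -s -u https://127.0.0.1:1025 -ca ca.pem -key client.key.pem -cert client.pem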
For other MindIE Client interfaces, see the class MindIEHTTPClient chapter.
Parent topic: Interface Usage Description