Synchronous Inference
Create the sample code test_infer.py (it must be placed in the same directory as utils.py, the client-creation code sample from Creating a Client), modify model_name to match your actual deployment, and then run the sample with the command python test_infer.py.
import sys
import numpy as np
from mindieclient.python.httpclient import Input, RequestedOutput
from utils import create_client
if __name__ == "__main__":
    # get argument and create client
    try:
        mindie_client = create_client()
    except Exception as e:
        print("Client creation failed: %s" % e)
        sys.exit(1)
    # create input and requested output
    inputs = []
    outputs = []
    input_data = np.arange(start=0, stop=16, dtype=np.uint32)
    input_data = np.expand_dims(input_data, axis=0)
    inputs.append(Input("INPUT0", [1, 16], "UINT32"))
    inputs[0].initialize_data(input_data)
    outputs.append(RequestedOutput("OUTPUT0"))
    # run model inference
    model_name = "llama_65b"  # must match the modelName configured on the server
    results = mindie_client.infer(
        model_name,
        inputs,
        outputs=outputs,
    )
    print(results.get_response())
    output_data = results.retrieve_output_name_to_numpy("OUTPUT0")
    print("input_data: %s" % np.array2string(input_data))
    print("output_data: %s" % np.array2string(output_data))
Parent topic: Sample Code