# POST a tensor-style inference request to the llama_65b model's /infer
# endpoint on 127.0.0.1:1025 over HTTPS.
# TLS files: ca.pem is used to verify the server; client.pem and
# client.key.pem are presented as the client certificate and private key.
# The JSON body carries one input ("input0", shape [1, 10], datatype UINT32),
# requests "output0", and sets the sampling parameters for this call.
curl \
  --header "Accept: application/json" \
  --header "Content-type: application/json" \
  --cacert ca.pem \
  --cert client.pem \
  --key client.key.pem \
  --request POST \
  --data '{ "id": "42", "inputs": [{ "name": "input0", "shape": [ 1, 10 ], "datatype": "UINT32", "data": [ 396, 319, 13996, 29877, 29901, 29907, 3333, 20718, 316, 23924 ] }], "outputs": [{ "name": "output0" }], "parameters": { "temperature": 0.5, "top_k": 10, "top_p": 0.95, "do_sample": true, "seed": null, "repetition_penalty": 1.03, "max_new_tokens": 20, "watermark": true } }' \
  https://127.0.0.1:1025/v2/models/llama_65b/infer
# POST a text-generation request to the llama_65b model's /generate endpoint
# on 127.0.0.1:1025 over HTTPS.
# TLS files: ca.pem is used to verify the server; client.pem and
# client.key.pem are presented as the client certificate and private key.
# The JSON body supplies the prompt in "text_input" and the generation
# settings (up to 250 new tokens, fixed seed 123) in "parameters".
curl \
  --header "Accept: application/json" \
  --header "Content-type: application/json" \
  --cacert ca.pem \
  --cert client.pem \
  --key client.key.pem \
  --request POST \
  --data '{ "id":"a123", "text_input": "My name is Olivier and I", "parameters": { "details": true, "do_sample": true, "max_new_tokens":250, "repetition_penalty": 1.1, "seed": 123, "temperature": 1, "top_k": 10, "top_p": 0.99, "batch_size":100, "typical_p": 0.5, "watermark": false, "perf_stat": false } }' \
  https://127.0.0.1:1025/v2/models/llama_65b/generate
# POST a text-generation request to the llama_65b model's /generate_stream
# endpoint on 127.0.0.1:1025 over HTTPS.
# TLS files: ca.pem is used to verify the server; client.pem and
# client.key.pem are presented as the client certificate and private key.
# The JSON body supplies the prompt in "text_input" and the generation
# settings (up to 200 new tokens, fixed seed 123) in "parameters".
# --no-buffer turns off curl's output buffering so each chunk of the streamed
# response is written as soon as it arrives instead of being held back,
# which matters most when the output is piped into another program.
curl \
  --no-buffer \
  --header "Accept: application/json" \
  --header "Content-type: application/json" \
  --cacert ca.pem \
  --cert client.pem \
  --key client.key.pem \
  --request POST \
  --data '{ "id":"a123", "text_input": "My name is Olivier and I", "parameters": { "details": true, "do_sample": true, "max_new_tokens":200, "repetition_penalty": 1.1, "seed": 123, "temperature": 1, "top_k": 10, "top_p": 0.99, "batch_size":100, "typical_p": 0.5, "watermark": false, "perf_stat": false } }' \
  https://127.0.0.1:1025/v2/models/llama_65b/generate_stream
其他接口请参见兼容Triton接口章节。