HDK、CANN、PTA、ATB Models请参考模型仓readme。
需提前安装cmake和gcc:cmake版本>=3.18;gcc版本需支持c++17。
# Refresh the package index, then install CMake and the GCC C/C++ compilers.
# Each command must be on its own line; fused together they become a single
# invalid `apt update ...` invocation.
apt update
apt install cmake
apt install gcc g++
# Clone the Triton Inference Server sources into ./triton_server and switch
# to the r24.02 branch.
git clone https://github.com/triton-inference-server/server.git triton_server
# `cd` and `git checkout` are separate commands; on a single line `cd` would
# receive the rest as extra arguments and the checkout would never run.
cd triton_server
git checkout r24.02
#!/bin/bash
# Configure, build and install Triton Inference Server (r24.02 repo tags,
# GPU support disabled) into /opt/tritonserver.
#
# Options:
#   -d  build with CMAKE_BUILD_TYPE=Debug (default: Release)
#   -e  incremental compilation: keep the existing contents of CACHE_DIR
#       instead of wiping them before configuring

CACHE_DIR=/home/xxx/triton_server/build
INCREMENTAL_COMPILATION="OFF"
build_type="Release"

# The leading ':' puts getopts in silent mode so that OPTARG holds the
# offending option letter when the '?' arm is reached; without it OPTARG is
# empty there and the message below would not name the option.
while getopts ":de" opt; do
  case "${opt}" in
    d)
      build_type="Debug"
      ;;
    e)
      INCREMENTAL_COMPILATION="ON"
      ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done

# Stop here if the build directory cannot be entered: the cleanup below
# deletes files and must never run from some other working directory.
if ! mkdir -p "$CACHE_DIR" || ! cd "$CACHE_DIR"; then
  echo "Cannot enter build directory: $CACHE_DIR" >&2
  exit 1
fi

if [[ "${INCREMENTAL_COMPILATION}" == "OFF" ]]; then
  # Full rebuild: clear previous build output. ':?' aborts if CACHE_DIR is
  # ever empty, so this can never expand to '/*'.
  rm -rf -- "${CACHE_DIR:?}"/*
fi

# The source tree is the parent of the build directory ('..').
cmake -DCMAKE_BUILD_TYPE="$build_type" \
  -DCMAKE_INSTALL_PREFIX:PATH=/opt/tritonserver \
  -DTRITON_THIRD_PARTY_REPO_TAG=r24.02 \
  -DTRITON_COMMON_REPO_TAG=r24.02 \
  -DTRITON_CORE_REPO_TAG=r24.02 \
  -DTRITON_BACKEND_REPO_TAG=r24.02 \
  -DTRITON_VERSION=1.0.0 \
  -DTRITON_ENABLE_METRICS_GPU=OFF \
  -DTRITON_ENABLE_GPU=OFF \
  -DTRITON_CORE_HEADERS_ONLY=OFF \
  .. || exit 1

# Build and install using one job per available processor.
make install -j"$(nproc)"
# Install the development packages used by the build. One command per line:
# on a single line the first '#' would comment out every later install.
apt-get install rapidjson-dev # rapidjson
apt-get install libboost-dev  # boost
apt-get install libre2-dev    # Re2
apt-get install libb64-dev    # b64
apt-get install libnuma-dev   # numa
apt-get install patchelf      # patchelf
# Note: before cloning you can enlarge git's HTTP post buffer. The author set
# it to roughly 20 GB; size it to your needs with:
#   git config --global http.postBuffer 20971520000
cd /home/xxx/triton_server/build/_deps/repo-third-party-build/grpc-repo/src
# Which version to clone depends on the grpc git tag in
# triton_server/build/_deps/repo-third-party-src/CMakeLists.txt.
git clone --branch v1.48.0 https://github.com/grpc/grpc.git
cd grpc
# Download the git submodules.
git submodule update --init --recursive
# grpc keeps its submodules under third_party/.
cd /home/xxx/triton_server/build/_deps/repo-third-party-build/grpc-repo/src/grpc/third_party
# Manually clone each submodule that the previous step failed to fetch
# (xxx presumably stands for that submodule's repository URL).
git clone xxx
cd ..
git submodule init
git submodule update
# Open the third-party CMakeLists.txt for editing.
cd /home/xxx/triton_server/build/_deps/repo-third-party-src/
vim CMakeLists.txt
# grpc-repo external project. The git download is commented out so the tree
# already present at SOURCE_DIR is used. Each argument needs its own line:
# a '#' comment runs to the end of the line, so on a single line it would
# swallow every following argument and the closing parenthesis.
ExternalProject_Add(grpc-repo
  PREFIX grpc-repo
  # GIT_REPOSITORY "https://gitee.com/Gongen/grpc.git"
  # GIT_TAG "v1.48.0"
  SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/grpc-repo/src/grpc"
  EXCLUDE_FROM_ALL ON
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  INSTALL_COMMAND ""
  TEST_COMMAND ""
  PATCH_COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/tools/install_src.py --src <SOURCE_DIR> ${INSTALL_SRC_DEST_ARG} --dest-basename=grpc_1.48.0
)
# Download and unpack the Boost 1.78.0 sources, then build and install them.
# NOTE(review): --no-check-certificate turns off TLS verification of the
# download; drop it if the host certificate validates in your environment.
# NOTE(review): Boost release archives are presumably now served from
# archives.boost.io rather than jfrog.io - confirm the URL still resolves.
wget https://boostorg.jfrog.io/artifactory/main/release/1.78.0/source/boost_1_78_0.tar.gz --no-check-certificate
tar xvf boost_1_78_0.tar.gz
cd boost_1_78_0
./bootstrap.sh --with-python=/usr/local/bin/python3
./b2 install
# Install the NVIDIA package index helper, then the Triton client with all
# of its optional extras.
pip install nvidia-pyindex
# Quoted so the shell does not treat [all] as a glob character class.
pip install 'tritonclient[all]'
在 /opt/tritonserver/backends 目录下创建 mindie 目录,将 libtriton_mindie.so 放在该目录下。
# Environment variables exported ahead of the backend build. Each export
# needs its own line: on a single line the second export would be part of
# the first line's trailing comment and never run.
export MINDIE_LLM_HOME_PATH="${mindie_dir}/latest/mindie-llm" # mindie-llm installation path
export TRITON_HOME_PATH=/opt/tritonserver                     # tritonserver installation path
# Enter the backend source directory and run its build script.
cd "${working_dir}/Triton_MindIE-LLM_Backend"
bash build.sh