First Compilation (Instrumentation Compilation)
# Instrumentation build: compile torch and torch_npu with ThinLTO and
# profile-generation instrumentation enabled.
#
# Set the environment variables. /path/to/profile is the default path for
# storing the profile (performance) data; replace it with a real directory.
# You can change the output location by setting LLVM_PROFILE_FILE.
profile_dir="/path/to/profile"
# The C and C++ flags are identical, so define them once to keep them in sync.
pgo_flags="-flto=thin -fuse-ld=lld -fprofile-generate=${profile_dir}"
export CMAKE_C_FLAGS="${pgo_flags}"
export CMAKE_CXX_FLAGS="${pgo_flags}"
export CC=clang
export CXX=clang++
export USE_XNNPACK=0

# Compile torch.
# The steps are chained with && so that `git clean -dfx` (which deletes all
# untracked and ignored files) never runs in the wrong directory when `cd`
# fails, and the build is skipped when the clean fails.
cd pytorch-2.1.0 &&
  git clean -dfx &&
  python3 setup.py bdist_wheel

# Compile torch_npu (you need to install the newly compiled torch first).
# NOTE(review): this path is resolved relative to the directory the shell is
# in at this point (pytorch-2.1.0 if the step above succeeded) -- confirm it
# matches where your torch_npu checkout lives.
cd torch_npu &&
  git clean -dfx &&
  bash ci/build.sh --python=3.8 --enable_lto --enable_pgo=1
Parent topic: Compilation Optimization of torch and torch_npu