First Compilation (Instrumentation Compilation)

# Environment for the instrumented (profile-generating) build.
# -fprofile-generate=/path/to/profile names the directory where the profile
# data is stored by default; set LLVM_PROFILE_FILE to store it elsewhere.
export CC=clang CXX=clang++
export USE_XNNPACK=0
# The C and C++ compilers get the same ThinLTO + lld + instrumentation flags.
export CMAKE_C_FLAGS="-flto=thin -fuse-ld=lld -fprofile-generate=/path/to/profile"
export CMAKE_CXX_FLAGS="${CMAKE_C_FLAGS}"

# Compile torch from a clean tree and package it as a wheel.
# The steps are chained with && so that a failed `cd` can never let
# `git clean -dfx` (which force-deletes untracked and ignored files and
# directories) run in whatever directory the shell happens to be in, and so
# that the build is not attempted on a tree that could not be cleaned.
cd pytorch-2.1.0 &&
  git clean -dfx &&
  python3 setup.py bdist_wheel

# Compile torch_npu with LTO and PGO enabled.
# Prerequisite: the newly compiled torch must already be installed; this
# snippet does not install it. (The wheel from `setup.py bdist_wheel` is
# normally written to the torch tree's dist/ directory - confirm the file name.)
# NOTE(review): `torch_npu` is resolved relative to the current directory;
# adjust the path if the repository is checked out elsewhere.
# The steps are chained with && so that a failed `cd` can never let
# `git clean -dfx` (which force-deletes untracked and ignored files and
# directories) run in the wrong directory.
cd torch_npu &&
  git clean -dfx &&
  bash ci/build.sh --python=3.8 --enable_lto --enable_pgo=1