Example of Converting the Checkpoint File Format (Torch)
For PyTorch users, the checkpoint file needs to be used for inference after foundation model training is complete. The following example describes how to convert the checkpoint file saved by MindIO ACP into a file in the native format of Torch.
- load_dir: Replace it with the actual directory that stores the checkpoint file.
- new_dir: Replace it with the directory in which the converted checkpoint files are to be stored. You are advised to set it to an empty directory.
- iteration: Iteration number whose checkpoint files are to be converted. The number is formatted as iter_{:07d} (for example, iter_0002000) and joined with load_dir to form the source directory.
# Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
import os
import mindio_acp
def main(load_dir="", new_dir="", iteration=2000):
    """Convert all checkpoint files of one iteration with mindio_acp.convert.

    The directory tree below ``<load_dir>/iter_<iteration>`` (iteration
    zero-padded to seven digits) is mirrored into ``new_dir``, and every
    file found is passed to ``mindio_acp.convert`` together with its
    mirrored destination path.

    The default values are placeholders: either edit them in place or pass
    the real values as arguments.

    Args:
        load_dir: Directory that stores the checkpoint files.
        new_dir: Directory that receives the converted files. An empty
            directory is recommended.
        iteration: Iteration number whose checkpoint files are converted.

    Returns:
        None. Progress and problems are reported on standard output.
    """
    directory = 'iter_{:07d}'.format(iteration)
    common_path = os.path.join(load_dir, directory)
    if not os.path.exists(common_path):
        print(f"Source directory {common_path} does not exist.")
        return
    if not new_dir:
        # os.makedirs("") would fail with an unhelpful FileNotFoundError.
        print("Target directory new_dir is not set.")
        return
    # exist_ok avoids the separate existence check and the race it implies.
    os.makedirs(new_dir, exist_ok=True)
    for root, _, files in os.walk(common_path):
        # Compute the relative path and target directory
        relative_path = os.path.relpath(root, common_path)
        if relative_path == os.curdir:
            # The root itself maps to new_dir; joining "." would only add a
            # redundant "/." component to every destination path.
            target_dir = new_dir
        else:
            target_dir = os.path.join(new_dir, relative_path)
        # Create directories in the target directory
        os.makedirs(target_dir, exist_ok=True)
        # Convert all files in the current directory
        for file in files:
            src_file = os.path.join(root, file)
            dst_file = os.path.join(target_dir, file)
            res = mindio_acp.convert(src_file, dst_file)
            print(f"Convert {src_file} to {dst_file}, result: {res}")
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()
Parent topic: Usage Guidance