运行仓上模型

  1. 下载原始代码仓:

    git clone https://gitee.com/ascend/AscendSpeed
    cd AscendSpeed 
    mkdir logs
    mkdir ckpt

  2. 准备环境:

    conda create -n (conda环境名称) python=3.7
    conda activate (conda环境名称)
    下载并安装torch及torch_npu:
    # 安装torch:
    pip install torch==1.11 -i https://pypi.tuna.tsinghua.edu.cn/simple
    # 安装torch_npu:
    wget https://gitee.com/ascend/pytorch/releases/download/v5.0.rc2-pytorch1.11.0/torch_npu-1.11.0.post1-cp37-cp37m-linux_aarch64.whl (ARM)
    wget https://gitee.com/ascend/pytorch/releases/download/v5.0.rc2-pytorch1.11.0/torch_npu-1.11.0.post1-cp37-cp37m-linux_x86_64.whl (X86)
    pip install torch_npu-1.11.0.post1-cp37-cp37m-linux_XXXXXX.whl
    安装deepspeed与deepspeed_npu:
    pip install deepspeed==0.9.2
    git clone https://gitee.com/ascend/DeepSpeed.git -b v0.9.2 deepspeed_npu
    cd deepspeed_npu
    pip install -e .
    安装其他所需要的依赖:
    pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

  3. 获取数据集:

    wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt
    wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json
    # 执行preprocess_data.py
    python tools/preprocess_data.py \
                --input ./oscar-1GB.jsonl \
                --output-prefix my-gpt2 --vocab ./gpt2-vocab.json \
                --dataset-impl mmap \
                --tokenizer-type GPT2BPETokenizer \
                --merge-file ./gpt2-merges.txt \
                --append-eod --workers 9
    # 移动到文件夹
    mkdir -p dataset/oscar_data_1g
    mv gpt2-vocab.json gpt2-merges.txt my-gpt2_text_* dataset/oscar_data_1g/
    mkdir -p dataset/bloom_vocab/vocab_file
    # 下载词表文件到vocab_file目录
    # 注意: tree/main 是网页目录链接, 无法直接下载文件; 需使用 resolve/main 逐个下载仓库中的具体文件(如 tokenizer.json)
    wget https://huggingface.co/bigscience-catalogue-data-dev/byte-level-bpe-tokenizer-no-norm-250k-whitespace-and-eos-regex-alpha-v3-dedup-lines-articles/resolve/main/tokenizer.json -P dataset/bloom_vocab/vocab_file/

  4. 启动任务:

    sh examples/bloom/pretrain_bloom_7b1.sh