MindSpore Training Scenarios
Sample Training Script for MindSpore in an Ascend NPU Environment
import numpy as np
import mindspore
import mindspore.mint as mint
import mindspore.dataset as ds
from mindspore import nn

mindspore.set_device("Ascend")


class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Dense(2, 2)

    def construct(self, x):
        # Pass the input through the fully connected layer first
        y = self.fc(x)
        # Call mint.add to add y to itself; this can be changed to
        # mint.add(x, x) or mint.add(y, x) as needed
        z = mint.add(y, y)
        return z


def generator_net():
    for _ in range(10):
        yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)


def forward_fn(data, label):
    logits = model(data)
    loss = loss_fn(logits, label)
    return loss, logits


model = Net()
optimizer = nn.Momentum(model.trainable_params(), 1, 0.9)
loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)


# Define the function for one training step
def train_step(data, label):
    (loss, _), grads = grad_fn(data, label)
    optimizer(grads)
    return loss


if __name__ == "__main__":
    step = 0
    # Train the model
    for data, label in ds.GeneratorDataset(generator_net(), ["data", "label"]):
        train_step(data, label)
        print(f"train step {step}")
        step += 1
    print("train finish")
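On a correctly configured Ascend environment, the script iterates over the ten generated samples and should print output along these lines:

train step 0
train step 1
...
train step 9
train finish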
Sample Code for MindSpore Pre-Training Configuration Check
from msprobe.core.config_check import ConfigChecker

# Apply the msprobe patches before the training components are created
ConfigChecker.apply_patches(fmk="mindspore")

import numpy as np
import mindspore
import mindspore.mint as mint
import mindspore.dataset as ds
from mindspore import nn

mindspore.set_device("Ascend")


class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Dense(2, 2)

    def construct(self, x):
        # Pass the input through the fully connected layer first
        y = self.fc(x)
        # Call mint.add to add y to itself; this can be changed to
        # mint.add(x, x) or mint.add(y, x) as needed
        z = mint.add(y, y)
        return z


def generator_net():
    for _ in range(10):
        yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)


def forward_fn(data, label):
    logits = model(data)
    loss = loss_fn(logits, label)
    return loss, logits


model = Net()
optimizer = nn.Momentum(model.trainable_params(), 1, 0.9)
loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)


# Define the function for one training step
def train_step(data, label):
    (loss, _), grads = grad_fn(data, label)
    optimizer(grads)
    return loss


if __name__ == "__main__":
    step = 0
    # Pack the training configuration into a zip file for later comparison
    ConfigChecker(model=model, output_zip_path="./config_check_pack.zip", fmk="mindspore")
    # Train the model
    for data, label in ds.GeneratorDataset(generator_net(), ["data", "label"]):
        train_step(data, label)
        print(f"train step {step}")
        step += 1
    print("train finish")
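The ConfigChecker call above packs the environment and training configuration into ./config_check_pack.zip. A pack collected on the benchmark environment can then be compared against one collected on the environment under test to locate configuration differences. The snippet below is only a sketch: the compare method name, its signature, and the file paths are assumptions, so consult the msprobe config check documentation for the exact interface.

from msprobe.core.config_check import ConfigChecker

# Assumed comparison API (illustrative only): diff a benchmark pack against
# a pack from the environment under test and write the result to an output
# directory. Method name, signature, and paths are assumptions.
ConfigChecker.compare("./bench_config_check_pack.zip",
                      "./cmp_config_check_pack.zip",
                      "./config_check_result")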
Sample Code for MindSpore Training Status Monitoring
import numpy as np
import mindspore
import mindspore.mint as mint
import mindspore.dataset as ds
from mindspore import nn
from msprobe.mindspore import TrainerMon

mindspore.set_device("Ascend")

monitor = TrainerMon(
    config_file_path="./monitor_config.json",
    process_group=None,
    # Whether the weights use main_grad; usually True for Megatron and
    # False for DeepSpeed. Defaults to True.
    params_have_main_grad=True,
)


class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Dense(2, 2)

    def construct(self, x):
        # Pass the input through the fully connected layer first
        y = self.fc(x)
        # Call mint.add to add y to itself; this can be changed to
        # mint.add(x, x) or mint.add(y, x) as needed
        z = mint.add(y, y)
        return z


def generator_net():
    for _ in range(10):
        yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)


def forward_fn(data, label):
    logits = model(data)
    loss = loss_fn(logits, label)
    return loss, logits


model = Net()
optimizer = nn.Momentum(model.trainable_params(), 1, 0.9)

# Attach the monitor to the model and optimizer
monitor.set_monitor(
    model,
    grad_acc_steps=1,
    optimizer=optimizer,
    dp_group=None,
    tp_group=None)

loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)


# Define the function for one training step
def train_step(data, label):
    (loss, _), grads = grad_fn(data, label)
    optimizer(grads)
    return loss


if __name__ == "__main__":
    step = 0
    # Train the model
    for data, label in ds.GeneratorDataset(generator_net(), ["data", "label"]):
        train_step(data, label)
        print(f"train step {step}")
        step += 1
    print("train finish")
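The sample reads its monitoring options from ./monitor_config.json, which is not shown above. The snippet below writes a minimal illustrative configuration; the field names (targets, ops, format) and their values are assumptions based on common msprobe monitor options, so check the msprobe monitor documentation for the authoritative schema.

import json

# Illustrative monitor configuration (assumed schema): watch the fc layer
# and record min/max/norm statistics as CSV files.
monitor_config = {
    "targets": {"fc": {}},          # modules to monitor (assumed key)
    "ops": ["min", "max", "norm"],  # statistics to collect (assumed key)
    "format": "csv"                 # output format (assumed key)
}
with open("monitor_config.json", "w") as f:
    json.dump(monitor_config, f, indent=4)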
Sample Code for MindSpore Precision Data Collection
import numpy as np
import mindspore
import mindspore.mint as mint
import mindspore.dataset as ds
from mindspore import nn
from msprobe.mindspore import PrecisionDebugger

mindspore.set_device("Ascend")

debugger = PrecisionDebugger(config_path="./config.json")


class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Dense(2, 2)

    def construct(self, x):
        # Pass the input through the fully connected layer first
        y = self.fc(x)
        # Call mint.add to add y to itself; this can be changed to
        # mint.add(x, x) or mint.add(y, x) as needed
        z = mint.add(y, y)
        return z


def generator_net():
    for _ in range(10):
        yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)


def forward_fn(data, label):
    logits = model(data)
    loss = loss_fn(logits, label)
    return loss, logits


model = Net()
optimizer = nn.Momentum(model.trainable_params(), 1, 0.9)
loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)


# Define the function for one training step
def train_step(data, label):
    (loss, _), grads = grad_fn(data, label)
    optimizer(grads)
    return loss


if __name__ == "__main__":
    step = 0
    # Train the model
    for data, label in ds.GeneratorDataset(generator_net(), ["data", "label"]):
        debugger.start(model)  # Start precision data collection for this step
        train_step(data, label)
        print(f"train step {step}")
        step += 1
        debugger.stop()   # Stop collection for this step
        debugger.step()   # Advance the debugger's step counter
    print("train finish")
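PrecisionDebugger is driven by ./config.json, which is not shown above. The snippet below writes a minimal statistics-mode configuration as a starting point; the exact keys and defaults should be verified against the msprobe documentation, and dump_path here is an assumed output location.

import json

# Minimal statistics-task configuration for PrecisionDebugger (sketch;
# verify keys against the msprobe documentation). "dump_path" is an
# assumed output location.
config = {
    "task": "statistics",
    "dump_path": "./dump_path",
    "rank": [],
    "step": [],
    "level": "L1",
    "statistics": {
        "scope": [],
        "list": [],
        "data_mode": ["all"],
        "summary_mode": "statistics"
    }
}
with open("config.json", "w") as f:
    json.dump(config, f, indent=4)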
Sample Code for Collecting Performance Data with the MindSpore Profiler Interface
import numpy as np
import mindspore
import mindspore.mint as mint
import mindspore.dataset as ds
from mindspore import nn
from mindspore.profiler import ProfilerLevel, ProfilerActivity, AicoreMetrics


class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.fc = nn.Dense(2, 2)

    def construct(self, x):
        # Pass the input through the fully connected layer first
        y = self.fc(x)
        # Call mint.add to add y to itself; this can be changed to
        # mint.add(x, x) or mint.add(y, x) as needed
        z = mint.add(y, y)
        return z


def generator_net():
    for _ in range(10):
        yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)


def forward_fn(data, label):
    logits = model(data)
    loss = loss_fn(logits, label)
    return loss, logits


model = Net()
optimizer = nn.Momentum(model.trainable_params(), 1, 0.9)
loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)


# Define the function for one training step
def train_step(data, label):
    (loss, _), grads = grad_fn(data, label)
    optimizer(grads)
    return loss


if __name__ == "__main__":
    mindspore.set_context(mode=mindspore.PYNATIVE_MODE)
    mindspore.set_device("Ascend")

    # Init Profiler
    # pylint: disable=protected-access
    experimental_config = mindspore.profiler._ExperimentalConfig(
        profiler_level=ProfilerLevel.Level0,
        aic_metrics=AicoreMetrics.AiCoreNone,
        l2_cache=False,
        mstx=False,
        data_simplification=False
    )
    step = 0
    # Note that the Profiler should be initialized before model.train
    with mindspore.profiler.profile(
        activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
        schedule=mindspore.profiler.schedule(
            wait=0, warmup=0, active=1, repeat=1, skip_first=0
        ),
        on_trace_ready=mindspore.profiler.tensorboard_trace_handler("./profiling_data"),
        profile_memory=False,
        experimental_config=experimental_config,
    ) as prof:
        # Train the model
        for data, label in ds.GeneratorDataset(generator_net(), ["data", "label"]):
            train_step(data, label)
            print(f"train step {step}")
            step += 1
            prof.step()
    print("train finish")
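With schedule(wait=0, warmup=0, active=1, repeat=1, skip_first=0), only a single training step is actively profiled; prof.step() must be called once per iteration so the scheduler can advance. The collected data is written by tensorboard_trace_handler to ./profiling_data and can then be opened in a visualization tool such as MindStudio Insight.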