torch_npu.profiler.profile

接口原型

1
torch_npu.profiler.profile(activities=None, schedule=None, on_trace_ready=None, record_shapes=False, profile_memory=False, with_stack=False, with_modules, with_flops=False, experimental_config=None)

功能描述

提供PyTorch训练过程中的性能数据采集功能。

参数说明

支持的型号

调用示例

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import torch
import torch_npu

...

experimental_config = torch_npu.profiler._ExperimentalConfig(
	export_type=[
		torch_npu.profiler.ExportType.Text,
		torch_npu.profiler.ExportType.Db
		],
	profiler_level=torch_npu.profiler.ProfilerLevel.Level0,
	msprof_tx=False,
	aic_metrics=torch_npu.profiler.AiCMetrics.AiCoreNone,
	l2_cache=False,
	op_attr=False,
	data_simplification=False,
	record_op_args=False,
	gc_detect_threshold=None
)

with torch_npu.profiler.profile(
	activities=[
		torch_npu.profiler.ProfilerActivity.CPU,
		torch_npu.profiler.ProfilerActivity.NPU
		],
	schedule=torch_npu.profiler.schedule(wait=0, warmup=0, active=1, repeat=1, skip_first=1),
	on_trace_ready=torch_npu.profiler.tensorboard_trace_handler("./result"),
	record_shapes=False,
	profile_memory=False,
	with_stack=False,
	with_modules=False,
	with_flops=False,
	experimental_config=experimental_config) as prof:
	for step in range(steps): # 训练函数
		train_one_step() # 训练函数
		prof.step()