Dynamo动/静态图展示
本节提供如下样例test_compile.py,样例包含parameter、buffer、user_input输入、scalar输入, 假设dynamic参数采用默认值。
```python
import os
os.environ['TNG_LOG_LEVEL'] = '0'  # 打开TorchAir的C++层日志
import torch
import torch.nn as nn
import logging
torch._logging.set_logs(dynamo=logging.DEBUG, aot=logging.DEBUG, output_code=True, graph_code=True, recompiles=True)
import torchair
from torchair.configs.compiler_config import CompilerConfig
from torchair.core.utils import logger, EVENT_LEVEL
logger.setLevel(logging.DEBUG)  # 打开TorchAir python日志


class DemoModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer('running_mean', torch.zeros(64))  # buffer (全局缓冲区)
        self.linear1 = nn.Linear(128, 64)  # Linear层 - 其weight和bias自动成为Parameter

    def forward(self, x, scale_factor):
        """
        Args:
            x: user_input tensor (用户输入的tensor)
            scale_factor: scalar输入 (标量输入)
        """
        x = self.linear1(x)
        x = x - self.running_mean
        x = x * scale_factor
        return x


demo_model = DemoModel().npu()
config = CompilerConfig()
# Dump TorchAir优化后的图(py)
config.debug.graph_dump.type = "py"
npu_backend = torchair.get_npu_backend(compiler_config=config)
# 假设dynamic参数采用默认
demo_model = torch.compile(demo_model, dynamic=False, fullgraph=True, backend=npu_backend)

inp = torch.randn(100, 128).npu()
# 输入tensor标记mark_static示例
# torch._dynamo.mark_static(inp)
scale_factor = 100

for i in range(4):
    demo_model(inp, scale_factor)
```
dynamic=False,Dynamo编译结果为静态图
运行上述脚本test_compile.py,得到Dynamo生成的原生FX图和TorchAir优化后的图。
- 原生FX图结构
```
[INFO] TORCHAIR(1630010,python):2025-11-07 15:35:39.502.029 [npu_fx_compiler.py:451]1630010 compiler inputs
[INFO] TORCHAIR(1630010,python):2025-11-07 15:35:39.502.244 [npu_fx_compiler.py:453]1630010 input 0: FakeTensor(..., device='npu:0', size=(64, 128))
[INFO] TORCHAIR(1630010,python):2025-11-07 15:35:39.502.597 [npu_fx_compiler.py:453]1630010 input 1: FakeTensor(..., device='npu:0', size=(64,))
[INFO] TORCHAIR(1630010,python):2025-11-07 15:35:39.502.937 [npu_fx_compiler.py:453]1630010 input 2: FakeTensor(..., device='npu:0', size=(100, 128))
[INFO] TORCHAIR(1630010,python):2025-11-07 15:35:39.503.269 [npu_fx_compiler.py:453]1630010 input 3: FakeTensor(..., device='npu:0', size=(64,))
[INFO] TORCHAIR(1630010,python):2025-11-07 15:35:39.503.589 [npu_fx_compiler.py:454]1630010 graph: graph():
    %primals_1 : [num_users=1] = placeholder[target=primals_1]
    %primals_2 : [num_users=1] = placeholder[target=primals_2]
    %primals_3 : [num_users=2] = placeholder[target=primals_3]
    %primals_4 : [num_users=1] = placeholder[target=primals_4]
    %t : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%primals_1,), kwargs = {})
    %addmm : [num_users=1] = call_function[target=torch.ops.aten.addmm.default](args = (%primals_2, %primals_3, %t), kwargs = {})
    %sub : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%addmm, %primals_4), kwargs = {})
    %mul : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%sub, 100), kwargs = {})
    return (mul, primals_3)
```
FX图中所有输入shape都是固定常量,是一个FX静态图。
- TorchAir优化后的图
```python
# -*- coding: utf-8 -*-
from torch import tensor
from torchair._ge_concrete_graph import ge_apis as ge
from torchair.ge._ge_graph import get_default_ge_graph

primals_1_0 = ge.Data(index=0, dtype=0, shape=[64, 128], placement="NPU", node_name="primals_1")
primals_2_0 = ge.Data(index=1, dtype=0, shape=[64], placement="NPU", node_name="primals_2")
primals_3_0 = ge.Data(index=2, dtype=0, shape=[100, 128], placement="NPU", node_name="primals_3")
primals_4_0 = ge.Data(index=3, dtype=0, shape=[64], placement="NPU", node_name="primals_4")

# File "/npu/pytorch/test/test.py", line 24, in forward
#     x = self.linear1(x)
## FX Code: t: torch.float32[128, 64]npu:0 = torch.ops.aten.t.default(primals_1: torch.float32[64, 128]npu:0)
## FX Code: addmm: torch.float32[100, 64]npu:0 = torch.ops.aten.addmm.default(primals_2: torch.float32[64]npu:0, primals_3: torch.float32[100, 128]npu:0, t: torch.float32[128, 64]npu:0)
Transpose_0 = ge.Transpose(primals_1_0, [1, 0], node_name="Transpose")
MatMulV2_0 = ge.MatMulV2(primals_3_0, Transpose_0, None, None, node_name="MatMulV2")
Mul_0 = ge.Mul(MatMulV2_0, ge.Const(1, dtype=0), node_name="Mul")
Add_0 = ge.Add(Mul_0, primals_2_0, node_name="Add")

# File "/npu/pytorch/test/test.py", line 25, in forward
#     x = x - self.running_mean
## FX Code: sub: torch.float32[100, 64]npu:0 = torch.ops.aten.sub.Tensor(addmm: torch.float32[100, 64]npu:0, primals_4: torch.float32[64]npu:0)
Sub_0 = ge.Sub(Add_0, primals_4_0, node_name="Sub")

# File "/npu/pytorch/test/test.py", line 26, in forward
#     x = x * scale_factor
## FX Code: mul: torch.float32[100, 64]npu:0 = torch.ops.aten.mul.Tensor(sub: torch.float32[100, 64]npu:0, 100)
Mul_1_0 = ge.Mul(Sub_0, ge.Const(100, dtype=0), node_name="Mul_1")
Cast_0 = ge.Cast(Mul_1_0, dst_type=0, node_name="Cast")

_ = ge.NetOutput([Cast_0], dependencies=[])
```
TorchAir优化后的图Aten IR已经转换为GE IR,输入shape都是固定形状,输入scalar常量值转换成ge.Const节点,这是一个静态GE原图。
dynamic=True,Dynamo编译结果为动态图
上述脚本test_compile.py改为dynamic=True,得到Dynamo生成的原生FX图和TorchAir优化后的图。
- 原生FX图结构
```
[INFO] TORCHAIR(1632429,python):2025-11-07 15:37:54.655.141 [npu_fx_compiler.py:451]1632429 compiler inputs
[INFO] TORCHAIR(1632429,python):2025-11-07 15:37:54.655.275 [npu_fx_compiler.py:453]1632429 input 0: s2
[INFO] TORCHAIR(1632429,python):2025-11-07 15:37:54.655.426 [npu_fx_compiler.py:453]1632429 input 1: s0
[INFO] TORCHAIR(1632429,python):2025-11-07 15:37:54.655.558 [npu_fx_compiler.py:453]1632429 input 2: FakeTensor(..., device='npu:0', size=(s0, 128))
[INFO] TORCHAIR(1632429,python):2025-11-07 15:37:54.655.982 [npu_fx_compiler.py:453]1632429 input 3: FakeTensor(..., device='npu:0', size=(s0, 64))
[INFO] TORCHAIR(1632429,python):2025-11-07 15:37:54.656.374 [npu_fx_compiler.py:454]1632429 graph: graph():
    %primals_6 : [num_users=1] = placeholder[target=primals_6]
    %primals_3 : [num_users=0] = placeholder[target=primals_3]
    %primals_4 : [num_users=1] = placeholder[target=primals_4]
    %tangents_1 : [num_users=1] = placeholder[target=tangents_1]
    %mul_7 : [num_users=2] = call_function[target=torch.ops.aten.mul.Tensor](args = (%tangents_1, %primals_6), kwargs = {})
    %t_1 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%mul_7,), kwargs = {})
    %mm : [num_users=1] = call_function[target=torch.ops.aten.mm.default](args = (%t_1, %primals_4), kwargs = {})
    %t_2 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%mm,), kwargs = {})
    %sum_1 : [num_users=1] = call_function[target=torch.ops.aten.sum.dim_IntList](args = (%mul_7, [0], True), kwargs = {})
    %view : [num_users=1] = call_function[target=torch.ops.aten.view.default](args = (%sum_1, [64]), kwargs = {})
    %t_3 : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%t_2,), kwargs = {})
    return (t_3, view, None, None, None, None)
```
FX图中输入shape的具体维度存在符号,是一个FX动态图。
- TorchAir优化后的图
```python
# -*- coding: utf-8 -*-
from torch import tensor
from torchair._ge_concrete_graph import ge_apis as ge
from torchair.ge._ge_graph import get_default_ge_graph

primals_1_0 = ge.Data(index=0, dtype=0, shape=[64, 128], placement="NPU", node_name="primals_1")
primals_2_0 = ge.Data(index=1, dtype=0, shape=[64], placement="NPU", node_name="primals_2")
primals_3_0 = ge.Data(index=2, dtype=9, shape=[], placement="CPU", node_name="primals_3")
primals_4_0 = ge.Data(index=3, dtype=0, shape=[-1, 128], placement="NPU", node_name="primals_4")
Shape_0 = ge.Shape(primals_4_0, dtype=9, node_name="Shape")
Gather_0 = ge.Gather(Shape_0, 0, node_name="Gather")
primals_5_0 = ge.Data(index=4, dtype=0, shape=[64], placement="NPU", node_name="primals_5")
primals_6_0 = ge.Data(index=5, dtype=9, shape=[], placement="CPU", node_name="primals_6")

# File "/npu/pytorch/test/test.py", line 24, in forward
#     x = self.linear1(x)
## FX Code: t: torch.float32[128, 64]npu:0 = torch.ops.aten.t.default(primals_1: torch.float32[64, 128]npu:0)
## FX Code: addmm: torch.float32[s0, 64]npu:0 = torch.ops.aten.addmm.default(primals_2: torch.float32[64]npu:0, primals_4: torch.float32[s0, 128]npu:0, t: torch.float32[128, 64]npu:0)
Transpose_0 = ge.Transpose(primals_1_0, [1, 0], node_name="Transpose")
MatMulV2_0 = ge.MatMulV2(primals_4_0, Transpose_0, None, None, node_name="MatMulV2")
Mul_0 = ge.Mul(MatMulV2_0, ge.Const(1, dtype=0), node_name="Mul")
Add_0 = ge.Add(Mul_0, primals_2_0, node_name="Add")

# File "/npu/pytorch/test/test.py", line 25, in forward
#     x = x - self.running_mean
## FX Code: sub_1: torch.float32[s0, 64]npu:0 = torch.ops.aten.sub.Tensor(addmm: torch.float32[s0, 64]npu:0, primals_5: torch.float32[64]npu:0)
Sub_0 = ge.Sub(Add_0, primals_5_0, node_name="Sub")

# File "/npu/pytorch/test/test.py", line 26, in forward
#     x = x * scale_factor
## FX Code: mul_4: torch.float32[s0, 64]npu:0 = torch.ops.aten.mul.Tensor(sub_1: torch.float32[s0, 64]npu:0, primals_6: "Sym(s2)")
Cast_0 = ge.Cast(primals_6_0, dst_type=0, node_name="Cast")
Mul_1_0 = ge.Mul(Sub_0, Cast_0, node_name="Mul_1")
Cast_1_0 = ge.Cast(Mul_1_0, dst_type=0, node_name="Cast_1")

_ = ge.NetOutput([Cast_1_0], dependencies=[])
```
TorchAir优化后的图Aten IR已经转换为GE IR,FX图中输入tensor对应的符号维度转换成了-1,输入scalar符号转换成了ge.Data节点,ge.Data的shape是固定常量,这是一个动态GE原图。
dynamic=True且对输入mark_static,Dynamo编译结果为静态图
上述脚本test_compile.py改为dynamic=True且对输入tensor标记mark_static,得到Dynamo生成的原生FX图和TorchAir优化后的图。
```python
# 输入tensor标记mark_static示例
inp = torch.randn(100, 128).npu()
torch._dynamo.mark_static(inp)
```
- 原生FX图结构
```
[INFO] TORCHAIR(1639859,python):2025-11-07 15:45:40.989.451 [npu_fx_compiler.py:451]1639859 compiler inputs
[INFO] TORCHAIR(1639859,python):2025-11-07 15:45:40.989.644 [npu_fx_compiler.py:453]1639859 input 0: FakeTensor(..., device='npu:0', size=(64, 128))
[INFO] TORCHAIR(1639859,python):2025-11-07 15:45:40.989.992 [npu_fx_compiler.py:453]1639859 input 1: FakeTensor(..., device='npu:0', size=(64,))
[INFO] TORCHAIR(1639859,python):2025-11-07 15:45:40.990.312 [npu_fx_compiler.py:453]1639859 input 2: FakeTensor(..., device='npu:0', size=(100, 128))
[INFO] TORCHAIR(1639859,python):2025-11-07 15:45:40.990.627 [npu_fx_compiler.py:453]1639859 input 3: FakeTensor(..., device='npu:0', size=(64,))
[INFO] TORCHAIR(1639859,python):2025-11-07 15:45:40.990.955 [npu_fx_compiler.py:453]1639859 input 4: s0
[INFO] TORCHAIR(1639859,python):2025-11-07 15:45:40.991.110 [npu_fx_compiler.py:454]1639859 graph: graph():
    %primals_1 : [num_users=1] = placeholder[target=primals_1]
    %primals_2 : [num_users=1] = placeholder[target=primals_2]
    %primals_3 : [num_users=2] = placeholder[target=primals_3]
    %primals_4 : [num_users=1] = placeholder[target=primals_4]
    %primals_5 : [num_users=2] = placeholder[target=primals_5]
    %t : [num_users=1] = call_function[target=torch.ops.aten.t.default](args = (%primals_1,), kwargs = {})
    %addmm : [num_users=1] = call_function[target=torch.ops.aten.addmm.default](args = (%primals_2, %primals_3, %t), kwargs = {})
    %sub : [num_users=1] = call_function[target=torch.ops.aten.sub.Tensor](args = (%addmm, %primals_4), kwargs = {})
    %mul : [num_users=1] = call_function[target=torch.ops.aten.mul.Tensor](args = (%sub, %primals_5), kwargs = {})
    return (mul, primals_3, primals_5)
```
FX图中所有输入shape都是固定常量,但输入scalar值是符号(对应脚本中的scalar输入),是一个FX动态图。
- TorchAir优化后的图
```python
# -*- coding: utf-8 -*-
from torch import tensor
from torchair._ge_concrete_graph import ge_apis as ge
from torchair.ge._ge_graph import get_default_ge_graph

primals_1_0 = ge.Data(index=0, dtype=0, shape=[64, 128], placement="NPU", node_name="primals_1")
primals_2_0 = ge.Data(index=1, dtype=0, shape=[64], placement="NPU", node_name="primals_2")
primals_3_0 = ge.Data(index=2, dtype=0, shape=[100, 128], placement="NPU", node_name="primals_3")
primals_4_0 = ge.Data(index=3, dtype=0, shape=[64], placement="NPU", node_name="primals_4")
primals_5_0 = ge.Data(index=4, dtype=9, shape=[], placement="CPU", node_name="primals_5")

# File "/npu/pytorch/test/test.py", line 24, in forward
#     x = self.linear1(x)
## FX Code: t: torch.float32[128, 64]npu:0 = torch.ops.aten.t.default(primals_1: torch.float32[64, 128]npu:0)
## FX Code: addmm: torch.float32[100, 64]npu:0 = torch.ops.aten.addmm.default(primals_2: torch.float32[64]npu:0, primals_3: torch.float32[100, 128]npu:0, t: torch.float32[128, 64]npu:0)
Transpose_0 = ge.Transpose(primals_1_0, [1, 0], node_name="Transpose")
MatMulV2_0 = ge.MatMulV2(primals_3_0, Transpose_0, None, None, node_name="MatMulV2")
Mul_0 = ge.Mul(MatMulV2_0, ge.Const(1, dtype=0), node_name="Mul")
Add_0 = ge.Add(Mul_0, primals_2_0, node_name="Add")

# File "/npu/pytorch/test/test.py", line 25, in forward
#     x = x - self.running_mean
## FX Code: sub: torch.float32[100, 64]npu:0 = torch.ops.aten.sub.Tensor(addmm: torch.float32[100, 64]npu:0, primals_4: torch.float32[64]npu:0)
Sub_0 = ge.Sub(Add_0, primals_4_0, node_name="Sub")

# File "/npu/d00885544/pytorch/every_test/1107_doc/test.py", line 26, in forward
#     x = x * scale_factor
## FX Code: mul: torch.float32[100, 64]npu:0 = torch.ops.aten.mul.Tensor(sub: torch.float32[100, 64]npu:0, primals_5: "Sym(s0)")
Cast_0 = ge.Cast(primals_5_0, dst_type=0, node_name="Cast")
Mul_1_0 = ge.Mul(Sub_0, Cast_0, node_name="Mul_1")
Cast_1_0 = ge.Cast(Mul_1_0, dst_type=0, node_name="Cast_1")

_ = ge.NetOutput([Cast_1_0], dependencies=[])
```
TorchAir优化后的图Aten IR已经转换为GE IR,FX图中的输入shape都是固定形状,scalar符号输入转换成了ge.Data节点,ge.Data的shape是固定常量,优化后是一个静态GE原图。
父主题: 动/静态图展示