Before migration, the GPU code calls view first and transposes afterwards:

import torch

data = torch.rand(64, 3, 64, 128).cuda()
batch, channel, height, width = data.shape
# view followed by transpose leaves the result non-contiguous
result = data.view(batch, height, channel*width).transpose(1, 0)
After migration, the NPU-affine ordering transposes first, makes the tensor contiguous, and only then calls view:

import torch
import torch_npu
from torch_npu.contrib import transfer_to_npu

# with transfer_to_npu enabled, the .cuda() call is redirected to the NPU
data = torch.rand(64, 3, 64, 128).cuda()
batch, channel, height, width = data.shape
# transpose first, then materialize a contiguous copy before view
result = data.transpose(1, 2).contiguous().view(batch, height, channel*width)
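As a quick sanity check, the reordered pattern can be compared against an equivalent permute + reshape formulation. This is a minimal sketch that runs on the CPU, so no NPU device or torch_npu install is assumed:

import torch

# Same tensor layout as above, kept on CPU for a device-independent check.
data = torch.rand(64, 3, 64, 128)
batch, channel, height, width = data.shape

# NPU-affine ordering: transpose, then contiguous, then view.
affine = data.transpose(1, 2).contiguous().view(batch, height, channel * width)

# permute(0, 2, 1, 3) describes the same dimension swap as transpose(1, 2);
# reshape then flattens channel and width together, giving a reference result.
reference = data.permute(0, 2, 1, 3).reshape(batch, height, channel * width)
assert torch.equal(affine, reference)

The assert confirms that the rewrite only changes when the data copy happens, not the element order of the result.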
Before switching to the NPU-affine gradient-clipping API, make sure the code already uses an NPU-affine optimizer.
Before migration, the GPU code pairs a standard optimizer with torch.nn.utils.clip_grad_norm_:

import torch

# `model` and `lr` are assumed to be defined by the surrounding training script
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=10, norm_type=2)
After migration, the fused optimizer carries its own clipping method, so no parameter list needs to be passed:

import torch
import torch_npu
from torch_npu.contrib import transfer_to_npu

optimizer = torch_npu.optim.NpuFusedAdamW(model.parameters(), lr=lr)
# fused clipping operates on the parameters the fused optimizer already manages
optimizer.clip_grad_norm_fused_(max_norm=10, norm_type=2)
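For context, a hypothetical minimal training step shows where the fused clipping call sits relative to backward() and step(); `model`, `loss_fn`, and `loader` are placeholder names assumed to be defined, and moved to the NPU, elsewhere in the script:

import torch
import torch_npu
from torch_npu.contrib import transfer_to_npu

# `model`, `loss_fn`, and `loader` are hypothetical placeholders,
# not part of the original example.
optimizer = torch_npu.optim.NpuFusedAdamW(model.parameters(), lr=1e-3)

for inputs, targets in loader:
    optimizer.zero_grad()
    loss = loss_fn(model(inputs), targets)
    loss.backward()
    # Clip after backward() and before step(), mirroring the usual
    # clip_grad_norm_ placement; the fused method needs no parameter list.
    optimizer.clip_grad_norm_fused_(max_norm=10, norm_type=2)
    optimizer.step()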