Add the one-click migration logic after the import section:
import torch_npu
from torch_npu.contrib import transfer_to_npu
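After these two imports, the one-click migration tool in torch_npu patches the CUDA-oriented torch APIs so that the repository's existing GPU code paths run on the NPU without further changes. A minimal sketch of the effect (behavior assumed from the torch_npu migration tool; the tensor and device names are illustrative):
import torch
import torch_npu
from torch_npu.contrib import transfer_to_npu  # redirects "cuda" devices and torch.cuda.* calls to the NPU

device = torch.device("cuda")      # mapped to the NPU device by the migration tool
x = torch.randn(4, 3).to(device)
print(x.device)                    # expected to report an npu device, e.g. npu:0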
Binary mode only needs to be enabled on Atlas training series products.
Before the main function runs, turn off the just-in-time (JIT) compilation switch so that pre-compiled binary kernels are used.
Before:
if __name__ == "__main__":
    main()
After:
if __name__ == "__main__":
    torch_npu.npu.set_compile_mode(jit_compile=False)
    main()
Private (internal) data formats only need to be disabled on Atlas training series products.
Modify the "tools/train_net.py" script.
Before the main function runs, turn off the private format switch.
Before:
if __name__ == "__main__":
    torch_npu.npu.set_compile_mode(jit_compile=False)
    main()
After:
if __name__ == "__main__":
    torch_npu.npu.set_compile_mode(jit_compile=False)
    torch.npu.config.allow_internal_format = False
    main()
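Taken together with the one-click migration imports, the entry point of tools/train_net.py ends up roughly as follows (a consolidated sketch; argument parsing and the body of main() are unchanged):
import torch
import torch_npu
from torch_npu.contrib import transfer_to_npu

if __name__ == "__main__":
    # Disable JIT compilation, i.e. use pre-compiled binary kernels (Atlas training series only).
    torch_npu.npu.set_compile_mode(jit_compile=False)
    # Disable private (internal) NPU data formats (Atlas training series only).
    torch.npu.config.allow_internal_format = False
    main()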
Modify the "maskrcnn_benchmark/utils/imports.py" script.
Modify the code that depends on torch<=1.9 (torch._six is no longer available in newer PyTorch versions).
Before:
import torch

if torch._six.PY37:
    import importlib
    import importlib.util
    import sys
After:
import sys
import torch

PY37 = sys.version_info[0] == 3 and sys.version_info[1] >= 7
if PY37:
    import importlib
    import importlib.util
    import sys
Replace the repository's custom ROIAlign operator with the adapted operator of the same name from the mmcv library.
Before:
from .roi_align import ROIAlign
from .roi_align import roi_align
from .roi_align import ROIAlignV2
……
__all__ = ["nms", "ml_nms", "soft_nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", "swish", "FrozenBatchNorm2d", "NaiveSyncBatchNorm2d", "SigmoidFocalLoss", "TokenSigmoidFocalLoss", "IOULoss", "IOUWHLoss", "Scale", "DeformConv", "ModulatedDeformConv", "DyHead", "DropBlock2D", "DropBlock3D", "EvoNorm2d", "DYReLU", "SELayer", "SEBlock", "HungarianMatcher", "SetCriterion", "ROIAlignV2", "_NewEmptyTensorOp"]
After:
from mmcv.ops import roi_align
from mmcv.ops import RoIAlign as ROIAlign
……
__all__ = ["nms", "ml_nms", "soft_nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", "swish", "FrozenBatchNorm2d", "NaiveSyncBatchNorm2d", "SigmoidFocalLoss", "TokenSigmoidFocalLoss", "IOULoss", "IOUWHLoss", "Scale", "DeformConv", "ModulatedDeformConv", "DyHead", "DropBlock2D", "DropBlock3D", "EvoNorm2d", "DYReLU", "SELayer", "SEBlock", "HungarianMatcher", "SetCriterion", "_NewEmptyTensorOp"]
Comment out the now-unused ROIAlignV2 operator and adapt the call site to the usage of the mmcv operator of the same name.
Before:
from maskrcnn_benchmark.layers import ROIAlign, ROIAlignV2
……
class Pooler(nn.Module):
    def __init__(self, output_size, scales, sampling_ratio, use_v2=False):
        ……
        for scale in scales:
            poolers.append(
                ROIAlignV2(
                    output_size, spatial_scale=scale, sampling_ratio=sampling_ratio
                ) if use_v2 else
                ROIAlign(
                    output_size, spatial_scale=scale, sampling_ratio=sampling_ratio
                )
            )
After:
from maskrcnn_benchmark.layers import ROIAlign
……
class Pooler(nn.Module):
    def __init__(self, output_size, scales, sampling_ratio, use_v2=False):
        ……
        for scale in scales:
            poolers.append(
                ROIAlign(
                    output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=True
                ) if use_v2 else
                ROIAlign(
                    output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=False
                )
            )
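For reference, mmcv's RoIAlign keeps the original ROIAlign/ROIAlignV2 distinction through its aligned argument: aligned=False matches the legacy ROIAlign behavior, while aligned=True applies the half-pixel offset that ROIAlignV2 used. A minimal sketch of the call convention (shapes and ROI values are illustrative):
import torch
from mmcv.ops import RoIAlign

feat = torch.randn(1, 256, 32, 32)
# Each ROI row is (batch_index, x1, y1, x2, y2).
rois = torch.tensor([[0., 4., 4., 20., 20.]])

roi_align_v2 = RoIAlign(output_size=(7, 7), spatial_scale=1.0, sampling_ratio=2, aligned=True)
roi_align_v1 = RoIAlign(output_size=(7, 7), spatial_scale=1.0, sampling_ratio=2, aligned=False)

print(roi_align_v2(feat, rois).shape)  # torch.Size([1, 256, 7, 7])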
Adapt the call site to the usage of the mmcv operator of the same name.
Before:
if self.cfg.MODEL.DYHEAD.FUSE_CONFIG.USE_BACKBONE_SHALLOW_CONTRASTIVE_LOSS:
    # choice 1: use features from SWINT backbone layer (c4) before vl fusion
    from maskrcnn_benchmark.layers.roi_align import ROIAlignV2
    pooler = ROIAlignV2((1, 1), 1./16, 0)
After:
if self.cfg.MODEL.DYHEAD.FUSE_CONFIG.USE_BACKBONE_SHALLOW_CONTRASTIVE_LOSS:
    # choice 1: use features from SWINT backbone layer (c4) before vl fusion
    from maskrcnn_benchmark.layers import ROIAlign
    pooler = ROIAlign((1, 1), 1. / 16, 0)
Modify the "maskrcnn_benchmark/layers/__init__.py" script.
Comment out the repository's custom ROIPool operator (it is not used by the COCO fine-tuning task).
Before:
from .roi_pool import ROIPool
from .roi_pool import roi_pool
……
__all__ = ["nms", "ml_nms", "soft_nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", "swish", "FrozenBatchNorm2d", "NaiveSyncBatchNorm2d", "SigmoidFocalLoss", "TokenSigmoidFocalLoss", "IOULoss", "IOUWHLoss", "Scale", "DeformConv", "ModulatedDeformConv", "DyHead", "DropBlock2D", "DropBlock3D", "EvoNorm2d", "DYReLU", "SELayer", "SEBlock", "HungarianMatcher", "SetCriterion", "_NewEmptyTensorOp"]
After:
……
__all__ = ["nms", "ml_nms", "soft_nms", "roi_align", "ROIAlign", "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", "swish", "FrozenBatchNorm2d", "NaiveSyncBatchNorm2d", "SigmoidFocalLoss", "TokenSigmoidFocalLoss", "IOULoss", "IOUWHLoss", "Scale", "DeformConv", "ModulatedDeformConv", "DyHead", "DropBlock2D", "DropBlock3D", "EvoNorm2d", "DYReLU", "SELayer", "SEBlock", "HungarianMatcher", "SetCriterion", "_NewEmptyTensorOp"]
Remove the references to the unused DeformConv operator, and replace the repository's custom ModulatedDeformConv operator with the adapted operator of the same name from the mmcv library.
Before:
from .deform_conv import DeformConv, ModulatedDeformConv

__all__ = ["nms", "ml_nms", "soft_nms", "roi_align", "ROIAlign", "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", "swish", "FrozenBatchNorm2d", "NaiveSyncBatchNorm2d", "SigmoidFocalLoss", "TokenSigmoidFocalLoss", "IOULoss", "IOUWHLoss", "Scale", "DeformConv", "ModulatedDeformConv", "DyHead", "DropBlock2D", "DropBlock3D", "EvoNorm2d", "DYReLU", "SELayer", "SEBlock", "HungarianMatcher", "SetCriterion", "_NewEmptyTensorOp"]
After:
from mmcv.ops import ModulatedDeformConv2d as ModulatedDeformConv

__all__ = ["nms", "ml_nms", "soft_nms", "roi_align", "ROIAlign", "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", "swish", "FrozenBatchNorm2d", "NaiveSyncBatchNorm2d", "SigmoidFocalLoss", "TokenSigmoidFocalLoss", "IOULoss", "IOUWHLoss", "Scale", "ModulatedDeformConv", "DyHead", "DropBlock2D", "DropBlock3D", "EvoNorm2d", "DYReLU", "SELayer", "SEBlock", "HungarianMatcher", "SetCriterion", "_NewEmptyTensorOp"]
Adapt the call site to the usage of the mmcv operator of the same name.
Before:
from .deform_conv import ModulatedDeformConv
After:
from mmcv.ops import ModulatedDeformConv2d as ModulatedDeformConv
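Note that mmcv's ModulatedDeformConv2d takes the offset and mask tensors as explicit forward arguments, which is what the DyConv change below passes through conv_args. A minimal sketch of the expected shapes (channel counts follow the 2*kH*kW / kH*kW convention for a single deform group; the concrete sizes are illustrative, and the op may require a device with the kernel available, e.g. GPU or NPU):
import torch
from mmcv.ops import ModulatedDeformConv2d

conv = ModulatedDeformConv2d(in_channels=16, out_channels=16, kernel_size=3, padding=1)

x = torch.randn(2, 16, 20, 20)
offset = torch.randn(2, 2 * 3 * 3, 20, 20)           # 2 * kH * kW offset channels
mask = torch.sigmoid(torch.randn(2, 3 * 3, 20, 20))  # kH * kW mask channels

out = conv(x, offset, mask)   # offset/mask spatial size must match the conv output
print(out.shape)              # torch.Size([2, 16, 20, 20])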
When the input shape and the offsets shape are inconsistent, rearrange the data layout on the model side before calling the NPU operator.
Before:
class DyConv(torch.nn.Module):
    def forward(self, inputs):
        ……
        for level, feature in enumerate(visual_feats):
            ……
            if level > 0:
                temp_fea.append(self.DyConv[2](visual_feats[level - 1], **conv_args))
            if level < len(visual_feats) - 1:
                temp_fea.append(F.upsample_bilinear(self.DyConv[0](visual_feats[level + 1], **conv_args),
                                                    size=[feature.size(2), feature.size(3)]))
            mean_fea = torch.mean(torch.stack(temp_fea), dim=0, keepdim=False)
After:
def reorder(input_nchw, output_h, output_w):
    n, c, h, w = input_nchw.shape
    input_nchw = input_nchw.view(n, -1)
    input_nchw = input_nchw[:, :c * output_h * output_w]
    input_nchw = input_nchw.view(n, c, output_h, output_w)
    return input_nchw


class DyConv(torch.nn.Module):
    def forward(self, inputs):
        ……
        for level, feature in enumerate(visual_feats):
            ……
            if level > 0:
                temp_fea.append(self.DyConv[2](visual_feats[level - 1], **conv_args))
            if level < len(visual_feats) - 1:
                offset = conv_args.get('offset', None)
                mask = conv_args.get('mask', None)
                out_h, out_w = visual_feats[level + 1].shape[2:]
                offset_nchw = reorder(offset, out_h, out_w)
                mask_nchw = reorder(mask, out_h, out_w)
                newconv_args = dict(offset=offset_nchw, mask=mask_nchw)
                temp_fea.append(F.upsample_bilinear(self.DyConv[0](visual_feats[level + 1], **newconv_args),
                                                    size=[feature.size(2), feature.size(3)]))
            mean_fea = torch.mean(torch.stack(temp_fea), dim=0, keepdim=False)
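The reorder helper is only a shape fix: it keeps the first c * output_h * output_w values of each sample's flattened buffer and reshapes them to the target spatial size; the values are not spatially re-sampled. A quick check of the shape behavior, using the reorder function defined above (dummy tensor, illustrative sizes):
import torch

# Offset computed for a 16x16 feature map, with 18 = 2 * kH * kW offset channels.
offset = torch.randn(2, 18, 16, 16)

# The next pyramid level expects a 12x12 layout.
offset_nchw = reorder(offset, 12, 12)
print(offset_nchw.shape)   # torch.Size([2, 18, 12, 12])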
Modify the way the nms and ml_nms operators are called.
Before:
from maskrcnn_benchmark import _C

try:
    import torchvision
    from torchvision.ops import nms
except:
    nms = _C.nms

ml_nms = _C.ml_nms
soft_nms = _C.soft_nms
After:
from mmcv.ops import nms, soft_nms

ml_nms = nms
soft_nms = soft_nms
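Note that, unlike torchvision.ops.nms, which returns only the indices of the kept boxes, mmcv.ops.nms and mmcv.ops.soft_nms return a (dets, inds) tuple in which dets stacks the kept boxes with their scores. This is why the boxlist_ml_nms fix below unpacks the result with "_, keep_j = ...". A minimal sketch of the return convention:
import torch
from mmcv.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [20., 20., 30., 30.]])
scores = torch.tensor([0.9, 0.8, 0.7])

dets, inds = nms(boxes, scores, iou_threshold=0.5)
print(dets.shape)  # (num_kept, 5): kept boxes with their scores appended
print(inds)        # indices of the kept boxes in the input order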
Modify the way the boxlist_ml_nms operator is called, and fix a bug in the original source.
Before:
def boxlist_ml_nms(boxlist, nms_thresh, max_proposals=-1,
                   score_field="scores", label_field="labels"):
    ……
    if boxes.device == torch.device("cpu"):
        keep = []
        unique_labels = torch.unique(labels)
        print(unique_labels)
        for j in unique_labels:
            inds = (labels == j).nonzero().view(-1)
            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            keep_j = _box_nms(boxes_j, scores_j, nms_thresh)
            keep += keep_j
    else:
        keep = _box_ml_nms(boxes, scores, labels.float(), nms_thresh)
After:
def boxlist_ml_nms(boxlist, nms_thresh, max_proposals=-1,
                   score_field="scores", label_field="labels"):
    ……
    if boxes.device == torch.device("cpu"):
        keep = []
        unique_labels = torch.unique(labels)
        print(unique_labels)
        for j in unique_labels:
            inds = (labels == j).nonzero().view(-1)
            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            keep_j = _box_nms(boxes_j, scores_j, nms_thresh)
            keep_j = inds[keep_j].cpu().int().numpy().tolist()
            keep += keep_j
        keep = torch.Tensor(keep).long()
    else:
        keep = []
        unique_labels = torch.unique(labels)
        for j in unique_labels:
            inds = (labels == j).nonzero().view(-1)
            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            _, keep_j = _box_ml_nms(boxes_j, scores_j, nms_thresh)
            keep_j = inds[keep_j].cpu().int().numpy().tolist()
            keep += keep_j
        keep = torch.Tensor(keep).long()
Modify the "maskrcnn_benchmark/layers/sigmoid_focal_loss.py" script.
Delete the custom CUDA operator and switch the SigmoidFocalLoss operator to its CPU implementation.
Before:
from maskrcnn_benchmark import _C


# TODO: Use JIT to replace CUDA implementation in the future.
class _SigmoidFocalLoss(Function):
    @staticmethod
    def forward(ctx, logits, targets, gamma, alpha):
        ctx.save_for_backward(logits, targets)
        num_classes = logits.shape[1]
        ctx.num_classes = num_classes
        ctx.gamma = gamma
        ctx.alpha = alpha

        losses = _C.sigmoid_focalloss_forward(
            logits, targets, num_classes, gamma, alpha
        )
        return losses

    @staticmethod
    @once_differentiable
    def backward(ctx, d_loss):
        logits, targets = ctx.saved_tensors
        num_classes = ctx.num_classes
        gamma = ctx.gamma
        alpha = ctx.alpha
        d_loss = d_loss.contiguous()
        d_logits = _C.sigmoid_focalloss_backward(
            logits, targets, d_loss, num_classes, gamma, alpha
        )
        return d_logits, None, None, None, None


sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply


class SigmoidFocalLoss(nn.Module):
    ……
    def forward(self, logits, targets):
        if logits.is_cuda:
            loss_func = sigmoid_focal_loss_cuda
        else:
            loss_func = sigmoid_focal_loss_cpu
        loss = loss_func(logits, targets, self.gamma, self.alpha)
        return loss.sum()
After:
class SigmoidFocalLoss(nn.Module):
    ……
    def forward(self, logits, targets):
        loss_func = sigmoid_focal_loss_cpu
        loss = loss_func(logits, targets, self.gamma, self.alpha)
        return loss.sum()
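For context, sigmoid_focal_loss_cpu is the pure-PyTorch implementation that already ships with the repository. A simplified sketch of a per-class sigmoid focal loss of this form is shown below (not the repository's exact code; it assumes targets hold 1-based class indices with 0 meaning background):
import torch

def sigmoid_focal_loss_cpu_sketch(logits, targets, gamma, alpha):
    # logits: (N, num_classes); targets: (N,) with 1-based class ids, 0 = background.
    num_classes = logits.shape[1]
    class_range = torch.arange(1, num_classes + 1, device=logits.device).unsqueeze(0)
    t = targets.unsqueeze(1)
    p = torch.sigmoid(logits)
    pos_term = ((1 - p) ** gamma) * torch.log(p)   # down-weights easy positives
    neg_term = (p ** gamma) * torch.log(1 - p)     # down-weights easy negatives
    loss = -(t == class_range).float() * pos_term * alpha \
           - ((t != class_range) * (t >= 0)).float() * neg_term * (1 - alpha)
    return loss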