Deformable convolution samples feature points that follow an object's shape and scale more closely, which makes it more robust. So I wanted to try it on my own dataset and see whether it brings any accuracy gains.
Paper: Deformable Convolutional Networks

Adding DCNv2 in Python 🚀

  1. Install DCNv2: https://github.com/jinfagang/DCNv2_latest (a quick smoke test follows the build command)

    python3 setup.py build develop
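
    After the build, a quick import check confirms the extension loads and runs. This assumes the repo's usual layout with a top-level dcn_v2.py exposing DCN; adjust the import if your checkout differs:

    # minimal smoke test, run from the DCNv2_latest checkout (requires a CUDA device)
    import torch
    from dcn_v2 import DCN  # assumed module layout; adjust if yours differs

    m = DCN(64, 64, kernel_size=3, stride=1, padding=1, deformable_groups=1).cuda()
    x = torch.randn(2, 64, 32, 32, device='cuda')
    print(m(x).shape)  # expect torch.Size([2, 64, 32, 32])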
    
  2. Add the DCN op to common.py (a usage sketch follows the block)

    # --------------------------DCNv2 start--------------------------
    import math

    import torch
    from torch import nn
    from torch.autograd import Function
    from torch.autograd.function import once_differentiable
    from torch.nn.modules.utils import _pair

    import _ext as _backend
      
    class _DCNv2(Function):  
        @staticmethod  
        def forward(ctx, input, offset, mask, weight, bias, stride, padding, dilation, deformable_groups):  
            is_convert = input.dtype != torch.float32  
            if is_convert:  
                input = input.float()  
                offset = offset.float()  
                mask = mask.float()  
                weight = weight.float()  
                bias = bias.float()  
      
            ctx.stride = _pair(stride)  
            ctx.padding = _pair(padding)  
            ctx.dilation = _pair(dilation)  
            ctx.kernel_size = _pair(weight.shape[2:4])  
            ctx.deformable_groups = deformable_groups  
            output = _backend.dcn_v2_forward(  
                input,  
                weight,  
                bias,  
                offset,  
                mask,  
                ctx.kernel_size[0],  
                ctx.kernel_size[1],  
                ctx.stride[0],  
                ctx.stride[1],  
                ctx.padding[0],  
                ctx.padding[1],  
                ctx.dilation[0],  
                ctx.dilation[1],  
                ctx.deformable_groups,  
            )  
            ctx.save_for_backward(input, offset, mask, weight, bias)  
            if is_convert:  
                return output.half()  
            return output  
      
        @staticmethod  
        @once_differentiable    
        def backward(ctx, grad_output):  
            is_convert = grad_output.dtype != torch.float32  
            if is_convert:  
                grad_output = grad_output.float()  
      
            input, offset, mask, weight, bias = ctx.saved_tensors  
            grad_input, grad_offset, grad_mask, grad_weight, grad_bias = _backend.dcn_v2_backward(  
                input,  
                weight,  
                bias,  
                offset,  
                mask,  
                grad_output,  
                ctx.kernel_size[0],  
                ctx.kernel_size[1],  
                ctx.stride[0],  
                ctx.stride[1],  
                ctx.padding[0],  
                ctx.padding[1],  
                ctx.dilation[0],  
                ctx.dilation[1],  
                ctx.deformable_groups,  
            )  
            if is_convert:  
                grad_input = grad_input.half()  
                grad_offset = grad_offset.half()  
                grad_mask = grad_mask.half()  
                grad_weight = grad_weight.half()  
                grad_bias = grad_bias.half()  
      
            return grad_input, grad_offset, grad_mask, grad_weight, grad_bias, None, None, None, None  
      
        @staticmethod  
        def symbolic(g, input, offset, mask, weight, bias, stride, padding, dilation, deformable_groups):  
            from torch.nn.modules.utils import _pair  
            stride = _pair(stride)  
            padding = _pair(padding)  
            dilation = _pair(dilation)  
            # as of trt 7, the dcn operation will be translated again by modifying the onnx file
            # so the exporting code is kept to resemble the forward()
            return g.op(
                "DCNv2_2",  
                input,  
                offset,  
                mask,  
                weight,  
                bias,  
                stride_i=stride,  
                padding_i=padding,  
                dilation_i=dilation,  
                deformable_groups_i=deformable_groups,  
            ) 
      
    dcn_v2_conv = _DCNv2.apply   
      
    class DCNv2(nn.Module):  
        def __init__(  
                self,  
                in_channels,  
                out_channels,  
                kernel_size,  
                stride,  
                padding=1,  
                dilation=1,  
                deformable_groups=1,  
        ):  
            super(DCNv2, self).__init__()  
            self.in_channels = in_channels  
            self.out_channels = out_channels  
            self.kernel_size = _pair(kernel_size)  
            self.stride = _pair(stride)  
            self.padding = _pair(padding)  
            self.dilation = _pair(dilation)  
            self.deformable_groups = deformable_groups  
      
            self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size))  
            self.bias = nn.Parameter(torch.Tensor(out_channels))  
            self.reset_parameters()  
      
        def reset_parameters(self):  
            n = self.in_channels  
            for k in self.kernel_size:  
                n *= k  
            stdv = 1.0 / math.sqrt(n)  
            self.weight.data.uniform_(-stdv, stdv)  
            self.bias.data.zero_()  
      
        def forward(self, input, offset, mask):  
            assert (  
                    2 * self.deformable_groups * self.kernel_size[0] * self.kernel_size[1]  
                    == offset.shape[1]  
            )  
            assert self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == mask.shape[1]  
            return dcn_v2_conv(  
                input,  
                offset,  
                mask,  
                self.weight,  
                self.bias,  
                self.stride,  
                self.padding,  
                self.dilation,  
                self.deformable_groups,  
            )  
        
    class DCN(DCNv2):  
        def __init__(  
                self,  
                in_channels,  
                out_channels,  
                kernel_size,  
                stride,  
                padding=1,  
                dilation=1,  
                deformable_groups=1,  
        ):  
            super(DCN, self).__init__(  
                in_channels, out_channels, kernel_size, stride, padding, dilation, deformable_groups  
            )  
      
            channels_ = self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1]  
            self.conv_offset_mask = nn.Conv2d(  
                self.in_channels,  
                channels_,  
                kernel_size=self.kernel_size,  
                stride=self.stride,  
                padding=self.padding,  
                bias=True,  
            )  
            self.init_offset()
      
        def init_offset(self):  
            self.conv_offset_mask.weight.data.zero_()  
            self.conv_offset_mask.bias.data.zero_()  
      
        def forward(self, input):  
            out = self.conv_offset_mask(input)  
            o1, o2, mask = torch.chunk(out, 3, dim=1)  
            offset = torch.cat((o1, o2), dim=1)  
            mask = torch.sigmoid(mask)  
            return dcn_v2_conv(  
                input,  
                offset,  
                mask,  
                self.weight,  
                self.bias,  
                self.stride,  
                self.padding,  
                self.dilation,  
                self.deformable_groups,  
            )  
      
    # ---------------------------DCNv2 end---------------------------
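
    A rough usage sketch of the module above (my own shape check, assuming the block lives in models/common.py): for kernel 3 and deformable_groups=1, conv_offset_mask emits 3 * 1 * 3 * 3 = 27 channels, which forward() splits into 18 offset channels and 9 mask channels.

    # shape check for the DCN module above (illustrative values, requires CUDA)
    import torch
    from models.common import DCN  # assumes the code above was added to common.py

    dcn = DCN(64, 128, kernel_size=3, stride=2, padding=1).cuda()
    x = torch.randn(1, 64, 80, 80, device='cuda')
    out = dcn.conv_offset_mask(x)           # (1, 27, 40, 40): 3 * groups * k * k channels
    o1, o2, mask = torch.chunk(out, 3, 1)   # 9 + 9 offset halves, 9 mask channels
    print(out.shape, dcn(x).shape)          # (1, 27, 40, 40) then (1, 128, 40, 40)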
    
  3. Register DCN in yolo.py, in the parse_model function (a sketch of the matching parse_model logic follows the snippet)

    # add DCN
    if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,  
             BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, C3HB, C3RFEM, MultiSEAM, SEAM, C3STR, MobileOneBlock, DCN]:
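
    This one-line change works because parse_model handles every module in that list uniformly. A simplified sketch of what it does for a DCN entry (paraphrased from yolov5's parse_model, not a verbatim copy):

    # inside parse_model: how a yaml row like [-1, 1, DCN, [128, 3, 2]] becomes DCN(...)
    c1, c2 = ch[f], args[0]              # in-channels from the previous layer, out-channels from yaml
    if c2 != no:
        c2 = make_divisible(c2 * gw, 8)  # scale by width_multiple
    args = [c1, c2, *args[1:]]           # final call: DCN(c1, c2, 3, 2)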
    
  4. Edit the training config and add the DCN modules. I add three DCN layers here; with four, gradients vanished after a few epochs of training on my dataset, for reasons I haven't pinned down. (A quick build check is sketched after the config.)

    # Parameters
    nc: 6  # number of classes
    depth_multiple: 1.33  # model depth multiple
    width_multiple: 1.25  # layer channel multiple
    anchors:
      - [10,13, 16,30, 33,23]  # P3/8
      - [30,61, 62,45, 59,119]  # P4/16
      - [116,90, 156,198, 373,326]  # P5/32
    
    # YOLOv5 v6.0 backbone
    backbone:
      # [from, number, module, args]
      [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
       [-1, 1, DCN, [128, 3, 2]],  # 1-P2/4
       [-1, 3, C3, [128]],
       [-1, 1, DCN, [256, 3, 2]],  # 3-P3/8
       [-1, 6, C3, [256]],
       [-1, 1, DCN, [512, 3, 2]],  # 5-P4/16
       [-1, 9, C3, [512]],
       [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
       [-1, 3, C3, [1024]],
       [-1, 1, SPPF, [1024, 5]],  # 9
      ]
    
    # YOLOv5 v6.0 head
    head:
      [[-1, 1, Conv, [512, 1, 1]],
       [-1, 1, nn.Upsample, [None, 2, 'nearest']],
       [[-1, 6], 1, Concat, [1]],  # cat backbone P4
       [-1, 3, C3, [512, False]],  # 13
    
       [-1, 1, Conv, [256, 1, 1]],
       [-1, 1, nn.Upsample, [None, 2, 'nearest']],
       [[-1, 4], 1, Concat, [1]],  # cat backbone P3
       [-1, 3, C3, [256, False]],  # 17 (P3/8-small)
    
       [-1, 1, Conv, [256, 3, 2]],
       [[-1, 14], 1, Concat, [1]],  # cat head P4
       [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)
    
       [-1, 1, Conv, [512, 3, 2]],
       [[-1, 10], 1, Concat, [1]],  # cat head P5
       [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)
    
       [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
      ]
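
    Before training, one can sanity-check that the yaml parses and the DCN layers wire up (yolov5x_dcn.yaml is my file name; the custom op needs a CUDA device):

    # build the modified model once and push a dummy batch through it
    import torch
    from models.yolo import Model

    model = Model('models/yolov5x_dcn.yaml', ch=3, nc=6).cuda()
    _ = model(torch.zeros(1, 3, 640, 640, device='cuda'))  # raises if a layer is miswired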
    
    
  5. expected scalar type Float but found Half

    This error occurs during training. It essentially means the input doesn't support half precision, only single precision. I referenced this fix https://github.com/CharlesShang/DCNv2/issues/43#issuecomment-648127833 and changed it slightly: if the incoming tensors are not FP32, cast them to FP32 first, then cast the outputs back to FP16 at the end. A toy sketch of the pattern follows.
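
    The cast-in/cast-out pattern in isolation, with a plain conv standing in for the FP32-only CUDA kernel (a toy example, not the actual _backend call):

    import torch
    import torch.nn.functional as F

    def fp32_only_op(x, w):
        return F.conv2d(x, w, padding=1)       # stand-in for _backend.dcn_v2_forward

    def wrapped(x, w):
        is_convert = x.dtype != torch.float32  # AMP hands the op FP16 tensors
        if is_convert:
            x, w = x.float(), w.float()        # cast up for the kernel...
        y = fp32_only_op(x, w)
        return y.half() if is_convert else y   # ...and hand FP16 back to the graph

    x = torch.randn(1, 8, 16, 16).half()
    w = torch.randn(8, 8, 3, 3).half()
    print(wrapped(x, w).dtype)  # torch.float16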

Writing a DCN plugin for TensorRT 🤖

  1. dcnv2.h, based on dcnv2Plugin.h from: https://github.com/SsisyphusTao/Pytorch-TensorRT-Plugins

  2. dcnv2.cpp, based on dcnv2Plugin.cpp from: https://github.com/SsisyphusTao/Pytorch-TensorRT-Plugins

  3. I changed a few places here, as follows (a layout sketch follows the code).

    // enqueue function
    // drop the packed-mask pointer computation:
    // const float* mask = offset_mask + deformable_group * 2 * kernel_size * kernel_size * height * width;
    // and pass offset_mask in place of mask
    modulated_deformable_im2col_cuda(stream, input, offset, offset_mask,
    1, in_channels, height, width,
    height_out, width_out, kernel_size, kernel_size,
    padding, padding, stride, stride, dilation, dilation,
    deformable_group, mColumn);
    
    // createPlugin: the original way of passing parameters felt too cumbersome, so the fields are repacked here
    IPluginV2Ext* DCNv2PluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) TRT_NOEXCEPT
    {
    	assert(fc->nbFields == 3);
    
    	std::vector<float> weight;
    	std::vector<float> bias;
    	int in_channel, out_channel, kernel, deformable_group, groups, padding, stride, dilation;
    	const PluginField* fields = fc->fields;
    	for (int i = 0; i < fc->nbFields; ++i)
    	{
    		const char* attrName = fields[i].name;
    		if (!strcmp(attrName, "netinfo"))
    		{
    			int* p_netinfo = (int*)(fields[i].data);
    			in_channel = p_netinfo[0];
    			out_channel = p_netinfo[1];
    			kernel = p_netinfo[2];
    			deformable_group = p_netinfo[3];
    			dilation = p_netinfo[4];
    			stride = p_netinfo[5];
    			padding = p_netinfo[6];
    			groups = p_netinfo[7];
    		}
    		else if (!strcmp(attrName, "weight"))
    		{
    			assert(fields[i].type == PluginFieldType::kFLOAT32);
    			int size = fields[i].length;
    			weight.reserve(size);
    			const auto* w = static_cast<const float*>(fields[i].data);
    			for (int j = 0; j < size; j++)
    			{
    				weight.push_back(*w);
    				w++;
    			}
    		}
    		else if (!strcmp(attrName, "bias"))
    		{
    			assert(fields[i].type == PluginFieldType::kFLOAT32);
    			int size = fields[i].length;
    			bias.reserve(size);
    			const auto* w = static_cast<const float*>(fields[i].data);
    			for (int j = 0; j < size; j++)
    			{
    				bias.push_back(*w);
    				w++;
    			}
    		}
    	}
    
    	Weights mWeight{ DataType::kFLOAT, weight.data(), (int64_t)weight.size() };
    	Weights mBias{ DataType::kFLOAT, bias.data(), (int64_t)bias.size() };
    
    	DCNv2Plugin* obj = new DCNv2Plugin(out_channel,
    		kernel,
    		deformable_group,
    		dilation,
    		padding,
    		stride,
    		&mWeight, &mBias);
    	obj->setPluginNamespace(mNamespace.c_str());
    	return obj;
    }
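
    For context on the line that was dropped: in the packed layout, the conv_offset_mask output stacks channel planes as [o1 | o2 | mask], so the mask begins 2 * deformable_group * k * k planes into the buffer. A small numpy illustration:

    # why the packed mask pointer sat at offset 2 * g * k * k * H * W (illustrative sizes)
    import numpy as np

    g, k, H, W = 1, 3, 40, 40
    buf = np.arange(3 * g * k * k * H * W, dtype=np.float32)  # flat CHW buffer, 27 planes
    mask_start = 2 * g * k * k * H * W                        # first 18 planes are offsets
    mask = buf[mask_start:]                                   # last 9 planes are the mask
    print(mask.size == g * k * k * H * W)                     # True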
    
  4. dcn_v2_im2col_cuda.h and dcn_v2_im2col_cuda.cu, based on: https://github.com/jinfagang/DCNv2_latest/tree/master/src/cuda

  5. Generate the .wts file with gen_wts.py (a small reader for spot-checking the output follows the script).

    import sys  
    import argparse  
    import os  
    import struct  
    import torch  
    from utils.torch_utils import select_device  
    
    def parse_args():  
        parser = argparse.ArgumentParser(description='Convert .pt file to .wts')  
        parser.add_argument('-w', '--weights', default="weights/test_dcn.pt",  
                            help='Input weights (.pt) file path (required)')  
        parser.add_argument(  
            '-o', '--output', default='weights', help='Output (.wts) file path (optional)')  
        parser.add_argument(  
            '-t', '--type', type=str, default='detect', choices=['detect', 'cls'],  
            help='determines the model is detection/classification')  
        args = parser.parse_args()  
        if not os.path.isfile(args.weights):  
            raise SystemExit('Invalid input file')  
        if not args.output:  
            args.output = os.path.splitext(args.weights)[0] + '.wts'  
        elif os.path.isdir(args.output):  
            args.output = os.path.join(  
                args.output,  
                os.path.splitext(os.path.basename(args.weights))[0] + '.wts')  
        return args.weights, args.output, args.type  
       
    pt_file, wts_file, m_type = parse_args()  
      
    # Initialize  
    device = select_device('cpu')  
    # Load model  
    model = torch.load(pt_file, map_location=device)  # load to FP32  
    model = model['ema' if model.get('ema') else 'model'].float()  
      
    if m_type == "detect":  
        # update anchor_grid info  
        anchor_grid = model.model[-1].anchors * \  
            model.model[-1].stride[..., None, None]  
        # model.model[-1].anchor_grid = anchor_grid  
        delattr(model.model[-1], 'anchor_grid')  # model.model[-1] is detect layer  
        # The parameters are saved in the OrderedDict through the "register_buffer" method, and then saved to the weights.
        model.model[-1].register_buffer("anchor_grid", anchor_grid)
      
    model.to(device).eval()  
      
    with open(wts_file, 'w') as f:  
        f.write('{}\n'.format(len(model.state_dict().keys())))  
        for k, v in model.state_dict().items():  
            vr = v.reshape(-1).cpu().numpy()  
            f.write('{} {} '.format(k, len(vr)))  
            for vv in vr:  
                f.write(' ')  
                f.write(struct.pack('>f', float(vv)).hex())  
            f.write('\n')
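
    The resulting .wts is plain text: a tensor count on the first line, then one line per tensor with its name, element count, and big-endian FP32 hex words. A tiny reader for spot-checking the output (my own debugging helper, not part of tensorrtx):

    import struct

    def read_wts(path):
        weights = {}
        with open(path) as f:
            count = int(f.readline())
            for _ in range(count):
                parts = f.readline().split()
                name, n = parts[0], int(parts[1])
                weights[name] = [struct.unpack('>f', bytes.fromhex(h))[0]
                                 for h in parts[2:2 + n]]
        return weights

    w = read_wts('weights/test_dcn.wts')
    print(len(w), next(iter(w)))  # tensor count and the first tensor name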
    
  6. Inspecting the network structure, only the Conv layers need to be replaced with DCNv2 layers; everything else stays the same. Reference: https://github.com/wang-xinyu/tensorrtx/tree/master/yolov5

  7. Implement the DCN layer (a PyTorch mirror of the slice/concat/sigmoid graph follows the code)

    nvinfer1::ILayer* convDcn(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights>& weightMap, nvinfer1::ITensor& input, std::string lname, int outch, int inChannel, int outChannel, int kernelSize = 3, int stride = 2, int padding = 1)
    {
    	IConvolutionLayer* conv_offset_mask = network->addConvolutionNd(input, outch, DimsHW{ kernelSize, kernelSize }, weightMap[lname + ".conv_offset_mask.weight"], weightMap[lname + ".conv_offset_mask.bias"]);
    	assert(conv_offset_mask);  // check the layer before configuring it
    	conv_offset_mask->setPaddingNd(DimsHW{ padding, padding });
    	conv_offset_mask->setStrideNd(DimsHW{ stride, stride });
    
    	Dims conv_offset_mask_dim = conv_offset_mask->getOutput(0)->getDimensions();
    
    	ISliceLayer* o1 = network->addSlice(*conv_offset_mask->getOutput(0),
    		Dims3{ 0, 0, 0 }, Dims3{ conv_offset_mask_dim.d[0] / 3 , conv_offset_mask_dim.d[1], conv_offset_mask_dim.d[2] }, Dims3{ 1, 1, 1 });
    
    	ISliceLayer* o2 = network->addSlice(*conv_offset_mask->getOutput(0),
    		Dims3{ conv_offset_mask_dim.d[0] / 3, 0, 0 }, Dims3{ conv_offset_mask_dim.d[0] / 3 , conv_offset_mask_dim.d[1], conv_offset_mask_dim.d[2] }, Dims3{ 1, 1, 1 });
    
    	ISliceLayer* mask = network->addSlice(*conv_offset_mask->getOutput(0),
    		Dims3{ conv_offset_mask_dim.d[0] / 3 * 2, 0, 0 }, Dims3{ conv_offset_mask_dim.d[0] / 3 , conv_offset_mask_dim.d[1], conv_offset_mask_dim.d[2] }, Dims3{ 1, 1, 1 });
    
    	ITensor* concatTensors[] = { o1->getOutput(0),o2->getOutput(0) };
    	IConcatenationLayer* offset = network->addConcatenation(concatTensors, 2);
    	assert(offset);
    	auto sigmoid_conv_offset_mask = network->addActivation(*mask->getOutput(0), ActivationType::kSIGMOID);
    
    	auto creator = getPluginRegistry()->getPluginCreator("DCNv2", "1");
    	PluginField plugin_fields[3];
    	plugin_fields[0].data = weightMap[lname + ".weight"].values;
    	plugin_fields[0].length = weightMap[lname + ".weight"].count;
    	plugin_fields[0].name = "weight";
    	plugin_fields[0].type = PluginFieldType::kFLOAT32;
    
    	plugin_fields[1].data = weightMap[lname + ".bias"].values;
    	plugin_fields[1].length = weightMap[lname + ".bias"].count;
    	plugin_fields[1].name = "bias";
    	plugin_fields[1].type = PluginFieldType::kFLOAT32;
    
    	int dilation = 1;
    	int groups = 1;
    	int deformable_group = 1;
    	int netinfo[8] = { inChannel, outChannel, kernelSize, deformable_group, dilation, stride, padding, groups };
    	plugin_fields[2].data = netinfo;
    	plugin_fields[2].length = 8;
    	plugin_fields[2].name = "netinfo";
    	plugin_fields[2].type = PluginFieldType::kINT32;
    
    	PluginFieldCollection plugin_data;
    	plugin_data.nbFields = 3;
    	plugin_data.fields = plugin_fields;
    	IPluginV2* plugin_obj = creator->createPlugin("dcnlayer", &plugin_data);
    	std::vector<ITensor*> input_tensors;
    	input_tensors.push_back(&input);
    	input_tensors.push_back(offset->getOutput(0));
    	input_tensors.push_back(sigmoid_conv_offset_mask->getOutput(0));
    	auto dcn = network->addPluginV2(&input_tensors[0], input_tensors.size(), *plugin_obj);
    	return dcn;
    }
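
    The three addSlice calls plus the concatenation and sigmoid reproduce what DCN.forward does in PyTorch, which also explains the 27 passed as outch below: 3 * deformable_group * k * k. The PyTorch mirror, for reference:

    # PyTorch equivalent of the slice/concat/sigmoid graph built above
    import torch

    out = torch.randn(1, 27, 40, 40)        # conv_offset_mask output, 3 * g * k * k channels
    o1, o2, m = torch.chunk(out, 3, dim=1)  # the three addSlice thirds
    offset = torch.cat((o1, o2), dim=1)     # addConcatenation -> 18 offset channels
    mask = torch.sigmoid(m)                 # addActivation(kSIGMOID) -> 9 mask channels
    print(offset.shape, mask.shape)         # (1, 18, 40, 40) (1, 9, 40, 40)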
    
  8. Modify the yolov5 backbone

    /* ------ yolov5 backbone------ */
    	auto conv0 = convBlock(network, weightMap, *data, get_width(64, gw), 6, 2, 1, "model.0");
    	assert(conv0);
    	// 160 * 160 * 160
    	auto dcnconv1 = convDcn(network, weightMap, *conv0->getOutput(0), "model.1", 27, get_width(64, gw), get_width(128, gw));
    	assert(dcnconv1);
    	auto bottleneck_csp2 = C3(network, weightMap, *dcnconv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
    	// 320 * 80 * 80
    	auto dcnconv3 = convDcn(network, weightMap, *bottleneck_csp2->getOutput(0), "model.3", 27, get_width(128, gw), get_width(256, gw));
    	assert(dcnconv3);
    	auto bottleneck_csp4 = C3(network, weightMap, *dcnconv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
    	// 640 * 40 * 40
    	auto dcnconv5 = convDcn(network, weightMap, *bottleneck_csp4->getOutput(0), "model.5", 27, get_width(256, gw), get_width(512, gw));
    	assert(dcnconv5);
    	auto bottleneck_csp6 = C3(network, weightMap, *dcnconv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
    	auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
    	auto bottleneck_csp8 = C3(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
    	auto spp9 = SPPF(network, weightMap, *bottleneck_csp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.9");
    
  9. Add a model variant; here I add x_dcn

    bool parse_args(int argc, char** argv, std::string& wts, std::string& engine, bool& is_p6, float& gd, float& gw, std::string& img_dir) {
    	if (argc < 4) return false;
    	if (std::string(argv[1]) == "-s" && (argc == 5 || argc == 7)) {
    	wts = std::string(argv[2]);
    	engine = std::string(argv[3]);
    	auto net = std::string(argv[4]);
    	if (net[0] == 'n') {
    		gd = 0.33;
    		gw = 0.25;
    	} else if (net[0] == 's') {
    		gd = 0.33;
    		gw = 0.50;
    	} else if (net[0] == 'm') {
    		gd = 0.67;
    		gw = 0.75;
    	} else if (net[0] == 'l') {
    		gd = 1.0;
    		gw = 1.0;
    	} else if (net == "x_dcn") {  // compare the full name before the single-char 'x' test
    		gd = 1.33;
    		gw = 1.25;
    	} else if (net[0] == 'x') {
    		gd = 1.33;
    		gw = 1.25;
    	} else if (net[0] == 'c' && argc == 7) {
    		gd = atof(argv[5]);
    		gw = atof(argv[6]);
    	} else {
    		return false;
    	}
    	if (net.size() == 2 && net[1] == '6') {
    		is_p6 = true;
    	}
    	} else if (std::string(argv[1]) == "-d" && argc == 4) {
    		engine = std::string(argv[2]);
    		img_dir = std::string(argv[3]);
    	} else {
    		return false;
    	}
    		return true;
    }
    
  10. Serialize the model and run inference

     // serialize model to plan file
     ./yolov5_det -s [.wts] [.engine] [n/s/m/l/x/x_dcn/n6/s6/m6/l6/x6 or c/c6 gd gw]  
     // deserialize and run inference, the images in [image folder] will be processed.
     ./yolov5_det -d [.engine] [image folder]  
    

Validation results 👉

Validated on my own dataset, AP improved by 0.5, which doesn't feel like much. Some authors report gains of 5 AP; that seems exaggerated to me, but opinions will differ, and perhaps DCN simply isn't a great fit for my dataset. Ideas and discussion are welcome.

DEMO 🔊
Based on tensorrtx's yolov5, this implements yolov5 + dcnv2.

END 🎭

  1. The above documents the whole implementation process; feel free to use it as a reference.
  2. Mistakes are inevitable while learning; corrections are very welcome.
  3. Finally, writing this up was not easy, so likes, favorites, and follows are appreciated.

If I have seen further, it is by standing on the shoulders of giants.
