论文复现

TS-CAM

Debug

os.path.join造成的Linux和Windows路径分隔符的区别

# Original: the path separator is wrong, so the resulting path is incorrect
os.path.join(root, 'images.txt') 
# Should be changed to
os.path.join(root, 'images.txt').replace('\\','/')

KeyError: '__getstate__'

由num_workers引起：DataLoader使用多个工作进程加载数据时出现错误，具体原因不清楚。这次的解决方法是将多卡并行语句(torch.nn.DataParallel)删去，并将num_workers改为0。查看自己电脑逻辑处理器（线程）数的方法：
1.WIN+R
2.输入wmic回车
3.输入cpu get numberOfLogicalProcessors回车

RuntimeError: CUDA out of memory.

减小batch_size或图像大小。一开始只在default文件中更改，没有效果，依然报错；后来发现default文件中的默认值是在读取config的yaml文件之前设置的，会被yaml文件中的设置覆盖，因此还需要更改config文件中的设置。

代码分析

train_cam

parameters&work_dir:

--config_file ./configs/CUB/deit_tscam_tiny_patch16_224.yaml --lr 5e-5 MODEL.CAM_THR 0.1

运行位置应该在主目录，因为没有直接使用bash运行shell文件

利用python的注册器创建模型(deit.py&registry.py)

参考

superpixel-fcn

Debug

scipy版本造成的问题

# from scipy.ndimage import imread
from imageio import imread
# from scipy.misc import imsave
from imageio import imsave

cudnn.benchmark

用于加速，参考

代码分析

run_demo.py

使用的网格大小是16x16,输入图像为224x224时，生成14x14个网格

SpixelNet

def forward(self, x):
    """Encode the input, decode it with skip connections, and predict soft assignments.

    The shape comments are the values recorded in the notes for a 224x224
    input with batch size 1; nothing in this function enforces them.

    Args:
        x: input image batch, noted as [bs, 3, 224, 224].

    Returns:
        The result of ``self.softmax`` applied to the output of
        ``self.pred_mask0`` (noted as [1, 9, 224, 224]).
    """
    # ---- Encoder: every stage feeds the next one and is kept as a skip ----
    skip0 = self.conv0b(self.conv0a(x))            # [1, 16, 224, 224]
    skip1 = self.conv1b(self.conv1a(skip0))        # [1, 32, 112, 112]
    skip2 = self.conv2b(self.conv2a(skip1))        # [1, 64, 56, 56]
    skip3 = self.conv3b(self.conv3a(skip2))        # [1, 128, 28, 28]
    bottleneck = self.conv4b(self.conv4a(skip3))   # [1, 256, 14, 14]

    # ---- Decoder: upsample, stack with the matching skip, fuse by conv ----
    up3 = self.deconv3(bottleneck)                 # [1, 128, 28, 28]
    merged3 = torch.cat((skip3, up3), 1)           # [1, 256, 28, 28]
    fused3 = self.conv3_1(merged3)                 # [1, 128, 28, 28]

    up2 = self.deconv2(fused3)                     # [1, 64, 56, 56]
    merged2 = torch.cat((skip2, up2), 1)           # [1, 128, 56, 56]
    fused2 = self.conv2_1(merged2)                 # [1, 64, 56, 56]

    up1 = self.deconv1(fused2)                     # [1, 32, 112, 112]
    merged1 = torch.cat((skip1, up1), 1)           # [1, 64, 112, 112]
    fused1 = self.conv1_1(merged1)                 # [1, 32, 112, 112]

    up0 = self.deconv0(fused1)                     # [1, 16, 224, 224]
    merged0 = torch.cat((skip0, up0), 1)           # [1, 32, 224, 224]
    fused0 = self.conv0_1(merged0)                 # [1, 16, 224, 224]

    # Per the notes, pred_mask0 is
    # nn.Conv2d(in_planes, channel, kernel_size=3, stride=1, padding=1, bias=True)
    # with in_planes=16 and channel=self.assign_ch=9.
    logits = self.pred_mask0(fused0)               # [1, 9, 224, 224]

    # Q in the notes: [1, 9, 224, 224]
    return self.softmax(logits)

main.py

输入图像尺寸为208*208，bs=4，label代表每个像素的整数类别标签（形状见下方注释）

# label [4,1,208,208]
# label2one_hot_torch converts an integer label into a C-dimensional one-hot encoding
# label_1hot [4,50,208,208]
label_1hot = label2one_hot_torch(label.to(device), C=50)

loss.py

# The program uses compute_semantic_pos_loss; the RGBXY variant is not used.
# It reconstructs the 50-dim semantic vector and the 2-dim position vector; the semantic part is scored with cross-entropy and the xy part with an L2 distance.

# Downsample by 16x: pooled_labxy  [4,52,13,13]
pooled_labxy = poolfeat(labxy_feat, prob, kernel_size, kernel_size)
# Upsample by 16x: reconstr_feat  [4,52,208,208]
reconstr_feat = upfeat(pooled_labxy, prob, kernel_size, kernel_size)
# Difference between the reconstructed xy features and the original xy features
loss_map = reconstr_feat[:,-2:,:,:] - labxy_feat[:,-2:,:,:]

# self def cross entropy  -- the official one combined softmax
# First term of the formula: cross-entropy
logit = torch.log(reconstr_feat[:, :-2, :, :] + 1e-8)
loss_sem = - torch.sum(logit * labxy_feat[:, :-2, :, :]) / b
# Second term of the formula
loss_pos = torch.norm(loss_map, p=2, dim=1).sum() / b * m / S

AFA

win10安装swig

mmcv安装

mmseg单卡训练

将分布式训练改为单卡训练

#183 dist.init_process_group(backend=args.backend,)
#212 train_sampler = DistributedSampler(train_dataset,shuffle=True)
#219 sampler=train_sampler,
#220 prefetch_factor=4
#285 wetr = DistributedDataParallel(wetr, device_ids=[args.local_rank], find_unused_parameters=True)
#287 train_sampler.set_epoch(np.random.randint(cfg.train.max_iters))
#300 train_sampler.set_epoch(np.random.randint(cfg.train.max_iters))

segformer_head.py中
norm_cfg=dict(type='BN', requires_grad=True)
# norm_cfg=dict(type='SyncBN', requires_grad=True)

utils中的imutils.py

def tensorboard_label(labels=None):
    """Colour-encode a label map and wrap it in an image grid.

    Args:
        labels: label array; every size-1 axis is removed with ``np.squeeze``
            before it is passed to ``encode_cmap``.

    Returns:
        The tensor produced by ``torchvision.utils.make_grid`` (``nrow=2``).
    """
    # presumably encode_cmap returns an H x W x C numpy image -- TODO confirm
    colour_map = encode_cmap(np.squeeze(labels))
    # Channels-last -> channels-first. The batched variant of this helper
    # used permute([0, 3, 1, 2]); here the input is a single squeezed image.
    channels_first = torch.from_numpy(colour_map).permute([2, 0, 1])
    return torchvision.utils.make_grid(tensor=channels_first, nrow=2)

def cams_to_affinity_label(cam_label, mask=None, ignore_index=255):
    """Build pairwise affinity labels from a CAM pseudo-label map.

    The label map is downsampled by 16 with nearest-neighbour interpolation,
    flattened to N positions, and every pair of positions is compared.

    Args:
        cam_label: tensor of shape [b, h, w] (noted as [1, 224, 224]).
        mask: optional array indexed against the [N, N] pair matrix; pairs
            where ``mask == 0`` are set to ``ignore_index``.
        ignore_index: label value marking positions to be ignored.

    Returns:
        Long tensor of shape [b, N, N]: 1 where the two positions share a
        label, 0 where they differ, ``ignore_index`` where the pair is masked
        out or either position carries ``ignore_index``.
    """
    b, h, w = cam_label.shape

    # [b, h, w] -> [b, 1, h//16, w//16] (noted as [1, 1, 14, 14])
    coarse = F.interpolate(
        cam_label.unsqueeze(1).type(torch.float32),
        size=[h // 16, w // 16],
        mode="nearest",
    )
    # Flatten the spatial grid: [b, 1, N] (noted as [1, 1, 196])
    flat = coarse.reshape(b, 1, -1)

    # Broadcasting the row vector against its transpose compares every
    # position with every other one: entry (i, j) is label[i] == label[j].
    aff_label = (flat == flat.permute(0, 2, 1)).type(torch.long)

    if mask is not None:
        aff_label[:, mask == 0] = ignore_index

    # The 0/1 comparison above overwrote ignored positions, so mark every
    # row and column that belongs to an ignored position again.
    ignored = flat[:, 0, :] == ignore_index
    aff_label[ignored.unsqueeze(1) | ignored.unsqueeze(2)] = ignore_index

    return aff_label

def get_aff_loss(inputs, targets):
    """Balanced affinity loss over positive (1) and negative (0) pairs.

    Args:
        inputs: predicted affinities, used as-is (no sigmoid is applied here).
        targets: affinity labels; entries equal to 1 are positives, entries
            equal to 0 are negatives, every other value is left out.

    Returns:
        Tuple ``(loss, pos_count, neg_count)`` where ``loss`` is the equally
        weighted sum of the positive and negative terms and each count is the
        number of selected entries plus one.
    """

    def _selected_mean(selector, penalty):
        # The +1 keeps the denominator non-zero when nothing is selected.
        weights = selector.to(torch.int16)
        count = weights.sum() + 1
        return torch.sum(weights * penalty) / count, count

    # Positives are penalised by (1 - prediction), negatives by the prediction.
    pos_loss, pos_count = _selected_mean(targets == 1, 1 - inputs)
    neg_loss, neg_count = _selected_mean(targets == 0, inputs)

    return 0.5 * pos_loss + 0.5 * neg_loss, pos_count, neg_count

PAR实现

###
#local pixel refinement
###

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

def get_kernel():
    """Return eight one-hot 3x3 kernels, one per neighbour of the centre cell.

    Returns:
        Float tensor of shape [8, 1, 3, 3]. Kernel ``k`` has a single 1 at the
        k-th off-centre position in row-major order; the centre (1, 1) is
        never set.
    """
    neighbour_cells = [
        (row, col)
        for row in range(3)
        for col in range(3)
        if (row, col) != (1, 1)
    ]

    weight = torch.zeros(len(neighbour_cells), 1, 3, 3)
    for index, (row, col) in enumerate(neighbour_cells):
        weight[index, 0, row, col] = 1

    return weight

class PAR(nn.Module):
    """Local pixel refinement of masks guided by image affinities.

    For every pixel, the 8 neighbours at each configured dilation are sampled.
    A per-neighbour weight is computed from the intensity difference to the
    centre pixel plus a fixed positional term, and the masks are repeatedly
    replaced by the weighted sum of their neighbours.

    Args:
        dilations: iterable of dilation rates; each contributes 8 neighbours.
        num_iter: number of refinement iterations run in ``forward``.
    """

    def __init__(self, dilations, num_iter):
        super().__init__()
        self.dilations = dilations
        self.num_iter = num_iter
        kernel = get_kernel()
        self.register_buffer('kernel', kernel)
        # Plain attribute (not a buffer); forward() moves it to the input device.
        self.pos = self.get_pos()
        self.dim = 2  # axis that holds the sampled neighbours
        self.w1 = 0.3
        self.w2 = 0.01

    def get_dilated_neighbors(self, x):
        """Sample the 8 neighbours of every pixel at every dilation.

        Args:
            x: tensor of shape [b, c, h, w].

        Returns:
            Tensor of shape [b, c, 8 * len(dilations), h, w]. The centre
            pixel itself is not among the samples.
        """
        b, c, h, w = x.shape
        x_aff = []
        for d in self.dilations:
            # Replicate-pad by d on every side so the dilated 3x3 convolution
            # below returns the original h x w (e.g. [1,3,98,112] -> [1,3,100,114]
            # for d=1). Replicate mode takes no fill value.
            _x_pad = F.pad(x, [d] * 4, mode='replicate')
            # Treat every channel of every image as its own single-channel
            # image: [b, c, H, W] -> [b*c, 1, H, W].
            _x_pad = _x_pad.reshape(b * c, -1, _x_pad.shape[-2], _x_pad.shape[-1])
            # self.kernel is [8, 1, 3, 3]: eight one-hot kernels, one per
            # neighbour position, so each output channel copies one neighbour.
            # [b*c, 1, H, W] -> [b*c, 8, h, w] -> [b, c, 8, h, w]
            _x = F.conv2d(_x_pad, self.kernel, dilation=d).view(b, c, -1, h, w)
            x_aff.append(_x)
        # Stack the results of all dilations along the neighbour axis.
        return torch.cat(x_aff, dim=2)

    def get_pos(self):
        """Return the centre-to-neighbour distances for every dilation.

        Returns:
            Tensor of shape [1, 1, 8 * len(dilations), 1, 1]: sqrt(2) * d for
            the four diagonal neighbours (indices 0, 2, 5, 7) and d for the
            other four, concatenated over the dilations d.
        """
        pos_xy = []

        ker = torch.ones(1, 1, 8, 1, 1)
        ker[0, 0, 0, 0, 0] = np.sqrt(2)
        ker[0, 0, 2, 0, 0] = np.sqrt(2)
        ker[0, 0, 5, 0, 0] = np.sqrt(2)
        ker[0, 0, 7, 0, 0] = np.sqrt(2)

        for d in self.dilations:
            pos_xy.append(ker * d)
        return torch.cat(pos_xy, dim=2)

    def forward(self, imgs, masks):
        """Refine ``masks`` using affinities computed from ``imgs``.

        Args:
            imgs: image tensor [b, c, h, w] (noted as [1, 3, 98, 112]).
            masks: mask tensor [b, k, H, W]; it is resized to the spatial size
                of ``imgs`` first (noted as [1, 3, 197, 224] -> [1, 3, 98, 112]).

        Returns:
            Refined masks of shape [b, k, h, w].
        """
        # Bring the masks to the image resolution.
        masks = F.interpolate(masks, size=imgs.size()[-2:], mode="bilinear", align_corners=True)

        # Dilated 8-neighbourhood samples of the image: [b, c, K, h, w] with
        # K = 8 * len(dilations) (noted as [1, 3, 48, 98, 112] for 6 dilations).
        _imgs = self.get_dilated_neighbors(imgs)
        # Positional distances, [1, 1, K, 1, 1].
        _pos = self.pos.to(_imgs.device)

        # Absolute difference between each neighbour and its centre pixel.
        # The centre is broadcast along the neighbour axis instead of being
        # repeated K times, which yields the same values without the copy.
        _imgs_abs = torch.abs(_imgs - imgs.unsqueeze(self.dim))
        # Spread of the neighbour samples per pixel and channel: [b, c, 1, h, w].
        _imgs_std = torch.std(_imgs, dim=self.dim, keepdim=True)
        # The distances are the same for every pixel, so their spread is
        # computed once on the [1, 1, K, 1, 1] tensor: [1, 1, 1, 1, 1].
        _pos_std = torch.std(_pos, dim=self.dim, keepdim=True)

        # Intensity affinity, [b, c, K, h, w], then averaged over the image
        # channels: [b, 1, K, h, w].
        aff = -(_imgs_abs / (_imgs_std + 1e-8) / self.w1) ** 2
        aff = aff.mean(dim=1, keepdim=True)

        # Positional affinity, [1, 1, K, 1, 1].
        # NOTE(review): this term is also scaled by w1 -- confirm whether a
        # separate bandwidth was intended.
        pos_aff = -(_pos / (_pos_std + 1e-8) / self.w1) ** 2

        # Normalise both terms over the neighbour axis; the positional term
        # broadcasts over batch and spatial axes: [b, 1, K, h, w].
        aff = F.softmax(aff, dim=2) + self.w2 * F.softmax(pos_aff, dim=2)

        for _ in range(self.num_iter):
            # Neighbour samples of the current masks (centre not included):
            # [b, k, K, h, w].
            _masks = self.get_dilated_neighbors(masks)
            # Weight every neighbour by its affinity and sum over the
            # neighbour axis to get the updated masks: [b, k, h, w].
            masks = (_masks * aff).sum(2)

        return masks

TransFG

apex安装

windows10安装apex

参考

1
2
3

$ git clone https://github.com/NVIDIA/apex
$ cd apex
$ python setup.py install

路径报错

scipy的imread报错，替换为imageio.imread

内存占用很大，出现MemoryError

CutLER

需要手动将TokenCut代码放入third_party文件夹中