0%

论文复现

论文复现

TS-CAM

Debug

os.path.join 造成的 Linux 与 Windows 路径分隔符差异

1
2
3
4
# Original: the path separator is wrong, so the resulting path is invalid
os.path.join(root, 'images.txt')
# Should be changed to
os.path.join(root, 'images.txt').replace('\\','/')

KeyError: '__getstate__'

由 num_workers 引起:DataLoader 使用多个 worker 并行加载数据时出现错误,具体原因不清楚。这次的解决方法是删去并行语句(torch.nn.DataParallel),并将 num_workers 改为 0。查看自己电脑逻辑处理器(线程)数的方法:
1.WIN+R
2.输入wmic回车
3.输入cpu get numberOfLogicalProcessors回车

RuntimeError: CUDA out of memory.

减小 batch_size 或图像大小。一开始只在 default 文件中修改,没有效果,依然报错;后来发现 default 文件中的默认值是在读取 config 的 yaml 文件之前设置的,会被 yaml 中的设置覆盖,因此还需要修改 config 文件中的对应设置。

代码分析

train_cam

parameters&work_dir:

--config_file ./configs/CUB/deit_tscam_tiny_patch16_224.yaml --lr 5e-5 MODEL.CAM_THR 0.1

运行位置应该在主目录,因为没有直接使用bash运行shell文件

利用python的注册器创建模型(deit.py&registry.py)

参考

superpixel-fcn

Debug

scipy版本造成的问题

1
2
3
4
# from scipy.ndimage import imread
from imageio import imread
# from scipy.misc import imsave
from imageio import imsave

cudnn.benchmark

用于加速,参考

代码分析

run_demo.py

使用的网格大小是16x16,输入图像为224x224时,生成14x14个网格

image-20220422195255321

SpixelNet

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def forward(self, x):
    """Run the conv encoder and the skip-connected deconv decoder, then softmax the mask.

    Five conv stages produce out1..out5; four deconv stages upsample again,
    each one concatenated with the encoder output of the same resolution and
    fused by a conv. The final prediction goes through ``self.softmax``.
    The shapes in the comments are the ones noted for a 224x224 input.
    """
    # input: x [bs, 3, 224, 224]
    # out1 [1, 16, 224, 224]
    out1 = self.conv0b(self.conv0a(x))
    # out2 [1, 32, 112, 112]
    out2 = self.conv1b(self.conv1a(out1))
    # out3 [1, 64, 56, 56]
    out3 = self.conv2b(self.conv2a(out2))
    # out4 [1, 128, 28, 28]
    out4 = self.conv3b(self.conv3a(out3))
    # out5 [1, 256, 14, 14]
    out5 = self.conv4b(self.conv4a(out4))

    # out_deconv3 [1, 128, 28, 28]
    out_deconv3 = self.deconv3(out5)
    # Stack with out4 along channels: concat3 [1, 256, 28, 28]
    concat3 = torch.cat((out4, out_deconv3), 1)
    # Fuse with a conv: out_conv3_1 [1, 128, 28, 28]
    out_conv3_1 = self.conv3_1(concat3)

    # out_deconv2 [1, 64, 56, 56]
    out_deconv2 = self.deconv2(out_conv3_1)
    # concat2 [1, 128, 56, 56]
    concat2 = torch.cat((out3, out_deconv2), 1)
    # out_conv2_1 [1, 64, 56, 56]
    out_conv2_1 = self.conv2_1(concat2)

    # out_deconv1 [1, 32, 112, 112]
    out_deconv1 = self.deconv1(out_conv2_1)
    # concat1 [1, 64, 112, 112]
    concat1 = torch.cat((out2, out_deconv1), 1)
    # out_conv1_1 [1, 32, 112, 112]
    out_conv1_1 = self.conv1_1(concat1)

    # out_deconv0 [1, 16, 224, 224]
    out_deconv0 = self.deconv0(out_conv1_1)
    # concat0 [1, 32, 224, 224]
    concat0 = torch.cat((out1, out_deconv0), 1)
    # out_conv0_1 [1, 16, 224, 224]
    out_conv0_1 = self.conv0_1(concat0)
    # Note: self.pred_mask0 = nn.Conv2d(in_planes, channel, kernel_size=3, stride=1, padding=1, bias=True)
    # with in_planes=16 and channel=self.assign_ch=9
    # mask0 [1, 9, 224, 224]
    mask0 = self.pred_mask0(out_conv0_1)
    # Q [1, 9, 224, 224]
    prob0 = self.softmax(mask0)

    return prob0

main.py

输入图像尺寸为208*208,bs=4,label代表

1
2
3
4
# label [4, 1, 208, 208]
# label2one_hot_torch turns an integer label into a C-dimensional one-hot encoding
# label_1hot [4, 50, 208, 208]
label_1hot = label2one_hot_torch(label.to(device), C=50)

loss.py

image-20220425230130709

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# The program uses compute_semantic_pos_loss; the RGBXY variant is not used.
# It reconstructs a 50-dim semantic vector and a 2-dim position vector; the semantic part is scored with cross-entropy and the xy part with the L2 distance.

# Downsample by 16x: pooled_labxy [4, 52, 13, 13]
pooled_labxy = poolfeat(labxy_feat, prob, kernel_size, kernel_size)
# Upsample by 16x: reconstr_feat [4, 52, 208, 208]
reconstr_feat = upfeat(pooled_labxy, prob, kernel_size, kernel_size)
# Difference between the reconstructed xy features and the original xy features
loss_map = reconstr_feat[:,-2:,:,:] - labxy_feat[:,-2:,:,:]

# self def cross entropy -- the official one combined softmax
# First term of the formula: cross-entropy
logit = torch.log(reconstr_feat[:, :-2, :, :] + 1e-8)
loss_sem = - torch.sum(logit * labxy_feat[:, :-2, :, :]) / b
# Second term of the formula
loss_pos = torch.norm(loss_map, p=2, dim=1).sum() / b * m / S

AFA

win10安装swig

mmcv安装

mmseg单卡训练

将分布式训练改为单卡训练

1
2
3
4
5
6
7
8
9
10
11
#183 dist.init_process_group(backend=args.backend,)
#212 train_sampler = DistributedSampler(train_dataset,shuffle=True)
#219 sampler=train_sampler,
#220 prefetch_factor=4
#285 wetr = DistributedDataParallel(wetr, device_ids=[args.local_rank], find_unused_parameters=True)
#287 train_sampler.set_epoch(np.random.randint(cfg.train.max_iters))
#300 train_sampler.set_epoch(np.random.randint(cfg.train.max_iters))

segformer_head.py中
norm_cfg=dict(type='BN', requires_grad=True)
# norm_cfg=dict(type='SyncBN', requires_grad=True)

utils中的imutils.py

1
2
3
4
5
6
7
8
def tensorboard_label(labels=None):
    """Colorize a label map and wrap it into an image grid.

    The squeezed labels are passed through ``encode_cmap``, converted to a
    tensor, permuted with ``[2, 0, 1]`` and handed to
    ``torchvision.utils.make_grid``.

    NOTE(review): presumably ``labels`` is a single label map so that
    ``encode_cmap`` returns an H x W x C array — confirm against encode_cmap.
    """
    ## labels
    labels_cmap = encode_cmap(np.squeeze(labels))
    # Variant for a 4-D colour map, kept for reference:
    # labels_cmap = torch.from_numpy(labels_cmap).permute([0, 3, 1, 2])
    labels_cmap = torch.from_numpy(labels_cmap).permute([2, 0, 1])
    grid_labels = torchvision.utils.make_grid(tensor=labels_cmap, nrow=2)

    return grid_labels
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
def cams_to_affinity_label(cam_label, mask=None, ignore_index=255):
    """Build pairwise affinity labels from a CAM pseudo-label map.

    The label map is downsampled by 16 with nearest-neighbour interpolation
    and flattened to N positions. Entry ``[i, r, c]`` of the result is 1 when
    positions r and c carry the same label, 0 when they differ, and
    ``ignore_index`` when either position is itself ``ignore_index`` or when
    ``mask`` is 0 at ``[r, c]``.

    Args:
        cam_label: label map of shape [b, h, w] (e.g. [1, 224, 224]).
        mask: optional tensor indexed as ``mask == 0`` over the last two
            result dimensions; presumably shaped [N, N] — confirm with caller.
        ignore_index: label value marking pixels to ignore.

    Returns:
        Long tensor of shape [b, N, N] with N = (h // 16) * (w // 16).
    """
    b, h, w = cam_label.shape
    # e.g. [1, 224, 224] -> [1, 1, 14, 14]
    cam_label_resized = F.interpolate(
        cam_label.unsqueeze(1).type(torch.float32),
        size=[h // 16, w // 16],
        mode="nearest",
    )
    # Flatten the spatial grid: [b, 1, N]
    _cam_label = cam_label_resized.reshape(b, 1, -1)
    # Broadcasting [b, 1, N] against [b, N, 1] compares every position with
    # every other one without materialising two repeated [b, N, N] tensors.
    aff_label = (_cam_label == _cam_label.transpose(1, 2)).type(torch.long)
    # Positions whose own label is the ignore value: [b, N]
    ignored = _cam_label[:, 0, :] == ignore_index

    for i in range(b):
        if mask is not None:
            aff_label[i, mask == 0] = ignore_index
        # The 0/1 comparison above overwrote ignored positions, so mark their
        # whole row and column as ignored again.
        aff_label[i, :, ignored[i]] = ignore_index
        aff_label[i, ignored[i], :] = ignore_index

    return aff_label
1
2
3
4
5
6
7
8
9
10
11
12
def get_aff_loss(inputs, targets):
    """Compute a class-balanced affinity loss.

    Entries of ``targets`` equal to 1 are positives and entries equal to 0
    are negatives; any other value contributes to neither term.

    Returns:
        A tuple ``(loss, pos_count, neg_count)``: the loss is the average of
        the positive and negative terms, and each count is the number of
        matching entries plus one.
    """

    pos_label = (targets == 1).type(torch.int16)
    # +1 keeps the division below defined when there are no positives
    pos_count = pos_label.sum() + 1
    neg_label = (targets == 0).type(torch.int16)
    # +1 keeps the division below defined when there are no negatives
    neg_count = neg_label.sum() + 1
    # Sigmoid is disabled here. NOTE(review): presumably inputs are already in [0, 1] — confirm with caller
    #inputs = torch.sigmoid(input=inputs)

    # Positives are penalised by (1 - inputs), negatives by inputs
    pos_loss = torch.sum(pos_label * (1 - inputs)) / pos_count
    neg_loss = torch.sum(neg_label * (inputs)) / neg_count

    return 0.5 * pos_loss + 0.5 * neg_loss, pos_count, neg_count

PAR实现

image-20220710235231319

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
###
#local pixel refinement
###

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

def get_kernel():
    """Return eight one-hot 3x3 kernels, one per neighbour of the centre cell.

    The result has shape [8, 1, 3, 3]. Kernel ``k`` holds a single 1 at the
    k-th non-centre position in row-major order (top-left, top, top-right,
    left, right, bottom-left, bottom, bottom-right), so convolving with it
    picks out that neighbour.
    """
    weight = torch.zeros(8, 1, 3, 3)
    # Row-major walk over the 3x3 grid, skipping the centre (1, 1).
    offsets = [(r, c) for r in range(3) for c in range(3) if (r, c) != (1, 1)]
    for idx, (r, c) in enumerate(offsets):
        weight[idx, 0, r, c] = 1

    return weight

class PAR(nn.Module):
    """Refine masks with local, image-derived pixel affinities.

    For every pixel the 8 neighbours at each dilation in ``dilations`` are
    sampled. Neighbour weights combine a colour-difference term with a small
    distance term, and the masks are replaced ``num_iter`` times by the
    weighted sum of their neighbours.
    """

    def __init__(self, dilations, num_iter,):
        """Store the dilation rates and iteration count and build the kernels."""
        super().__init__()
        self.dilations = dilations
        self.num_iter = num_iter
        kernel = get_kernel()
        # Buffer so the kernel follows the module across devices.
        self.register_buffer('kernel', kernel)
        self.pos = self.get_pos()
        self.dim = 2  # axis holding the stacked neighbour samples
        self.w1 = 0.3
        self.w2 = 0.01

    def get_dilated_neighbors(self, x):
        """Sample the 8 neighbours of every pixel at each dilation.

        Args:
            x: tensor of shape [b, c, h, w].

        Returns:
            Tensor of shape [b, c, 8 * len(dilations), h, w].
        """
        b, c, h, w = x.shape
        x_aff = []
        for d in self.dilations:
            # Replicate-pad by d on every side so the dilated 3x3 convolution
            # keeps the spatial size, e.g. [1,3,98,112] -> [1,3,100,114] for d=1.
            # Replicate padding takes no fill value.
            _x_pad = F.pad(x, [d] * 4, mode='replicate')
            # Treat every channel of every image as its own single-channel
            # image: [b*c, 1, H, W].
            _x_pad = _x_pad.reshape(b * c, -1, _x_pad.shape[-2], _x_pad.shape[-1])
            # self.kernel is [8, 1, 3, 3]: each one-hot kernel selects one of
            # the 8 neighbour positions. [b*c, 8, h, w] -> [b, c, 8, h, w].
            _x = F.conv2d(_x_pad, self.kernel, dilation=d).view(b, c, -1, h, w)
            x_aff.append(_x)
        # Stack the samples of all dilations, e.g. 6 dilations -> 48 samples.
        return torch.cat(x_aff, dim=2)

    def get_pos(self):
        """Return neighbour distances, shape [1, 1, 8 * len(dilations), 1, 1].

        Straight neighbours are at distance ``d`` and diagonal neighbours at
        ``sqrt(2) * d`` for each dilation ``d``.
        """
        ker = torch.ones(1, 1, 8, 1, 1)
        # Indices 0, 2, 5, 7 are the four diagonal neighbours.
        for idx in (0, 2, 5, 7):
            ker[0, 0, idx, 0, 0] = np.sqrt(2)

        return torch.cat([ker * d for d in self.dilations], dim=2)

    def forward(self, imgs, masks):
        """Refine ``masks`` guided by ``imgs``.

        Args:
            imgs: image tensor [b, c, h, w] (e.g. [1, 3, 98, 112]).
            masks: mask tensor [b, k, H, W]; it is resized to the spatial
                size of ``imgs`` first.

        Returns:
            Refined masks of shape [b, k, h, w].
        """
        # Resize the masks to the image resolution.
        masks = F.interpolate(masks, size=imgs.size()[-2:], mode="bilinear", align_corners=True)

        # Neighbour samples of the image: [b, c, K, h, w] with
        # K = 8 * len(dilations).
        _imgs = self.get_dilated_neighbors(imgs)
        # Neighbour distances: [1, 1, K, 1, 1].
        _pos = self.pos.to(_imgs.device)

        # Absolute difference between each pixel and its neighbours. The
        # centre pixel is broadcast over the K axis instead of repeated.
        _imgs_abs = torch.abs(_imgs - imgs.unsqueeze(self.dim))
        # Per-pixel spread of the neighbour samples: [b, c, 1, h, w].
        _imgs_std = torch.std(_imgs, dim=self.dim, keepdim=True)
        # Spread of the distances. They are identical at every pixel, so this
        # is computed once on the [1, 1, K, 1, 1] tensor and broadcast later.
        _pos_std = torch.std(_pos, dim=self.dim, keepdim=True)

        aff = -(_imgs_abs / (_imgs_std + 1e-8) / self.w1) ** 2
        # Average over the image channels: [b, 1, K, h, w].
        aff = aff.mean(dim=1, keepdim=True)
        pos_aff = -(_pos / (_pos_std + 1e-8) / self.w1) ** 2

        # Normalise over the neighbours; the distance term gets weight w2.
        aff = F.softmax(aff, dim=2) + self.w2 * F.softmax(pos_aff, dim=2)

        for _ in range(self.num_iter):
            # Neighbour samples of the masks (the pixel itself is not one of
            # them): [b, k, K, h, w].
            _masks = self.get_dilated_neighbors(masks)
            # Weighted sum of the neighbours gives the updated masks.
            masks = (_masks * aff).sum(2)

        return masks

TransFG

apex安装

windows10安装apex

参考

1
2
3
$ git clone https://github.com/NVIDIA/apex
$ cd apex
$ python setup.py install

路径报错

scipy的imread报错,替换为imageio.imread

内存占用很大,出现 MemoryError

CutLER

需要手动将TokenCut代码放入third_party文件夹中