Original link
In the figure, the first column is the original scene image, the second column is the sparse depth data, the third column is the dense depth data, and the fourth column is the model's prediction.
The model has three training modes: 1) the first-column RGB image as the input and the third column as the label; 2) the first and second columns merged into 4-channel data as the input and the third column as the label; 3) the second column as the input and the third column as the label.
The second column is obtained by sampling the third column. There are two sampling methods, implemented in the dataloaders/dense_to_sparse.py script. If you want to run your own dataset, you only need to prepare the first- and third-column data.
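For reference, here is a minimal sketch of how sparse depth is produced from dense depth with the uniform sampler. The `UniformSampling` constructor and the `dense_to_sparse` mask call mirror `create_sparse_depth` in dataloaders/dataloader.py below; the array shapes and the sample count of 200 are placeholder assumptions.

```python
import numpy as np
from dataloaders.dense_to_sparse import UniformSampling

# Placeholder inputs: an HxWx3 image and an HxW dense depth map (meters).
rgb = np.zeros((480, 640, 3), dtype=np.uint8)
depth = np.random.uniform(0.5, 10.0, size=(480, 640))

# Keep 200 randomly chosen valid depth pixels; everything else becomes 0.
sparsifier = UniformSampling(num_samples=200, max_depth=np.inf)
mask_keep = sparsifier.dense_to_sparse(rgb, depth)  # boolean keep-mask
sparse_depth = np.zeros(depth.shape)
sparse_depth[mask_keep] = depth[mask_keep]
```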

The dataset is about 30 GB, which is fairly large. I downloaded it and put it on a network disk: https://pan.baidu.com/s/1SzQhDVZBJSy9gnMkr4UCMQ
Extraction code: tpql
After extraction you get a nyudepthv2 folder containing train and val subfolders:


The .h5 files here are just a storage format; internally they are structured like a dictionary and contain the rgb data and the corresponding depth data. The code includes a loading function for them (the h5_loader function in the dataloaders/dataloader.py script).
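For example, here is a quick way to peek at one sample. The keys and the channel transpose follow the `h5_loader` shown below; the file path is only an example, so point it at any .h5 file from the extracted dataset.

```python
import h5py
import numpy as np

# Example path; substitute any .h5 file from the extracted dataset.
with h5py.File('data/nyudepthv2/val/official/00001.h5', 'r') as h5f:
    rgb = np.transpose(np.array(h5f['rgb']), (1, 2, 0))  # stored 3xHxW -> HxWx3
    depth = np.array(h5f['depth'])                       # HxW depth map, meters
print(rgb.shape, depth.shape)
```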
The project runs evaluation via its evaluate mode. After downloading the data and the model, create a data folder named data and put the extracted dataset into it. Then run python main.py --evaluate model_best.pth on the command line; a results folder results is created automatically, along with a stitched comparison image comparison_7.png.


To get the same kind of output as val, but run in batch over a test set, quite a few places have to be changed on top of the val code. Below I paste all the scripts I modified.
(1) Data loading
nyu_dataloader.py
```python
import numpy as np
import dataloaders.transforms as transforms
from dataloaders.dataloader import MyDataloader

iheight, iwidth = 480, 640  # raw image size

class NYUDataset(MyDataloader):
    def __init__(self, root, type, sparsifier=None, modality='rgb'):
        super(NYUDataset, self).__init__(root, type, sparsifier, modality)
        self.output_size = (228, 304)

    def train_transform(self, rgb, depth):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Resize(250.0 / iheight),  # this is for computational efficiency, since rotation can be slow
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np

    def val_transform(self, rgb, depth):
        depth_np = depth
        transform = transforms.Compose([
            transforms.Resize(240.0 / iheight),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)
        return rgb_np, depth_np

    def test_transform(self, rgb, depth):
        # added for test mode: same preprocessing as val_transform
        depth_np = depth
        transform = transforms.Compose([
            transforms.Resize(240.0 / iheight),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np
```
dataloader.py
```python
import os
import os.path
import numpy as np
import torch.utils.data as data
import h5py
import dataloaders.transforms as transforms

IMG_EXTENSIONS = ['.h5', ]

def is_image_file(filename):
    return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)

def find_classes(dir):
    classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
    classes.sort()
    class_to_idx = {classes[i]: i for i in range(len(classes))}
    return classes, class_to_idx

def make_dataset(dir, class_to_idx):
    images = []
    dir = os.path.expanduser(dir)
    for target in sorted(os.listdir(dir)):
        d = os.path.join(dir, target)
        if not os.path.isdir(d):
            continue
        for root, _, fnames in sorted(os.walk(d)):
            for fname in sorted(fnames):
                if is_image_file(fname):
                    path = os.path.join(root, fname)
                    item = (path, class_to_idx[target])
                    images.append(item)
    return images

def h5_loader(path):
    h5f = h5py.File(path, "r")
    rgb = np.array(h5f['rgb'])
    rgb = np.transpose(rgb, (1, 2, 0))
    depth = np.array(h5f['depth'])
    return rgb, depth

# def rgb2grayscale(rgb):
#     return rgb[:,:,0] * 0.2989 + rgb[:,:,1] * 0.587 + rgb[:,:,2] * 0.114

to_tensor = transforms.ToTensor()

class MyDataloader(data.Dataset):
    modality_names = ['rgb', 'rgbd', 'd']  # , 'g', 'gd'
    color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)

    def __init__(self, root, type, sparsifier=None, modality='rgb', loader=h5_loader):
        classes, class_to_idx = find_classes(root)
        imgs = make_dataset(root, class_to_idx)
        assert len(imgs) > 0, "Found 0 images in subfolders of: " + root + "\n"
        print("Found {} images in {} folder.".format(len(imgs), type))
        self.root = root
        self.imgs = imgs
        self.classes = classes
        self.class_to_idx = class_to_idx
        if type == 'train':
            self.transform = self.train_transform
        elif type == 'val':
            self.transform = self.val_transform
        elif type == 'test':  # added: test mode
            self.transform = self.test_transform
        else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val, test"))
        self.loader = loader
        self.sparsifier = sparsifier

        assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
            "Supported dataset types are: " + ''.join(self.modality_names)
        self.modality = modality

        self.mark = type  # added: remember the split so __getitem__ can behave differently in test mode

    def train_transform(self, rgb, depth):
        raise (RuntimeError("train_transform() is not implemented."))

    def val_transform(self, rgb, depth):
        raise (RuntimeError("val_transform() is not implemented."))

    def test_transform(self, rgb, depth):
        raise (RuntimeError("test_transform() is not implemented."))

    def create_sparse_depth(self, rgb, depth):
        if self.sparsifier is None:
            return depth
        else:
            mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
            sparse_depth = np.zeros(depth.shape)
            sparse_depth[mask_keep] = depth[mask_keep]
            return sparse_depth

    def create_rgbd(self, rgb, depth):
        sparse_depth = self.create_sparse_depth(rgb, depth)
        rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
        return rgbd

    def __getraw__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (rgb, depth) the raw data.
        """
        path, target = self.imgs[index]
        rgb, depth = self.loader(path)
        _, name = os.path.split(path)
        name = name.split('.')[0]
        return rgb, depth, name

    def __getitem__(self, index):
        rgb, depth, name = self.__getraw__(index)
        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise (RuntimeError("transform not defined"))

        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)

        if self.modality == 'rgb':
            input_np = rgb_np
        elif self.modality == 'rgbd':
            input_np = self.create_rgbd(rgb_np, depth_np)
        elif self.modality == 'd':
            input_np = self.create_sparse_depth(rgb_np, depth_np)

        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)

        if self.mark == 'test':
            # added: in test mode there is no ground truth, so pass the file
            # name through in place of the depth tensor (used to name outputs)
            depth_tensor = name
        else:
            depth_tensor = to_tensor(depth_np)
            depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor

    def __len__(self):
        return len(self.imgs)
```
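Relative to the original dataloader.py, the changes are the 'test' branch in __init__, the self.mark attribute, and the test-mode branch in __getitem__ that returns the file name instead of a ground-truth depth tensor, so that test() in main.py knows what to call each saved prediction.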
(2) Parameters
utils.py
```python
import os

import cv2
import torch
import shutil
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

cmap = plt.cm.viridis

def parse_command():
    model_names = ['resnet18', 'resnet50']
    loss_names = ['l1', 'l2']
    data_names = ['nyudepthv2', 'kitti']
    from dataloaders.dense_to_sparse import UniformSampling, SimulatedStereo
    sparsifier_names = [x.name for x in [UniformSampling, SimulatedStereo]]
    from models import Decoder
    decoder_names = Decoder.names
    from dataloaders.dataloader import MyDataloader
    modality_names = MyDataloader.modality_names

    import argparse
    parser = argparse.ArgumentParser(description='Sparse-to-Dense')
    parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', choices=model_names,
                        help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet18)')
    parser.add_argument('--data', metavar='DATA', default='nyudepthv2',
                        choices=data_names,
                        help='dataset: ' + ' | '.join(data_names) + ' (default: nyudepthv2)')
    parser.add_argument('--modality', '-m', metavar='MODALITY', default='rgb', choices=modality_names,
                        help='modality: ' + ' | '.join(modality_names) + ' (default: rgb)')
    parser.add_argument('-s', '--num-samples', default=0, type=int, metavar='N',
                        help='number of sparse depth samples (default: 0)')
    parser.add_argument('--max-depth', default=-1.0, type=float, metavar='D',
                        help='cut-off depth of sparsifier, negative values means infinity (default: inf [m])')
    parser.add_argument('--sparsifier', metavar='SPARSIFIER', default=UniformSampling.name, choices=sparsifier_names,
                        help='sparsifier: ' + ' | '.join(sparsifier_names) + ' (default: ' + UniformSampling.name + ')')
    parser.add_argument('--decoder', '-d', metavar='DECODER', default='deconv2', choices=decoder_names,
                        help='decoder: ' + ' | '.join(decoder_names) + ' (default: deconv2)')
    parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
                        help='number of data loading workers (default: 0)')
    parser.add_argument('--epochs', default=15, type=int, metavar='N',
                        help='number of total epochs to run (default: 15)')
    parser.add_argument('-c', '--criterion', metavar='LOSS', default='l1', choices=loss_names,
                        help='loss function: ' + ' | '.join(loss_names) + ' (default: l1)')
    parser.add_argument('-b', '--batch-size', default=2, type=int, help='mini-batch size (default: 2)')
    parser.add_argument('--lr', '--learning-rate', default=0.01, type=float,
                        metavar='LR', help='initial learning rate (default 0.01)')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                        help='momentum')
    parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)')
    parser.add_argument('--print-freq', '-p', default=10, type=int,
                        metavar='N', help='print frequency (default: 10)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('-e', '--evaluate', dest='evaluate', type=str, default='',
                        help='evaluate model on validation set')
    parser.add_argument('-t', '--test', dest='test', type=str, default='',
                        help='test model on test set')  # added: test mode flag
    parser.add_argument('--no-pretrain', dest='pretrained', action='store_false',
                        help='not to use ImageNet pre-trained weights')
    parser.set_defaults(pretrained=True)
    args = parser.parse_args()
    if args.modality == 'rgb' and args.num_samples != 0:
        print("number of samples is forced to be 0 when input modality is rgb")
        args.num_samples = 0
    if args.modality == 'rgb' and args.max_depth != 0.0:
        print("max depth is forced to be 0.0 when input modality is rgb/rgbd")
        args.max_depth = 0.0
    return args

def save_checkpoint(state, is_best, epoch, output_directory):
    checkpoint_filename = os.path.join(output_directory, 'checkpoint-' + str(epoch) + '.pth.tar')
    torch.save(state, checkpoint_filename)
    if is_best:
        best_filename = os.path.join(output_directory, 'model_best.pth.tar')
        shutil.copyfile(checkpoint_filename, best_filename)
    if epoch > 0:
        prev_checkpoint_filename = os.path.join(output_directory, 'checkpoint-' + str(epoch - 1) + '.pth.tar')
        if os.path.exists(prev_checkpoint_filename):
            os.remove(prev_checkpoint_filename)

def adjust_learning_rate(optimizer, epoch, lr_init):
    """Sets the learning rate to the initial LR decayed by 10 every 5 epochs"""
    lr = lr_init * (0.1 ** (epoch // 5))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def get_output_directory(args):
    output_directory = os.path.join('results',
        '{}.sparsifier={}.samples={}.modality={}.arch={}.decoder={}.criterion={}.lr={}.bs={}.pretrained={}'.
        format(args.data, args.sparsifier, args.num_samples, args.modality,
               args.arch, args.decoder, args.criterion, args.lr, args.batch_size,
               args.pretrained))
    return output_directory


def colored_depthmap(depth, d_min=None, d_max=None):
    if d_min is None:
        d_min = np.min(depth)
    if d_max is None:
        d_max = np.max(depth)
    depth_relative = (depth - d_min) / (d_max - d_min)
    return 255 * cmap(depth_relative)[:, :, :3]  # H, W, C


def merge_into_row(input, depth_target, depth_pred):
    rgb = 255 * np.transpose(np.squeeze(input.cpu().numpy()), (1, 2, 0))  # H, W, C
    depth_target_cpu = np.squeeze(depth_target.cpu().numpy())
    depth_pred_cpu = np.squeeze(depth_pred.data.cpu().numpy())

    d_min = min(np.min(depth_target_cpu), np.min(depth_pred_cpu))
    d_max = max(np.max(depth_target_cpu), np.max(depth_pred_cpu))
    depth_target_col = colored_depthmap(depth_target_cpu, d_min, d_max)
    depth_pred_col = colored_depthmap(depth_pred_cpu, d_min, d_max)
    img_merge = np.hstack([rgb, depth_target_col, depth_pred_col])

    return img_merge


def merge_into_row_with_gt(input, depth_input, depth_target, depth_pred):
    rgb = 255 * np.transpose(np.squeeze(input.cpu().numpy()), (1, 2, 0))  # H, W, C
    depth_input_cpu = np.squeeze(depth_input.cpu().numpy())
    depth_target_cpu = np.squeeze(depth_target.cpu().numpy())
    depth_pred_cpu = np.squeeze(depth_pred.data.cpu().numpy())

    d_min = min(np.min(depth_input_cpu), np.min(depth_target_cpu), np.min(depth_pred_cpu))
    d_max = max(np.max(depth_input_cpu), np.max(depth_target_cpu), np.max(depth_pred_cpu))
    depth_input_col = colored_depthmap(depth_input_cpu, d_min, d_max)
    depth_target_col = colored_depthmap(depth_target_cpu, d_min, d_max)
    depth_pred_col = colored_depthmap(depth_pred_cpu, d_min, d_max)

    img_merge = np.hstack([rgb, depth_input_col, depth_target_col, depth_pred_col])

    return img_merge


def add_row(img_merge, row):
    return np.vstack([img_merge, row])


def save_image(img_merge, filename):
    img_merge = Image.fromarray(img_merge.astype('uint8'))
    img_merge.save(filename)


def strentch_img(pred):
    # added: resize a prediction to 1280x720 and color-map it for visualization
    depth_pred_cpu = np.squeeze(pred.data.cpu().numpy())
    d_min = np.min(depth_pred_cpu)
    d_max = np.max(depth_pred_cpu)
    depth_pred_cpu = cv2.resize(depth_pred_cpu, (1280, 720))
    depth_pred_col = colored_depthmap(depth_pred_cpu, d_min, d_max)
    return depth_pred_col
```
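Compared with the original utils.py, the additions are the cv2 import, the -t/--test argument, and the strentch_img helper, which upsamples a prediction to 1280x720 and color-maps it for visualization.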
(3) Main function
main.py
```python
import os
import time
import csv
import numpy as np

import torch
import torch.backends.cudnn as cudnn
import torch.optim
cudnn.benchmark = True

from models import ResNet
from metrics import AverageMeter, Result
from dataloaders.dense_to_sparse import UniformSampling, SimulatedStereo
import criteria
import utils
from PIL import Image

torch.nn.Module.dump_patches = True
args = utils.parse_command()
print(args)

fieldnames = ['mse', 'rmse', 'absrel', 'lg10', 'mae',
              'delta1', 'delta2', 'delta3',
              'data_time', 'gpu_time']
best_result = Result()
best_result.set_to_worst()

def create_data_loaders(args):
    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')
    testdir = os.path.join('data', args.data, 'test')  # added: test split directory

    train_loader = None
    val_loader = None
    test_loader = None

    # sparsifier is a class for generating random sparse depth input from the ground truth
    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples, max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples, max_depth=max_depth)

    if args.data == 'nyudepthv2':
        from dataloaders.nyu_dataloader import NYUDataset
        if args.evaluate:
            val_dataset = NYUDataset(valdir, type='val',
                                     modality=args.modality, sparsifier=sparsifier)
            # set batch size to be 1 for validation
            val_loader = torch.utils.data.DataLoader(val_dataset,
                batch_size=1, shuffle=False, num_workers=args.workers,
                pin_memory=True)
        elif args.test:  # added: build the test loader
            test_dataset = NYUDataset(testdir, type='test',
                                      modality=args.modality, sparsifier=sparsifier)
            test_loader = torch.utils.data.DataLoader(test_dataset,
                batch_size=1, shuffle=False, num_workers=args.workers,
                pin_memory=True)
        else:
            train_dataset = NYUDataset(traindir, type='train',
                                       modality=args.modality, sparsifier=sparsifier)

    elif args.data == 'kitti':
        from dataloaders.kitti_dataloader import KITTIDataset
        if not args.evaluate:
            train_dataset = KITTIDataset(traindir, type='train',
                                         modality=args.modality, sparsifier=sparsifier)
        val_dataset = KITTIDataset(valdir, type='val',
                                   modality=args.modality, sparsifier=sparsifier)

        # set batch size to be 1 for validation
        val_loader = torch.utils.data.DataLoader(val_dataset,
            batch_size=1, shuffle=False, num_workers=args.workers,
            pin_memory=True)

    else:
        raise RuntimeError('Dataset not found.' +
                           'The dataset must be either of nyudepthv2 or kitti.')

    # put construction of train loader here, for those who are interested in testing only
    if not args.evaluate and not args.test:
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=True, sampler=None,
            worker_init_fn=lambda work_id: np.random.seed(work_id))
        # worker_init_fn ensures different sampling patterns for each data loading thread

    print("=> data loaders created.")
    return train_loader, val_loader, test_loader

test_save_path = './results/'  # added: where test() writes its outputs

def main():
    global args, best_result, output_directory, train_csv, test_csv

    # evaluation mode
    start_epoch = 0
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
            "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        args.test = ''  # ensure the restored args carries a falsy test attribute
        args.evaluate = True
        _, val_loader, _ = create_data_loaders(args)
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # test mode (added): run the model over the test set and save predictions
    elif args.test:
        assert os.path.isfile(args.test), \
            "=> no best model found at '{}'".format(args.test)
        print("=> loading best model '{}'".format(args.test))
        checkpoint = torch.load(args.test)
        output_directory = os.path.dirname(args.test)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        args.test = True
        _, _, test_loader = create_data_loaders(args)
        test(test_loader, model, test_save_path)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader, test_loader = create_data_loaders(args)
        args.resume = True

    # create new model
    else:
        train_loader, val_loader, test_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50, decoder=args.decoder, output_size=train_loader.dataset.output_size,
                           in_channels=in_channels, pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18, decoder=args.decoder, output_size=train_loader.dataset.output_size,
                           in_channels=in_channels, pretrained=args.pretrained)
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.weight_decay)

    # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
    model = model.cuda()

    # define loss function (criterion) and optimizer
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        train(train_loader, model, criterion, optimizer, epoch)  # train for one epoch
        result, img_merge = validate(val_loader, model, epoch)  # evaluate on validation set

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write("epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".
                    format(epoch, result.mse, result.rmse, result.absrel, result.lg10, result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = output_directory + '/comparison_best.png'
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'arch': args.arch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)


def train(train_loader, model, criterion, optimizer, epoch):
    average_meter = AverageMeter()
    model.train()  # switch to train mode
    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        input, target = input.cuda(), target.cuda()
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute pred
        end = time.time()
        pred = model(input)
        loss = criterion(pred, target)
        optimizer.zero_grad()
        loss.backward()  # compute gradient and do SGD step
        optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        if (i + 1) % args.print_freq == 0:
            print('=> output: {}'.format(output_directory))
            print('Train Epoch: {0} [{1}/{2}]\t'
                  't_Data={data_time:.3f}({average.data_time:.3f}) '
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      epoch, i + 1, len(train_loader), data_time=data_time,
                      gpu_time=gpu_time, result=result, average=average_meter.average()))

    avg = average_meter.average()
    with open(train_csv, 'a') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({'mse': avg.mse, 'rmse': avg.rmse, 'absrel': avg.absrel, 'lg10': avg.lg10,
                         'mae': avg.mae, 'delta1': avg.delta1, 'delta2': avg.delta2, 'delta3': avg.delta3,
                         'gpu_time': avg.gpu_time, 'data_time': avg.data_time})


def validate(val_loader, model, epoch, write_to_file=True):
    average_meter = AverageMeter()
    model.eval()  # switch to evaluate mode
    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        input, target = input.cuda(), target.cuda()
        torch.cuda.synchronize()
        data_time = time.time() - end

        # compute output
        end = time.time()
        with torch.no_grad():
            pred = model(input)
        torch.cuda.synchronize()
        gpu_time = time.time() - end

        # measure accuracy and record loss
        result = Result()
        result.evaluate(pred.data, target.data)
        average_meter.update(result, gpu_time, data_time, input.size(0))
        end = time.time()

        # save 8 images for visualization
        skip = 50
        if args.modality == 'd':
            img_merge = None
        else:
            if args.modality == 'rgb':
                rgb = input
            elif args.modality == 'rgbd':
                rgb = input[:, :3, :, :]
                depth = input[:, 3:, :, :]

            if i == 0:
                if args.modality == 'rgbd':
                    img_merge = utils.merge_into_row_with_gt(rgb, depth, target, pred)
                else:
                    img_merge = utils.merge_into_row(rgb, target, pred)
            elif (i < 8 * skip) and (i % skip == 0):
                if args.modality == 'rgbd':
                    row = utils.merge_into_row_with_gt(rgb, depth, target, pred)
                else:
                    row = utils.merge_into_row(rgb, target, pred)
                img_merge = utils.add_row(img_merge, row)
            elif i == 8 * skip:
                filename = output_directory + '/comparison_' + str(epoch) + '.png'
                utils.save_image(img_merge, filename)

        if (i + 1) % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
                  'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
                  'MAE={result.mae:.2f}({average.mae:.2f}) '
                  'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
                  'REL={result.absrel:.3f}({average.absrel:.3f}) '
                  'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
                      i + 1, len(val_loader), gpu_time=gpu_time, result=result, average=average_meter.average()))

    avg = average_meter.average()

    print('\n*\n'
          'RMSE={average.rmse:.3f}\n'
          'MAE={average.mae:.3f}\n'
          'Delta1={average.delta1:.3f}\n'
          'REL={average.absrel:.3f}\n'
          'Lg10={average.lg10:.3f}\n'
          't_GPU={time:.3f}\n'.format(
              average=avg, time=avg.gpu_time))

    if write_to_file:
        with open(test_csv, 'a') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writerow({'mse': avg.mse, 'rmse': avg.rmse, 'absrel': avg.absrel, 'lg10': avg.lg10,
                             'mae': avg.mae, 'delta1': avg.delta1, 'delta2': avg.delta2, 'delta3': avg.delta3,
                             'data_time': avg.data_time, 'gpu_time': avg.gpu_time})
    return avg, img_merge


def test(test_loader, model, save_path):
    # added: batch inference on the test set; in test mode the dataloader
    # returns the file name as "target", which is used to name the outputs
    average_meter = AverageMeter()
    model.eval()  # switch to evaluate mode
    for i, (input, target) in enumerate(test_loader):
        input, name = input.cuda(), target
        torch.cuda.synchronize()

        # compute output
        with torch.no_grad():
            pred = model(input)
        torch.cuda.synchronize()

        # save a colorized visualization...
        pred1 = utils.strentch_img(pred)
        save_to_file = os.path.join(save_path, name[0] + '.png')
        utils.save_image(pred1, save_to_file)

        # ...and the raw floating-point prediction as a TIFF
        save_to_tif = os.path.join(save_path, name[0] + '_ori.tiff')
        depth_pred_cpu = np.squeeze(pred.data.cpu().numpy())
        img = Image.fromarray(depth_pred_cpu)
        img = img.resize((1280, 720))
        img.save(save_to_tif)


if __name__ == '__main__':
    main()
```
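In main.py the changes are the testdir/test_loader plumbing in create_data_loaders, the args.test branch in main(), and the new test() function, which writes both a colorized PNG and the raw floating-point depth as a TIFF for each test image.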
(4) Testing
After making the changes above, create the test folder and put your data into it; see the sketch below for the expected data format.
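If you are preparing your own test data (the RGB and dense-depth pairs mentioned earlier), here is a minimal sketch of packing one sample into the layout that h5_loader expects ('rgb' stored as 3xHxW, 'depth' as HxW). The write_sample helper and the paths in the comments are hypothetical.

```python
import h5py
import numpy as np

def write_sample(path, rgb_hwc, depth_hw):
    """Pack one RGB (HxWx3 uint8) + depth (HxW float) pair into a .h5 sample."""
    with h5py.File(path, 'w') as h5f:
        h5f.create_dataset('rgb', data=np.transpose(rgb_hwc, (2, 0, 1)))
        h5f.create_dataset('depth', data=depth_hw.astype(np.float32))

# Note: make_dataset() only walks subfolders, so the files must sit one level
# down, e.g. data/nyudepthv2/test/myscene/0001.h5 (hypothetical path).
```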


Then run the following command:
python main.py --test model_best.pth
The whitish image is the raw result (the TIFF), and the color image is its visualization; the save location is set by test_save_path = './results/' in main.py.
