About the performance on the Middlebury dataset

Hello, thanks for your nice work. I tested the finetuning_model on some Middlebury images; however, in some cases the performance is not satisfying. Do you know the reason?
Below is the code I used for testing:
```python
from __future__ import print_function, division
import argparse
import os
import glob
from PIL import Image
from matplotlib import pyplot as plt
import cv2
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import torchvision.utils as vutils
import torch.nn.functional as F
import numpy as np
import time
from datasets import __datasets__
from models import __models__
from utils import *
import PIL.Image
from torch.utils.data import DataLoader
from datasets import listfiles as ls
from datasets import MiddleburyLoader as DA
import sys
import gc
import skimage
cudnn.benchmark = False
parser = argparse.ArgumentParser(description='Cascade and Fused Cost Volume for Robust Stereo Matching (CFNet)')
parser.add_argument('--model', default='cfnet', help='select a model structure', choices=__models__.keys())
parser.add_argument('--maxdisp', type=int, default=256, help='maximum disparity')
parser.add_argument('--dataset', default='kitti', help='dataset name', choices=__datasets__.keys())
parser.add_argument('--loadckpt', default='/home/jucic/my_code/CFNet/finetuning_model',
                    help='load the weights from a specific checkpoint')

# parse arguments
args = parser.parse_args()

# model, optimizer
model = __models__[args.model](args.maxdisp)
model = nn.DataParallel(model)
model.cuda()
model.eval()

# load parameters
print("loading model {}".format(args.loadckpt))
state_dict = torch.load(args.loadckpt)
model.load_state_dict(state_dict['model'])
def save_pfm(file, image, scale=1):
    """Write a float32 image to an open binary file in PFM format."""
    if image.dtype.name != 'float32':
        raise Exception('Image dtype must be float32.')

    if len(image.shape) == 3 and image.shape[2] == 3:  # color image
        color = True
    elif len(image.shape) == 2 or (len(image.shape) == 3 and image.shape[2] == 1):  # greyscale
        color = False
    else:
        raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')

    file.write(('PF\n' if color else 'Pf\n').encode())
    file.write(('%d %d\n' % (image.shape[1], image.shape[0])).encode())

    endian = image.dtype.byteorder
    # a negative scale in the PFM header marks little-endian data
    if endian == '<' or (endian == '=' and sys.byteorder == 'little'):
        scale = -scale
    file.write(('%f\n' % scale).encode())

    image.tofile(file)
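# Example usage of save_pfm (hypothetical, not part of the original script):
# the file must be opened in binary mode, since tofile() writes raw bytes.
#   with open('disparity.pfm', 'wb') as f:
#       save_pfm(f, disp.astype(np.float32))  # disp: H x W float array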
def test():
    data_path = '/home/jucic/my_code/RAFT_Opti/topdownshelfframe'
    with torch.no_grad():
        left_images = glob.glob(os.path.join(data_path, 'left/*.png')) + \
                      glob.glob(os.path.join(data_path, 'left/*.jpg'))
        right_images = glob.glob(os.path.join(data_path, 'right/*.png')) + \
                       glob.glob(os.path.join(data_path, 'right/*.jpg'))
        left_images = sorted(left_images)
        right_images = sorted(right_images)
        count = 1
        for imfile1, imfile2 in zip(left_images, right_images):
            image1 = np.array(Image.open(imfile1).convert('RGB'))
            image2 = np.array(Image.open(imfile2).convert('RGB'))

            # halve the resolution, then pad each dimension up to a
            # multiple of 32 as required by the network's downsampling stages
            height = image1.shape[0] // 2
            width = image1.shape[1] // 2
            height = int(height + (((height // 32) + 1) * 32 - height) % 32)
            width = int(width + (((width // 32) + 1) * 32 - width) % 32)
            image1 = cv2.resize(image1, (width, height))
            image2 = cv2.resize(image2, (width, height))

            # NOTE: plain [0, 1] scaling; the training pipeline normalizes
            # with get_transform() instead (see the follow-up below)
            image1 = image1 / 255.0
            image2 = image2 / 255.0
            image1 = torch.from_numpy(image1).permute(2, 0, 1)[None].float()
            image2 = torch.from_numpy(image2).permute(2, 0, 1)[None].float()
            print(image1.shape)

            begin = time.time()
            disp_ests, pred3_s3, pred_s4 = model(image1.cuda(), image2.cuda())
            print("{}ms elapsed by cfnet".format((time.time() - begin) * 1000))

            result_folder = os.path.join('/home/jucic/my_code/CFNet', "result_topdownstereo")
            if not os.path.isdir(result_folder):
                os.mkdir(result_folder)
            plt.imsave("{}/{}.png".format(result_folder, str(count).zfill(7)),
                       disp_ests[-1].cpu().numpy().squeeze())
            count += 1


if __name__ == '__main__':
    test()
```
I updated the code by adding the get_transform() function, and the results look right now!
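For anyone who runs into the same issue: the script above scales images with a plain `/255.0`, while the model expects normalized inputs. Below is a minimal sketch of that preprocessing, assuming get_transform() applies the standard ImageNet mean/std normalization via torchvision (as in GwcNet-style data loaders); check the repo's dataset code for the exact definition.

```python
import torchvision.transforms as transforms

# Assumed definition: standard ImageNet statistics, as used by
# GwcNet-style data loaders; verify against this repo's dataset code.
def get_transform():
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    return transforms.Compose([
        transforms.ToTensor(),                     # HWC uint8 -> CHW float in [0, 1]
        transforms.Normalize(mean=mean, std=std),  # per-channel normalization
    ])

# In the test loop, replace the /255.0 scaling and manual permute with:
#   processed = get_transform()
#   image1 = processed(image1)[None].cuda()  # 1 x 3 x H x W
#   image2 = processed(image2)[None].cuda()
```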
