/test.py
Python | 158 lines | 129 code | 24 blank | 5 comment | 16 complexity | 5077f2480bee6673cfb8338686db471c MD5 | raw file
- import argparse
- import glob
- import os
- import PIL.Image as pil
- import cv2
- from monolayout import model
- import numpy as np
- import torch
- from torchvision import transforms
- def get_args():
- parser = argparse.ArgumentParser(
- description="Testing arguments for MonoLayout")
- parser.add_argument("--image_path", type=str,
- help="path to folder of images", required=True)
- parser.add_argument("--model_path", type=str,
- help="path to MonoLayout model", required=True)
- parser.add_argument(
- "--ext",
- type=str,
- default="png",
- help="extension of images in the folder")
- parser.add_argument("--out_dir", type=str,
- default="output directory to save topviews")
- parser.add_argument("--type", type=str,
- default="static/dynamic/both")
- return parser.parse_args()
- def save_topview(idx, tv, name_dest_im):
- tv_np = tv.squeeze().cpu().numpy()
- true_top_view = np.zeros((tv_np.shape[1], tv_np.shape[2]))
- true_top_view[tv_np[1] > tv_np[0]] = 255
- dir_name = os.path.dirname(name_dest_im)
- if not os.path.exists(dir_name):
- os.makedirs(dir_name)
- cv2.imwrite(name_dest_im, true_top_view)
- print("Saved prediction to {}".format(name_dest_im))
- def test(args):
- models = {}
- device = torch.device("cuda")
- encoder_path = os.path.join(args.model_path, "encoder.pth")
- encoder_dict = torch.load(encoder_path, map_location=device)
- feed_height = encoder_dict["height"]
- feed_width = encoder_dict["width"]
- models["encoder"] = model.Encoder(18, feed_width, feed_height, False)
- filtered_dict_enc = {
- k: v for k,
- v in encoder_dict.items() if k in models["encoder"].state_dict()}
- models["encoder"].load_state_dict(filtered_dict_enc)
- if args.type == "both":
- static_decoder_path = os.path.join(
- args.model_path, "static_decoder.pth")
- dynamic_decoder_path = os.path.join(
- args.model_path, "dynamic_decoder.pth")
- models["static_decoder"] = model.Decoder(
- models["encoder"].resnet_encoder.num_ch_enc)
- models["static_decoder"].load_state_dict(
- torch.load(static_decoder_path, map_location=device))
- models["dynamic_decoder"] = model.Decoder(
- models["encoder"].resnet_encoder.num_ch_enc)
- models["dynamic_decoder"].load_state_dict(
- torch.load(dynamic_decoder_path, map_location=device))
- else:
- decoder_path = os.path.join(args.model_path, "decoder.pth")
- models["decoder"] = model.Decoder(
- models["encoder"].resnet_encoder.num_ch_enc)
- models["decoder"].load_state_dict(
- torch.load(decoder_path, map_location=device))
- for key in models.keys():
- models[key].to(device)
- models[key].eval()
- if os.path.isfile(args.image_path):
- # Only testing on a single image
- paths = [args.image_path]
- output_directory = os.path.dirname(args.image_path)
- elif os.path.isdir(args.image_path):
- # Searching folder for images
- paths = glob.glob(os.path.join(
- args.image_path, '*.{}'.format(args.ext)))
- output_directory = args.out_dir
- try:
- os.mkdir(output_directory)
- except BaseException:
- pass
- else:
- raise Exception(
- "Can not find args.image_path: {}".format(
- args.image_path))
- print("-> Predicting on {:d} test images".format(len(paths)))
- # PREDICTING ON EACH IMAGE IN TURN
- with torch.no_grad():
- for idx, image_path in enumerate(paths):
- # Load image and preprocess
- input_image = pil.open(image_path).convert('RGB')
- original_width, original_height = input_image.size
- input_image = input_image.resize(
- (feed_width, feed_height), pil.LANCZOS)
- input_image = transforms.ToTensor()(input_image).unsqueeze(0)
- # PREDICTION
- input_image = input_image.to(device)
- features = models["encoder"](input_image)
- output_name = os.path.splitext(os.path.basename(image_path))[0]
- print(
- "Processing {:d} of {:d} images- ".format(idx + 1, len(paths)))
- if args.type == "both":
- static_tv = models["static_decoder"](
- features, is_training=False)
- dynamic_tv = models["dynamic_decoder"](
- features, is_training=False)
- save_topview(
- idx,
- static_tv,
- os.path.join(
- args.out_dir,
- "static",
- "{}.png".format(output_name)))
- save_topview(
- idx,
- dynamic_tv,
- os.path.join(
- args.out_dir,
- "dynamic",
- "{}.png".format(output_name)))
- else:
- tv = models["decoder"](features, is_training=False)
- save_topview(
- idx,
- tv,
- os.path.join(
- args.out_dir,
- args.type,
- "{}.png".format(output_name)))
- print('-> Done!')
- if __name__ == "__main__":
- args = get_args()
- test(args)