# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Phototourism dataset parser. Datasets and documentation here: http://phototour.cs.washington.edu/datasets/"""
from __future__ import annotations
import math
from dataclasses import dataclass, field
from pathlib import Path
from typing import Literal, Type
import numpy as np
import torch
from nerfstudio.cameras import camera_utils
from nerfstudio.cameras.cameras import Cameras, CameraType
from nerfstudio.data.dataparsers.base_dataparser import DataParser, DataParserConfig, DataparserOutputs
from nerfstudio.data.scene_box import SceneBox
# TODO(1480) use pycolmap instead of colmap_parsing_utils
# import pycolmap
from nerfstudio.data.utils.colmap_parsing_utils import read_cameras_binary, read_images_binary
from nerfstudio.utils.rich_utils import CONSOLE
@dataclass
class PhototourismDataParserConfig(DataParserConfig):
    """Phototourism dataset parser config"""

    _target: Type = field(default_factory=lambda: Phototourism)
    """target class to instantiate"""
    data: Path = Path("data/phototourism/brandenburg-gate")
    """Directory specifying location of data."""
    scale_factor: float = 3.0
    """How much to scale the camera origins by."""
    alpha_color: str = "white"
    """alpha color of background"""
    train_split_fraction: float = 0.9
    """The fraction of images to use for training. The remaining images are for eval."""
    scene_scale: float = 1.0
    """How much to scale the region of interest by."""
    orientation_method: Literal["pca", "up", "vertical", "none"] = "up"
    """The method to use for orientation."""
    center_method: Literal["poses", "focus", "none"] = "poses"
    """The method to use to center the poses."""
    auto_scale_poses: bool = True
    """Whether to automatically scale the poses to fit in +/- 1 bounding box."""
@dataclass
class Phototourism(DataParser):
    """Phototourism dataset. This is based on https://github.com/kwea123/nerf_pl/blob/nerfw/datasets/phototourism.py
    and uses colmap's utils file to read the poses.
    """

    config: PhototourismDataParserConfig

    def __init__(self, config: PhototourismDataParserConfig):
        super().__init__(config=config)
        self.data: Path = config.data

    def _generate_dataparser_outputs(self, split="train"):
        """Read the COLMAP reconstruction, build Cameras, and return outputs for one split.

        Args:
            split: Which split to produce; "train" or "val"/"test".

        Returns:
            DataparserOutputs with the image filenames, cameras, scene box, and the
            scale/transform applied to the poses.

        Raises:
            ValueError: If ``split`` is not one of "train", "val", "test".
        """
        poses = []
        fxs = []
        fys = []
        cxs = []
        cys = []
        image_filenames = []

        with CONSOLE.status(f"[bold green]Reading phototourism images and poses for {split} split..."):
            # TODO(1480) use pycolmap
            # recon = pycolmap.Reconstruction(self.data / "dense" / "sparse")
            # cams = recon.cameras
            # imgs = recon.images
            cams = read_cameras_binary(self.data / "dense/sparse/cameras.bin")
            imgs = read_images_binary(self.data / "dense/sparse/images.bin")

            for _id, cam in cams.items():
                img = imgs[_id]
                assert cam.model == "PINHOLE", "Only pinhole (perspective) camera model is supported at the moment"

                # COLMAP stores world-to-camera (R|t); invert to get camera-to-world.
                pose = torch.cat([torch.tensor(img.qvec2rotmat()), torch.tensor(img.tvec.reshape(3, 1))], dim=1)
                pose = torch.cat([pose, torch.tensor([[0.0, 0.0, 0.0, 1.0]])], dim=0)
                poses.append(torch.linalg.inv(pose))
                # PINHOLE camera params are ordered (fx, fy, cx, cy).
                fxs.append(torch.tensor(cam.params[0]))
                fys.append(torch.tensor(cam.params[1]))
                cxs.append(torch.tensor(cam.params[2]))
                cys.append(torch.tensor(cam.params[3]))
                image_filenames.append(self.data / "dense/images" / img.name)

        poses = torch.stack(poses).float()
        # Flip the y and z axes to convert from COLMAP's camera convention to nerfstudio's.
        poses[..., 1:3] *= -1
        fxs = torch.stack(fxs).float()
        fys = torch.stack(fys).float()
        cxs = torch.stack(cxs).float()
        cys = torch.stack(cys).float()

        # Filter image_filenames and poses based on train/eval split percentage.
        num_images = len(image_filenames)
        num_train_images = math.ceil(num_images * self.config.train_split_fraction)
        num_eval_images = num_images - num_train_images
        i_all = np.arange(num_images)
        i_train = np.linspace(
            0, num_images - 1, num_train_images, dtype=int
        )  # equally spaced training images starting and ending at 0 and num_images-1
        i_eval = np.setdiff1d(i_all, i_train)  # eval images are the remaining images
        assert len(i_eval) == num_eval_images

        if split == "train":
            indices = torch.tensor(i_train, dtype=torch.long)
        elif split in ["val", "test"]:
            indices = torch.tensor(i_eval, dtype=torch.long)
        else:
            raise ValueError(f"Unknown dataparser split {split}")

        poses, transform_matrix = camera_utils.auto_orient_and_center_poses(
            poses, method=self.config.orientation_method, center_method=self.config.center_method
        )

        # Scale poses: optionally normalize into +/- 1, then apply the configured factor.
        scale_factor = 1.0
        if self.config.auto_scale_poses:
            scale_factor /= float(torch.max(torch.abs(poses[:, :3, 3])))
        scale_factor *= self.config.scale_factor
        poses[:, :3, 3] *= scale_factor

        # Scene box in x,y,z order; assumes that the scene is centered at the origin.
        aabb_scale = self.config.scene_scale
        scene_box = SceneBox(
            aabb=torch.tensor(
                [[-aabb_scale, -aabb_scale, -aabb_scale], [aabb_scale, aabb_scale, aabb_scale]], dtype=torch.float32
            )
        )

        cameras = Cameras(
            camera_to_worlds=poses[:, :3, :4],
            fx=fxs,
            fy=fys,
            cx=cxs,
            cy=cys,
            camera_type=CameraType.PERSPECTIVE,
        )

        # Select only this split's cameras and filenames, keeping them aligned.
        cameras = cameras[indices]
        image_filenames = [image_filenames[i] for i in indices]
        assert len(cameras) == len(image_filenames)

        dataparser_outputs = DataparserOutputs(
            image_filenames=image_filenames,
            cameras=cameras,
            scene_box=scene_box,
            dataparser_scale=scale_factor,
            dataparser_transform=transform_matrix,
        )
        return dataparser_outputs