# Source code for nerfstudio.data.dataparsers.sdfstudio_dataparser

# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Datapaser for sdfstudio formatted data"""

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Type

import torch

from nerfstudio.cameras import camera_utils
from nerfstudio.cameras.cameras import Cameras, CameraType
from nerfstudio.data.dataparsers.base_dataparser import (
    DataParser,
    DataParserConfig,
    DataparserOutputs,
)
from nerfstudio.data.scene_box import SceneBox
from nerfstudio.utils.io import load_from_json

@dataclass
class SDFStudioDataParserConfig(DataParserConfig):
    """Scene dataset parser config"""

    _target: Type = field(default_factory=lambda: SDFStudio)
    """target class to instantiate"""
    data: Path = Path("data/DTU/scan65")
    """Directory specifying location of data."""
    include_mono_prior: bool = False
    """whether or not to load monocular depth and normal """
    depth_unit_scale_factor: float = 1e-3
    """Scales the depth values to meters. Default value is 0.001 for a millimeter to meter conversion."""
    include_foreground_mask: bool = False
    """whether or not to load foreground mask"""
    # NOTE(review): only declared here; the rescale call that would consume it
    # is commented out in SDFStudio._generate_dataparser_outputs.
    downscale_factor: int = 1
    scene_scale: float = 2.0
    """
    Sets the bounding cube to have edge length of this size.
    The longest dimension of the axis-aligned bbox will be scaled to this value.
    """
    skip_every_for_val_split: int = 1
    """sub sampling validation images"""
    # When True, poses are auto-oriented with method="up" (no re-centering).
    auto_orient: bool = True
@dataclass
class SDFStudio(DataParser):
    """SDFStudio Dataset

    Parses the sdfstudio data format: a ``meta_data.json`` listing frames with
    per-frame intrinsics, camera-to-world matrices, and optional monocular
    depth/normal prior paths, all relative to ``config.data``.
    """

    config: SDFStudioDataParserConfig

    def _generate_dataparser_outputs(self, split="train"):
        """Build DataparserOutputs for the requested split.

        Args:
            split: dataset split; anything other than "train" is subsampled by
                ``config.skip_every_for_val_split``.

        Returns:
            DataparserOutputs with image filenames, cameras, scene box, and
            metadata (mono-prior filenames, orientation transform, COLMAP-frame
            poses needed to convert normal maps).
        """
        # load meta data
        meta = load_from_json(self.config.data / "meta_data.json")

        indices = list(range(len(meta["frames"])))
        # subsample to avoid out-of-memory for validation set
        if split != "train" and self.config.skip_every_for_val_split >= 1:
            indices = indices[:: self.config.skip_every_for_val_split]

        image_filenames = []
        depth_filenames = []
        normal_filenames = []
        transform = None
        fx = []
        fy = []
        cx = []
        cy = []
        camera_to_worlds = []
        for i, frame in enumerate(meta["frames"]):
            if i not in indices:
                continue

            # paths in meta_data.json are relative to the data directory
            image_filename = self.config.data / frame["rgb_path"]
            depth_filename = frame.get("mono_depth_path")
            normal_filename = frame.get("mono_normal_path")

            intrinsics = torch.tensor(frame["intrinsics"])
            camtoworld = torch.tensor(frame["camtoworld"])

            # append data
            image_filenames.append(image_filename)
            # only keep priors for frames that provide BOTH depth and normal
            if depth_filename is not None and normal_filename is not None:
                depth_filenames.append(self.config.data / depth_filename)
                normal_filenames.append(self.config.data / normal_filename)
            fx.append(intrinsics[0, 0])
            fy.append(intrinsics[1, 1])
            cx.append(intrinsics[0, 2])
            cy.append(intrinsics[1, 2])
            camera_to_worlds.append(camtoworld)

        fx = torch.stack(fx)
        fy = torch.stack(fy)
        cx = torch.stack(cx)
        cy = torch.stack(cy)
        # keep an untouched copy of the COLMAP-frame poses; normal maps are
        # stored in that frame and need these for conversion downstream
        c2w_colmap = torch.stack(camera_to_worlds)
        camera_to_worlds = torch.stack(camera_to_worlds)

        # Convert from COLMAP's/OPENCV's camera coordinate system to nerfstudio
        camera_to_worlds[:, 0:3, 1:3] *= -1

        if self.config.auto_orient:
            camera_to_worlds, transform = camera_utils.auto_orient_and_center_poses(
                camera_to_worlds,
                method="up",
                center_method="none",
            )

        # scene box from meta data
        meta_scene_box = meta["scene_box"]
        aabb = torch.tensor(meta_scene_box["aabb"], dtype=torch.float32)
        scene_box = SceneBox(
            aabb=aabb,
        )

        height, width = meta["height"], meta["width"]
        cameras = Cameras(
            fx=fx,
            fy=fy,
            cx=cx,
            cy=cy,
            height=height,
            width=width,
            camera_to_worlds=camera_to_worlds[:, :3, :4],
            camera_type=CameraType.PERSPECTIVE,
        )

        # TODO supports downsample
        # cameras.rescale_output_resolution(scaling_factor=1.0 / self.config.downscale_factor)

        if self.config.include_mono_prior:
            assert meta["has_mono_prior"], f"no mono prior in {self.config.data}"

        dataparser_outputs = DataparserOutputs(
            image_filenames=image_filenames,
            cameras=cameras,
            scene_box=scene_box,
            metadata={
                "depth_filenames": depth_filenames if len(depth_filenames) > 0 else None,
                "normal_filenames": normal_filenames if len(normal_filenames) > 0 else None,
                "transform": transform,
                # required for normal maps, these are in colmap format so they require c2w before conversion
                "camera_to_worlds": c2w_colmap if len(c2w_colmap) > 0 else None,
                "include_mono_prior": self.config.include_mono_prior,
                "depth_unit_scale_factor": self.config.depth_unit_scale_factor,
            },
        )
        return dataparser_outputs