diff --git a/doc/source/devel/biaps/biap_0009.rst b/doc/source/devel/biaps/biap_0009.rst
new file mode 100644
index 0000000000..897bd1c3ba
--- /dev/null
+++ b/doc/source/devel/biaps/biap_0009.rst
@@ -0,0 +1,335 @@
.. _biap9:

################################
BIAP9 - The Coordinate Image API
################################

:Author: Chris Markiewicz
:Status: Draft
:Type: Standards
:Created: 2021-09-16

**********
Background
**********

Surface data is generally kept separate from geometric metadata
================================================================

In contrast to volumetric data, whose geometry can be fully encoded in the
shape of a data array and a 4x4 affine matrix, data sampled to a surface
require the location of each sample to be explicitly represented by a
coordinate. In practice, the most common approach is to have a geometry file
and a data file.

A geometry file consists of a vertex coordinate array and a triangle array
describing the adjacency of vertices, while a data file is an n-dimensional
array with one axis corresponding to vertex.

Keeping these files separate is a pragmatic optimization that avoids costly
duplication of geometric data, but it presents an administrative burden to
direct consumers of the data.

Terminology
===========

For the purposes of this BIAP, the following terms are used:

* Coordinate - a triplet of floating point values in RAS+ space
* Vertex - an index into a table of coordinates
* Triangle (or face) - a triplet of adjacent vertices (A-B-C);
  the normal vector for the face is :math:`\overline{AB}\times\overline{AC}`
* Topology - vertex adjacency data, independent of vertex coordinates,
  typically in the form of a list of triangles
* Geometry - topology + a specific set of coordinates for a surface
* Parcel - a subset of vertices; can be the full topology. Special cases include:

  * Patch - a connected subspace
  * Decimated mesh - a subspace that has a desired density of vertices

* Subspace sequence - an ordered set of subspaces
* Data array - an n-dimensional array with one axis corresponding to the
  vertices (typical) OR faces (rarer) in a patch sequence

Currently supported surface formats
===================================

* FreeSurfer

  * Geometry (e.g. ``lh.pial``):
    :py:func:`~nibabel.freesurfer.io.read_geometry` /
    :py:func:`~nibabel.freesurfer.io.write_geometry`
  * Data

    * Morphometry:
      :py:func:`~nibabel.freesurfer.io.read_morph_data` /
      :py:func:`~nibabel.freesurfer.io.write_morph_data`
    * Labels: :py:func:`~nibabel.freesurfer.io.read_label`
    * MGH: :py:class:`~nibabel.freesurfer.mghformat.MGHImage`

* GIFTI: :py:class:`~nibabel.gifti.gifti.GiftiImage`

  * Every image contains a collection of data arrays, which may be
    coordinates, topology, or data (further subdivided by type and intent)

* CIFTI-2: :py:class:`~nibabel.cifti2.cifti2.Cifti2Image`

  * Pure data array, with an image header containing flexible axes
  * The ``BrainModelAxis`` is a subspace sequence including patches for
    each hemisphere (cortex without the medial wall) and subcortical
    structures defined by indices into a three-dimensional array and an
    affine matrix
  * Geometry is referred to by an associated ``wb.spec`` file
    (no current implementation in NiBabel)
  * Possible to have one with no geometric information, e.g., parcels x time
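
To make the administrative burden concrete, a typical workflow today loads
geometry and per-vertex data through unrelated interfaces, with only a file
naming convention tying them together (the file paths below are purely
illustrative):

.. code-block:: python

    import nibabel as nb
    from nibabel.freesurfer.io import read_geometry

    # Geometry comes from one file...
    coords, triangles = read_geometry("sub-01/surf/lh.white")
    # ...and functional data from another; nothing in either file records
    # the association between them.
    func = nb.load("sub-01/func/sub-01_hemi-L_bold.func.gii").agg_data()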

Other relevant formats
======================

* MNE's STC (source time course) format. Contains:

  * Subject name (resolvable with a FreeSurfer ``SUBJECTS_DIR``)
  * Index arrays into left and right hemisphere surfaces (subspace sequence)
  * Data, one of:

    * ndarray of shape ``(n_verts, n_times)``
    * tuple of ndarrays of shapes ``(n_verts, n_sensors)`` and
      ``(n_sensors, n_times)``

  * Time start
  * Time step

*****************************************
Desiderata for an API supporting surfaces
*****************************************

The following are provisional guiding principles:

1. A surface image (data array) should carry a reference to geometric metadata
   that is easily transferred to a new image.
2. Partial images (data only or geometry only) should be possible. Absence of
   components should have a well-defined signature, such as a property that is
   ``None`` or a specific ``Exception`` being raised.
3. All arrays (coordinates, triangles, data arrays) should be proxied to
   avoid excess memory consumption.
4. Selecting among coordinates (e.g., gray/white boundary, inflated surface)
   for a single topology should be possible.
5. Combining multiple brain structures (canonically, left and right hemispheres)
   in memory should be easy; serializing to file may be format-specific.
6. Splitting a data array into independent patches that can be separately
   operated on and serialized should be possible.


Prominent use cases
===================

We consider the following use cases for working with surface data.
A good API will make retrieving the components needed for each use case
straightforward, as well as storing the results in new images.

* Arithmetic/modeling - per-vertex mathematical operations
* Smoothing - topology/geometry-respecting smoothing
* Plotting - painting the data array as a texture on a surface
* Decimation - subsampling a topology (possibly a subset, possibly with
  interpolated vertex locations)
* Resampling to a geometrically-aligned surface

  * Downsampling by decimating, smoothing and resampling
  * Inter-subject resampling by using ``?h.sphere.reg``

* Interpolation of per-vertex and per-face data arrays

When possible, we prefer to expose NumPy ``ndarray``\s and allow the use of
NumPy, SciPy and scikit-learn functions directly. In some cases, it may make
sense for NiBabel to provide convenience methods.

********
Proposal
********

A ``CoordinateImage`` is an N-dimensional array, where one axis corresponds
to a sequence of points in one or more parcels.

.. code-block:: python

    class CoordinateImage:
        """
        Attributes
        ----------
        header : a file-specific header
        coordaxis : ``CoordinateAxis``
        dataobj : array-like
        """

    class CoordinateAxis:
        """
        Attributes
        ----------
        parcels : list of ``Parcel`` objects
        """

        def load_structures(self, mapping):
            """
            Associate parcels to ``Pointset`` structures
            """

        def __getitem__(self, slicer):
            """
            Return a sub-sampled CoordinateAxis containing structures
            matching the indices provided.
            """

        def get_indices(self, parcel, indices=None):
            """
            Return the indices in the full axis that correspond to the
            requested parcel. If indices are provided, further subsample
            the requested parcel.
            """

    class Parcel:
        """
        Attributes
        ----------
        name : str
        structure : ``Pointset``
        indices : object that selects a subset of coordinates in structure
        """
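
As a sketch of how these pieces are intended to fit together (all names and
file paths here are purely illustrative, not part of the proposal):

.. code-block:: python

    img = CoordinateImage.from_filename("sub-01_task-rest_bold.dtseries.nii")
    # The coordinate axis is an ordered sequence of parcels...
    [parcel.name for parcel in img.coordaxis.parcels]
    # ...and subsetting the axis and the data with the same indices yields a
    # new, internally consistent image
    lh_idcs = img.coordaxis.get_indices(parcel="CortexLeft")
    lh_img = CoordinateImage(img.dataobj[lh_idcs], img.coordaxis[lh_idcs], img.header)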

To describe coordinate geometry, the following structures are proposed:

.. code-block:: python

    class Pointset:
        @property
        def n_coords(self):
            """ Number of coordinates """

        def get_coords(self, name=None):
            """ Nx3 array of coordinates in RAS+ space """


    class TriangularMesh(Pointset):
        @property
        def n_triangles(self):
            """ Number of faces """

        def get_triangles(self, name=None):
            """ Mx3 array of indices into coordinate table """

        def get_mesh(self, name=None):
            return self.get_coords(name=name), self.get_triangles(name=name)

        def get_names(self):
            """ List of surface names that can be passed to
            ``get_{coords,triangles,mesh}``
            """

        def decimate(self, *, n_coords=None, ratio=None):
            """ Return a TriangularMesh with a smaller number of vertices that
            preserves the geometry of the original """
            # To be overridden when a format provides optimization opportunities


    class NdGrid(Pointset):
        """
        Attributes
        ----------
        shape : 3-tuple
            number of coordinates in each dimension of grid
        """
        def get_affine(self, name=None):
            """ 4x4 array """


The ``NdGrid`` class allows raveled volumetric data to be treated the same as
triangular mesh or other coordinate data.

Finally, a structure for containing a collection of related geometric files is
defined:

.. code-block:: python

    class GeometryCollection:
        """
        Attributes
        ----------
        structures : dict
            Mapping from structure names to ``Pointset``
        """

        @classmethod
        def from_spec(klass, pathlike):
            """ Load a collection of geometries from a specification. """

The canonical example of a geometry collection is a pair of left and right
hemisphere meshes.

Here we present common use cases:


Modeling
========

.. code-block:: python

    from nilearn.glm.first_level import make_first_level_design_matrix, run_glm

    bold = CoordinateImage.from_filename("/data/func/hemi-L_bold.func.gii")
    dm = make_first_level_design_matrix(...)
    labels, results = run_glm(bold.get_fdata(), dm)
    betas = CoordinateImage(results["betas"], bold.coordaxis, bold.header)
    betas.to_filename("/data/stats/hemi-L_betas.mgz")

In this case, no reference to the surface structure is needed, as the
operations occur on a per-vertex basis.
The coordinate axis and header are preserved to ensure that no metadata is
lost.

Here we assume that ``CoordinateImage`` is able to make the appropriate
translations between formats (GIFTI, MGH). This is not guaranteed in the final
API.

Smoothing
=========

.. code-block:: python

    bold = CoordinateImage.from_filename("/data/func/hemi-L_bold.func.gii")
    bold.coordaxis.load_structures({"lh": "/data/anat/hemi-L_midthickness.surf.gii"})
    # Not implementing a networkx-style weighted graph here, so assume we have
    # a function that retrieves a graph for each structure (one possible sketch follows)
    graphs = get_graphs(bold.coordaxis)
    distances = distance_matrix(graphs['lh'])  # n_coords x n_coords matrix
    weights = normalize(gaussian(distances, sigma))
    # Wildly inefficient smoothing algorithm
    smoothed = CoordinateImage(weights @ bold.get_fdata(), bold.coordaxis, bold.header)
    smoothed.to_filename(f"/data/func/hemi-L_smooth-{sigma}_bold.func.gii")
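
One possible shape for that ``get_graphs`` helper, building a sparse,
edge-length-weighted adjacency graph from the proposed ``get_mesh`` accessor
(``distance_matrix``, ``gaussian`` and ``normalize`` above are likewise
assumed to be provided by the user; this is a sketch, not part of the proposed
API):

.. code-block:: python

    import numpy as np
    from scipy.sparse import coo_matrix

    def get_graphs(coordaxis, name=None):
        """ Build one vertex-adjacency graph per parcel, weighted by edge length """
        graphs = {}
        for parcel in coordaxis.parcels:
            coords, triangles = parcel.structure.get_mesh(name=name)
            coords = np.asarray(coords)
            triangles = np.asarray(triangles)
            # Each triangle (i, j, k) contributes the edges ij, jk and ki;
            # duplicate edges from neighboring triangles are summed by coo_matrix,
            # so a real implementation may want to deduplicate or symmetrize.
            i, j = np.hstack([triangles[:, [0, 1]].T,
                              triangles[:, [1, 2]].T,
                              triangles[:, [2, 0]].T])
            weights = np.linalg.norm(coords[i] - coords[j], axis=1)
            n_coords = coords.shape[0]
            graphs[parcel.name] = coo_matrix((weights, (i, j)), shape=(n_coords, n_coords))
        return graphs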

Plotting
========

Nilearn currently provides a ``plot_surf`` function.
With the proposed API, we could interface as follows:

.. code-block:: python

    def plot_surf_img(img, surface="inflated"):
        from nilearn.plotting import plot_surf
        coords, triangles = img.coordaxis.parcels[0].get_mesh(name=surface)

        data = img.get_fdata()

        # nilearn expects the mesh as (coordinates, faces)
        return plot_surf((coords, triangles), data)

    tstats = CoordinateImage.from_filename("/data/stats/hemi-L_contrast-taskVsBase_tstat.mgz")
    # Assume a GeometryCollection that reads a FreeSurfer subject directory
    fs_subject = FreeSurferSubject.from_spec("/data/subjects/fsaverage5")
    tstats.coordaxis.load_structures({"lh": fs_subject.get_structure("lh")})
    plot_surf_img(tstats)

Subsampling CIFTI-2
===================

.. code-block:: python

    img = nb.load("sub-01_task-rest_bold.dtseries.nii")  # Assume CIFTI CoordinateImage
    parcel = nb.load("sub-fsLR_hemi-L_label-DLPFC_mask.label.gii")  # GiftiImage
    structure = parcel.meta.metadata['AnatomicalStructurePrimary']  # "CortexLeft"
    vtx_idcs = np.where(parcel.agg_data())[0]
    dlpfc_idcs = img.coordaxis.get_indices(parcel=structure, indices=vtx_idcs)

    # A subsampled coordinate axis will override any duplicate information from the header
    dlpfc_img = CoordinateImage(img.dataobj[dlpfc_idcs], img.coordaxis[dlpfc_idcs], img.header)

    # Now load geometry so we can plot
    wbspec = CaretSpec("fsLR.wb.spec")
    dlpfc_img.coordaxis.load_structures(wbspec)
    ...

diff --git a/doc/source/devel/biaps/index.rst b/doc/source/devel/biaps/index.rst
index 1c30b3cd3c..42dcd276e3 100644
--- a/doc/source/devel/biaps/index.rst
+++ b/doc/source/devel/biaps/index.rst
@@ -19,6 +19,7 @@ proposals.
    biap_0006
    biap_0007
    biap_0008
+   biap_0009

 .. toctree::
    :hidden:
diff --git a/nibabel/surfaceimages.py b/nibabel/surfaceimages.py
new file mode 100644
index 0000000000..719bae9047
--- /dev/null
+++ b/nibabel/surfaceimages.py
@@ -0,0 +1,226 @@
import numpy as np

from nibabel.affines import apply_affine
from nibabel.dataobj_images import DataobjImage
from nibabel.filebasedimages import FileBasedHeader
from nibabel.spatialimages import SpatialImage


class Pointset:
    r""" A collection of related sets of coordinates in 3D space.

    Coordinates are in RAS+ orientation, i.e., $(x, y, z)$ refers to
    a point $x\mathrm{mm}$ right, $y\mathrm{mm}$ anterior and $z\mathrm{mm}$
    superior of the origin.

    The order of coordinates should be assumed to be significant.
    """

    def get_coords(self, name=None):
        """ Get coordinate data in RAS+ space

        Parameters
        ----------
        name : str
            Name of one of a family of related coordinates.
            For example, ``"white"`` and ``"pial"`` surfaces

        Returns
        -------
        coords : (N, 3) array-like
            Coordinates in RAS+ space
        """
        raise NotImplementedError

    @property
    def n_coords(self):
        """ Number of coordinates

        The default implementation loads coordinates. Subclasses may
        override with more efficient implementations.
        """
        return self.get_coords().shape[0]


class TriangularMesh(Pointset):
    """ A triangular mesh is a description of a surface tessellated into triangles """

    def __init__(self, meshes=None):
        """ Triangular mesh objects have access to an internal
        ``_meshes`` dictionary that has keys that are mesh names.
        The values may be any structure that permits the class to
        provide coordinates and triangles on request.
        """
        if meshes is None:
            meshes = {}
        self._meshes = meshes
        super().__init__()

    @property
    def n_triangles(self):
        """ Number of triangles (faces)

        The default implementation loads triangles. Subclasses may
        override with more efficient implementations.
        """
        return self.get_triangles().shape[0]

    def get_triangles(self, name=None):
        """ Mx3 array of indices into coordinate table """
        raise NotImplementedError

    def get_mesh(self, name=None):
        return self.get_coords(name=name), self.get_triangles(name=name)

    def get_names(self):
        """ List of surface names that can be passed to
        ``get_{coords,triangles,mesh}``
        """
        return list(self._meshes.keys())

    def decimate(self, *, n_coords=None, ratio=None):
        """ Return a TriangularMesh with a smaller number of vertices that
        preserves the geometry of the original.

        Please contribute a generic decimation algorithm at
        https://github.com/nipy/nibabel
        """
        raise NotImplementedError

    def load_vertex_data(self, pathlike):
        """ Return a SurfaceImage with data corresponding to each vertex """
        raise NotImplementedError

    def load_face_data(self, pathlike):
        """ Return a SurfaceImage with data corresponding to each face """
        raise NotImplementedError


class SurfaceHeader(FileBasedHeader):
    """ Template class to implement SurfaceHeader protocol """
    def get_geometry(self):
        """ Generate ``TriangularMesh`` object from header object

        If no default geometry can be provided, returns ``None``.
        """
        return None


class SurfaceImage(DataobjImage):
    header_class = SurfaceHeader

    def __init__(self, dataobj, header=None, geometry=None, extra=None, file_map=None):
        """ Initialize image

        The image is a combination of a data array (``dataobj``) and,
        optionally, a geometry describing where its samples lie.
        """
        super().__init__(dataobj, header=header, extra=extra, file_map=file_map)
        if geometry is None:
            geometry = self.header.get_geometry()
        self._geometry = geometry

    def load_geometry(self, pathlike):
        """ Specify a geometry for a data-only image """
        raise NotImplementedError


class VolumeGeometry(Pointset):
    """

    .. testsetup::

        >>> from nibabel.tests.nibabel_data import needs_nibabel_data
        >>> needs_nibabel_data('nitest-cifti2')()

    >>> import nibabel as nb
    >>> from nibabel.tests.nibabel_data import get_nibabel_data
    >>> from pathlib import Path
    >>> dscalar = nb.load(Path(get_nibabel_data()) / 'nitest-cifti2' / "ones.dscalar.nii")
    >>> brainmodel_axis = dscalar.header.get_axis(1)
    >>> volgeom = VolumeGeometry(
    ...     brainmodel_axis.affine,
    ...     brainmodel_axis.voxel[brainmodel_axis.volume_mask],
    ...     brainmodel_axis.volume_shape)
    """

    def __init__(self, affines, indices, shape=None):
        try:
            self._affines = dict(affines)
        except (TypeError, ValueError):
            # A single (4, 4) array is not a mapping; treat it as the default affine
            self._affines = {"affine": np.array(affines)}
        self._default = next(iter(self._affines))

        self._indices = np.array(indices)
        if self._indices.ndim != 2 or self._indices.shape[1] != 3:
            raise ValueError("Indices must be an Nx3 matrix")

        if shape is None:
            # The grid must be large enough to contain the largest index
            shape = tuple(self._indices.max(axis=0) + 1)
        self._shape = tuple(shape)

    def get_coords(self, name=None):
        if name is None:
            name = self._default
        return apply_affine(self._affines[name], self._indices)

    @property
    def n_coords(self):
        return self._indices.shape[0]

    def get_indices(self):
        return self._indices

    def to_mask(self, name=None, shape=None):
        if name is None:
            name = self._default
        if shape is None:
            shape = self._shape
        dataobj = np.zeros(shape, dtype=np.bool_)
        # Index the i, j, k columns separately to set one voxel per index row
        dataobj[tuple(self._indices.T)] = True
        return SpatialImage(dataobj, self._affines[name])

    @classmethod
    def from_mask(klass, img):
        affine = img.affine
        indices = np.vstack(np.where(img.dataobj)).T
        return klass(affine, indices=indices)


class GeometryCollection:
    def __init__(self, structures=()):
        self._structures = dict(structures)

    def get_structure(self, name):
        return self._structures[name]

    @property
    def names(self):
        return list(self._structures)

    @classmethod
    def from_spec(klass, pathlike):
        """ Load a collection of geometries from a specification, broadly construed. """
        raise NotImplementedError


class PointsetSequence(GeometryCollection, Pointset):
    def __init__(self, structures=()):
        super().__init__(structures)
        self._indices = {}
        next_index = 0
        for name, struct in self._structures.items():
            end = next_index + struct.n_coords
            self._indices[name] = slice(next_index, end)
            # Slices are half-open, so the next structure starts at ``end``
            next_index = end

    def get_indices(self, *names):
        if len(names) == 1:
            return self._indices[names[0]]
        return [self._indices[name] for name in names]

    # def get_structures(self, *, names=None, indices=None):
    #     """ We probably want some way to get a subset of structures """

    def get_coords(self, name=None):
        return np.vstack([struct.get_coords(name=name)
                          for struct in self._structures.values()])

    @property
    def n_coords(self):
        return sum(struct.n_coords for struct in self._structures.values())
diff --git a/nibabel/tests/test_surfaceimages.py b/nibabel/tests/test_surfaceimages.py
new file mode 100644
index 0000000000..a822cfcb22
--- /dev/null
+++ b/nibabel/tests/test_surfaceimages.py
@@ -0,0 +1,220 @@
import os
from nibabel.surfaceimages import *
from nibabel.arrayproxy import ArrayProxy
from nibabel.optpkg import optional_package

from nibabel.tests.test_filebasedimages import FBNumpyImage

import numpy as np
from pathlib import Path

h5, has_h5py, _ = optional_package('h5py')


class H5ArrayProxy:
    def __init__(self, file_like, dataset_name):
        self.file_like = file_like
        self.dataset_name = dataset_name
        with h5.File(file_like, "r") as h5f:
            arr = h5f[dataset_name]
            self._shape = arr.shape
            self._dtype = arr.dtype

    @property
    def is_proxy(self):
        return True

    @property
    def shape(self):
        return self._shape

    @property
    def ndim(self):
        return len(self.shape)

    @property
    def dtype(self):
        return self._dtype

    def __array__(self, dtype=None):
        with h5.File(self.file_like, "r") as h5f:
            return np.asanyarray(h5f[self.dataset_name], dtype)

    def __getitem__(self, slicer):
        # Slicing reopens the file and defers the selection to h5py
        with h5.File(self.file_like, "r") as h5f:
            return h5f[self.dataset_name][slicer]


class H5Geometry(TriangularMesh):
    """Simple Geometry file structure that combines a single topology
    with one or more coordinate sets
    """
    @classmethod
    def from_filename(klass, pathlike):
        meshes = {}
        with h5.File(pathlike, "r") as h5f:
            # Read datasets into memory; the handles would be invalid once
            # the file is closed
            triangles = h5f['topology'][()]
            for name, coords in h5f['coordinates'].items():
                meshes[name] = (coords[()], triangles)
        return klass(meshes)

    def to_filename(self, pathlike):
        topology = None
        coordinates = {}
        for name, mesh in self._meshes.items():
            coords, faces = mesh
            if topology is None:
                topology = faces
            elif not np.array_equal(faces, topology):
                raise ValueError("Inconsistent topology")
            coordinates[name] = coords

        with h5.File(pathlike, "w") as h5f:
            h5f.create_dataset("/topology", data=topology)
            for name, coord in coordinates.items():
                h5f.create_dataset(f"/coordinates/{name}", data=coord)

    def get_coords(self, name=None):
        if name is None:
            name = next(iter(self._meshes))
        coords, _ = self._meshes[name]
        return coords

    def get_triangles(self, name=None):
        if name is None:
            name = next(iter(self._meshes))
        _, triangles = self._meshes[name]
        return triangles


class NPSurfaceImage(SurfaceImage):
    valid_exts = ('.npy',)
    files_types = (('image', '.npy'),)

    @classmethod
    def from_file_map(klass, file_map):
        with file_map['image'].get_prepare_fileobj('rb') as fobj:
            arr = np.load(fobj)
        return klass(arr)

    def to_file_map(self, file_map=None):
        file_map = self.file_map if file_map is None else file_map
        with file_map['image'].get_prepare_fileobj('wb') as fobj:
            np.save(fobj, np.asanyarray(self.dataobj))

    def get_data_dtype(self):
        return self.dataobj.dtype

    def set_data_dtype(self, dtype):
        # ``dataobj`` is a read-only property, so replace the underlying array
        self._dataobj = np.asanyarray(self._dataobj).astype(dtype)


class FSGeometryProxy:
    def __init__(self, pathlike):
        self._file_like = str(Path(pathlike))
        self._offset = None
        self._vnum = None
        self._fnum = None

    def _peek(self):
        from nibabel.freesurfer.io import _fread3
        with open(self._file_like, "rb") as fobj:
            magic = _fread3(fobj)
            if magic != 16777214:
                raise NotImplementedError("Triangle files only!")
            fobj.readline()
            fobj.readline()
            self._vnum = np.fromfile(fobj, ">i4", 1)[0]
            self._fnum = np.fromfile(fobj, ">i4", 1)[0]
            self._offset = fobj.tell()

    @property
    def vnum(self):
        if self._vnum is None:
            self._peek()
        return self._vnum

    @property
    def fnum(self):
        if self._fnum is None:
            self._peek()
        return self._fnum

    @property
    def offset(self):
        if self._offset is None:
            self._peek()
        return self._offset

    @property
    def coords(self):
        ap = ArrayProxy(self._file_like, ((self.vnum, 3), ">f4", self.offset))
        ap.order = 'C'
        return ap

    @property
    def triangles(self):
        # Coordinates are 3 x 4-byte floats per vertex
        offset = self.offset + 12 * self.vnum
        ap = ArrayProxy(self._file_like, ((self.fnum, 3), ">i4", offset))
        ap.order = 'C'
        return ap


class FreeSurferHemisphere(TriangularMesh):
    @classmethod
    def from_filename(klass, pathlike):
        path = Path(pathlike)
        hemi, default = path.name.split(".")
        mesh_names = ['orig', 'white', 'smoothwm',
                      'pial', 'inflated', 'sphere',
                      'midthickness', 'graymid']  # Often created
        if default not in mesh_names:
            mesh_names.append(default)
        meshes = {}
        for mesh in mesh_names:
            fpath = path.parent / f"{hemi}.{mesh}"
            if fpath.exists():
                meshes[mesh] = FSGeometryProxy(fpath)
        hemi = klass(meshes)
        hemi._default = default
        return hemi

    def get_coords(self, name=None):
        if name is None:
            name = self._default
        return self._meshes[name].coords

    def get_triangles(self, name=None):
        if name is None:
            name = self._default
        return self._meshes[name].triangles

    @property
    def n_coords(self):
        return self._meshes[self._default].vnum

    @property
    def n_triangles(self):
        return self._meshes[self._default].fnum


class FreeSurferSubject(GeometryCollection):
    @classmethod
    def from_subject(klass, subject_id, subjects_dir=None):
        """ Load a FreeSurfer subject by ID """
        if subjects_dir is None:
            subjects_dir = os.environ["SUBJECTS_DIR"]
        return klass.from_spec(Path(subjects_dir) / subject_id)

    @classmethod
    def from_spec(klass, pathlike):
        """ Load a FreeSurfer subject from its directory structure """
        subject_dir = Path(pathlike)
        surfs = subject_dir / "surf"
        structures = {
            "lh": FreeSurferHemisphere.from_filename(surfs / "lh.white"),
            "rh": FreeSurferHemisphere.from_filename(surfs / "rh.white"),
        }
        subject = klass(structures)
        subject._subject_dir = subject_dir
        return subject
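

# The helper classes above are not exercised anywhere yet. A minimal
# round-trip check for H5Geometry might look like the sketch below; it assumes
# a pytest-style ``tmp_path`` fixture and the hypothetical HDF5 layout used by
# ``H5Geometry.to_filename``.
def test_h5geometry_roundtrip(tmp_path):
    if not has_h5py:
        return  # pytest.skip would be preferable if pytest is in use
    coords = np.arange(12, dtype="f4").reshape(4, 3)
    triangles = np.array([[0, 1, 2], [0, 2, 3]], dtype="i4")
    geom = H5Geometry({"pial": (coords, triangles)})
    fname = tmp_path / "geometry.h5"
    geom.to_filename(fname)
    reloaded = H5Geometry.from_filename(fname)
    assert np.array_equal(reloaded.get_coords("pial"), coords)
    assert np.array_equal(reloaded.get_triangles("pial"), triangles)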