Source code for nigsp.io

#!/usr/bin/env python3
"""
I/O and related utils.

Attributes
----------
EXT_1D : list
    List of supported TXT/1D file extensions, in lower case.
EXT_MAT : list
    List of supported matlab file extensions, in lower case.
EXT_NIFTI : list
    List of supported nifti file extensions, in lower case.
EXT_XLS : list
    List of supported XLS-like file extensions, in lower case.
EXT_ALL : list
    All supported file extensions, in lower case.
EXT_DICT : dictionary
    Dictionary associating values to extension lists
LOADMAT_DICT : dictionary
    Dictionary associating the same values in EXT_DICT to loading functions.
"""

import logging
from os import makedirs
from os.path import exists, join

import numpy as np

from .utils import change_var_type

# Supported file extensions, grouped by file family (all in lower case).
EXT_1D = [".txt", ".csv", ".tsv", ".1d", ".par", ".tsv.gz", ".csv.gz"]
EXT_MAT = [".mat"]
EXT_NIFTI = [".nii", ".nii.gz"]
EXT_XLS = [".xls"]
# Every supported extension: text first, then spreadsheet, MATLAB and nifti.
EXT_ALL = EXT_1D + EXT_XLS + EXT_MAT + EXT_NIFTI

# Maps a file-family label to the list of extensions belonging to it.
EXT_DICT = {"1D": EXT_1D, "xls": EXT_XLS, "mat": EXT_MAT, "nifti": EXT_NIFTI}

# Module-level logger, named after this module.
LGR = logging.getLogger(__name__)


def check_ext(all_ext, fname, scan=False, remove=False):
    """
    Check which extension a file has, and possibly remove it.

    Parameters
    ----------
    all_ext : list
        All possible extensions to check within.
    fname : str or os.PathLike
        The filename to check. Path-like objects are converted to ``str``.
    scan : bool, optional
        If True and ``fname`` does not end with any of the extensions, check
        whether a file named ``fname`` followed by one of the extensions exists
        on disk, and use the first one that is found.
        If False, don't look for files on disk.
    remove : bool, optional
        Remove the extension from fname if it has one.

    Returns
    -------
    obj_return : list
        A list holding a variable amount of items:

        has_ext : bool
            True if the extension is found, False otherwise.
        fname : str
            If ``remove`` is True, the (extensionless) fname. Otherwise fname,
            including the extension that was found while scanning, if any.
        ext : str or None
            Only if ``remove`` is True: the extension found, as spelled in
            ``all_ext``, or None if no extension was found.
    """
    from os import fspath

    # The checks below rely on str methods, so path-like input is converted.
    fname = fspath(fname)
    has_ext = False
    all_ext = change_var_type(all_ext, list, stop=False, silent=True)

    for ext in all_ext:
        if fname.lower().endswith(ext):
            has_ext = True
            LGR.debug(f"{fname} ends with extension {ext}")
            break

    if not has_ext and scan:
        for ext in all_ext:
            if exists(f"{fname}{ext}"):
                fname = f"{fname}{ext}"
                # fname already carries the extension at this point.
                LGR.warning(f"Found {fname}, using it as input henceforth")
                has_ext = True
                break

    obj_return = [has_ext]

    if remove:
        if has_ext:
            # Slice by length rather than by content: the match above is
            # case insensitive, so the suffix of fname may differ in case.
            obj_return += [fname[: -len(ext)], ext]
        else:
            obj_return += [fname, None]
    else:
        obj_return += [fname]

    return obj_return[:]
def check_nifti_dim(fname, data, dim=4):
    """
    Verify that nifti data has the expected number of dimensions.

    Singleton dimensions are dropped before the check.

    Parameters
    ----------
    fname : str
        The name of the file representing ``data`` (used in the error message).
    data : numpy.ndarray
        The data which dimensionality needs to be checked.
    dim : int, optional
        The amount of dimensions expected/desired in the data.

    Returns
    -------
    numpy.ndarray
        The squeezed data, if it has exactly ``dim`` dimensions.

    Raises
    ------
    ValueError
        If the squeezed ``data`` has a number of dimensions other than ``dim``.
    """
    squeezed = data.squeeze()

    if squeezed.ndim == dim:
        return squeezed

    raise ValueError(
        f"A {dim}D nifti file is required, but {fname} is {squeezed.ndim}D. Please "
        "check the input file."
    )
def check_mtx_dim(fname, data, shape=None):
    """
    Check dimensions of a matrix.

    Singleton dimensions are dropped before any check.

    Parameters
    ----------
    fname : str
        The name of the file representing ``data`` (used in messages).
    data : np.ndarray
        The data which dimensionality needs to be checked.
    shape : None | ``'square'`` | ``'rectangle'``
        Shape of matrix, if empty, skip shape check.

    Returns
    -------
    np.ndarray
        The squeezed data. A single value is returned as a 1-element vector.
        If the data is a vector and ``shape == 'rectangle'``, returns data
        with added empty axis.

    Raises
    ------
    NotImplementedError
        If ``data`` has more than 3 dimensions.
        If ``shape`` is not None but `data` is 3D.
    ValueError
        If ``data`` is empty
        If ``shape == 'square'`` and ``data`` is not a 2D matrix whose
        dimensions have the same length.
    """
    data = data.squeeze()
    LGR.info("Checking data shape.")

    # size (rather than shape[0]) also works on 0-d data and detects
    # arrays that are empty along any axis.
    if data.size == 0:
        raise ValueError(f"{fname} is empty!")

    if data.ndim == 0:
        # A single value squeezes down to 0-d: promote it to a 1-element
        # vector so that the shape logic below is well defined.
        data = data.reshape(1)

    if data.ndim > 3:
        raise NotImplementedError(
            f"Only matrices up to 3D are supported, but given matrix is {data.ndim}D."
        )

    if shape is not None:
        if data.ndim > 2:
            raise NotImplementedError("Cannot check shape of 3D matrix.")
        if data.ndim == 1 and shape == "rectangle":
            data = data[..., np.newaxis]
            LGR.warning(
                f"Rectangular matrix required, but {fname} is a vector. "
                "Adding empty dimension."
            )
        # A vector has no second axis to compare: it is never square.
        if shape == "square" and (data.ndim != 2 or data.shape[0] != data.shape[1]):
            raise ValueError(
                f"Square matrix required, but {fname} matrix has shape {data.shape}."
            )

    return data
def load_nifti_get_mask(fname, is_mask=False, ndim=4):
    """
    Load a nifti file and return its data, a mask, and its image object.

    Parameters
    ----------
    fname : str
        The filename to read in.
    is_mask : bool, optional
        If the file contains a mask. Default to ``False``.
    ndim : int or None, optional
        The number of dimensions expected in the data. Default is 4.
        If None, 4 dimensions are expected, unless ``is_mask=True``.
        In the latter case, 3 dimensions will be checked.

    Returns
    -------
    data : numpy.ndarray
        Data from nifti file.
    mask : numpy.ndarray
        If ``is_mask`` is ``False``, numpy.ndarray of one dimension less than data,
        in which any element that has at least a value different from zero
        in the last dimension of ``data`` is True.
        If ``is_mask`` is ``True``, mask is a boolean representation of data.
    img : nib.img
        Image object from nibabel.

    Raises
    ------
    ImportError
        If nibabel cannot be imported.

    See Also
    --------
    check_nifti_dim
    """
    try:
        import nibabel as nib
    except ImportError:
        raise ImportError(
            "nibabel is required to import nifti files. Please see install "
            "instructions."
        )

    LGR.info(f"Loading {fname}.")
    image = nib.load(fname)
    volume = image.get_fdata()

    # Resolve the expected dimensionality only when the caller left it open.
    expected_dim = (3 if is_mask else 4) if ndim is None else ndim
    volume = check_nifti_dim(fname, volume, dim=expected_dim)

    if is_mask:
        mask = volume != 0
        LGR.info(f"{fname} loaded as mask.")
        return volume, mask, image

    # Collapse the last axis: True wherever any value along it is non-zero.
    mask = volume.any(axis=-1).squeeze()
    LGR.info(f"Data loaded from {fname}.")
    return volume, mask, image
def load_txt(fname, shape=None):
    """
    Read files in textual format.

    The delimiter is chosen from the file extension: comma for csv files,
    tab for tsv files, a single space for txt/1D/par files. If no supported
    extension is found, numpy's default whitespace splitting is used.

    Parameters
    ----------
    fname : str | os.PathLike
        Path to the txt file. If it has no supported extension, a file named
        ``fname`` followed by one of the supported extensions is looked for
        and loaded instead.
    shape : None | ``'square'`` | ``'rectangle'``
        Shape of matrix, if empty, skip check.

    Returns
    -------
    mtx : numpy.ndarray
        Data matrix.

    See Also
    --------
    check_ext
    check_mtx_dim
    """
    LGR.info(f"Loading {fname}.")

    has_ext, stem, ext = check_ext(EXT_1D, fname, scan=True, remove=True)

    # If fname itself does not end with the extension, the file was found by
    # scanning the disk: load that file rather than the extensionless name.
    if has_ext and not str(fname).lower().endswith(ext):
        fname = f"{stem}{ext}"

    if ext in [".csv", ".csv.gz"]:
        delimiter = ","
    elif ext in [".tsv", ".tsv.gz"]:
        delimiter = "\t"
    elif ext in [".txt", ".1d", ".par"]:
        delimiter = " "
    else:
        delimiter = None

    mtx = np.genfromtxt(fname, delimiter=delimiter)
    mtx = check_mtx_dim(fname, mtx, shape)

    return mtx
def load_mat(fname, shape=None):
    """
    Read files in MATLAB format.

    The mat file is expected to hold at least one matrix/vector, rendered as a
    numpy.ndarray. When several are present, the one with the largest size is
    returned (the first one in case of ties).

    Parameters
    ----------
    fname : str | os.PathLike
        Path to the ``.mat`` file.
    shape : None | ``'square'`` | ``'rectangle'``
        Shape of matrix, if empty, skip check.

    Returns
    -------
    mtx : numpy.ndarray
        Data matrix.

    Notes
    -----
    Requires module ``pymatreader`` to work.

    See Also
    --------
    check_mtx_dim

    Raises
    ------
    EOFError
        If the mat file does not contain matrix or vectors.
    ImportError
        If pymatreader is not installed or can't be read.
    """
    try:
        from pymatreader import read_mat
    except ImportError:
        raise ImportError(
            "pymatreader is required to import mat files. "
            "Please see install instructions."
        )

    LGR.info(f"Loading {fname}.")
    content = read_mat(fname)

    candidates = []
    for name in content:
        # Hidden entries (e.g. '__header__', '__version__') are not data.
        if "__" in name:
            continue
        LGR.info(
            f"Checking {fname} key {str(name)} content for data "
            "(float array/matrices in MATLAB)."
        )
        if type(content[name]) is np.ndarray:
            candidates.append(name)

    if not candidates:
        raise EOFError(f"{fname} does not seem to contain a numeric matrix.")
    if len(candidates) > 1:
        LGR.warning(
            "Found multiple possible arrays to load. "
            "Selecting the biggest (highest pythonic size)."
        )

    # max() returns the first of equally sized candidates.
    key = max(candidates, key=lambda name: content[name].size)

    LGR.info(f"Selected data from MATLAB variable {key}")
    return check_mtx_dim(fname, content[key], shape)
def load_xls(fname, shape=""):
    """
    Read files in xls format (placeholder, always raises).

    Parameters
    ----------
    fname : str | os.PathLike
        Path to the xls file. Currently unused.
    shape : None | ``'square'`` | ``'rectangle'``
        Shape of matrix, if empty, skip check. Currently unused.

    See Also
    --------
    check_mtx_dim

    Raises
    ------
    NotImplementedError
        Spreadsheet loading is not implemented yet.
    """
    message = "Spreadsheet loading is not implemented yet"
    raise NotImplementedError(message)
def export_nifti(data, img, fname):
    """
    Export a nifti file.

    The affine and the header of ``img`` are reused for the new image.
    If ``fname`` has no nifti extension, ``.nii.gz`` is used.

    Parameters
    ----------
    data : numpy.ndarray
        Data to be exported.
    img : nib.img
        Nibabel image object.
    fname : str | os.PathLike
        Name of the output file.

    Returns
    -------
    0
        On a successful run

    Raises
    ------
    ImportError
        If nibabel cannot be imported.
    """
    try:
        import nibabel as nib
    except ImportError:
        raise ImportError(
            "nibabel is required to export nifti files. Please see install "
            "instructions."
        )

    _, stem, found_ext = check_ext(EXT_NIFTI, fname, remove=True)
    suffix = ".nii.gz" if found_ext is None else found_ext

    LGR.info(f"Exporting nifti data into {stem}{suffix}.")
    nib.Nifti1Image(data, img.affine, img.header).to_filename(f"{stem}{suffix}")

    return 0
def export_txt(data, fname, ext=None):
    """
    Export data into a text-like file.

    Data with up to two dimensions is written to a single file. 3D data is
    written into a folder named after ``fname``, one file for each index of
    the last axis.

    Parameters
    ----------
    data : np.ndarray
        Data to be exported.
    fname : str or os.PathLike
        Name of the output file.
    ext : str or None, optional
        Selected extension for export. It takes precedence over the
        extension of ``fname``, if any. If neither is given, ``.tsv.gz``
        is used.

    Returns
    -------
    0
        On a successful run

    Raises
    ------
    NotImplementedError
        If ``data`` has more than 3 dimensions.
    """
    has_ext, fname, ext_check = check_ext(EXT_ALL, fname, remove=True)

    if has_ext:
        if ext is not None:
            LGR.warning(
                f"Specified filename {fname}{ext_check} has an extension, but the "
                f"extension {ext} was specified. Forcing specified extension."
            )
        else:
            ext = ext_check
    elif ext is None:
        LGR.warning("Extension not specified. Forcing export in TSV.GZ format.")
        ext = ".tsv.gz"

    ext_lower = ext.lower()
    if ext_lower in [".csv", ".csv.gz", ""]:
        delimiter = ","
    elif ext_lower in [".tsv", ".tsv.gz"]:
        delimiter = "\t"
    else:
        # Covers .txt/.1d/.par and any other extension. None must not be
        # used here: np.savetxt converts the delimiter to str, so values
        # would be separated by the literal text "None".
        delimiter = " "

    if data.ndim > 3:
        # Fail loudly instead of returning success without writing anything.
        raise NotImplementedError(
            "Only matrices up to 3D can be exported as text, but given matrix "
            f"is {data.ndim}D."
        )

    if data.ndim < 3:
        np.savetxt(f"{fname}{ext}", data, fmt="%.6f", delimiter=delimiter)
    else:
        makedirs(fname, exist_ok=True)
        for i in range(data.shape[-1]):
            np.savetxt(
                join(fname, f"{i:03d}{ext}"),
                data[:, :, i],
                fmt="%.6f",
                delimiter=delimiter,
            )

    return 0
def export_mtx(data, fname, ext=None):
    """
    Export data into a text-like or mat file.

    Parameters
    ----------
    data : np.ndarray
        Data to be exported.
    fname : str | os.PathLike
        Name of the output file.
    ext : str or None, optional
        Selected extension for export. It takes precedence over the
        extension of ``fname``, if any. If neither is given, ``.tsv.gz``
        is used. Nifti extensions are replaced with ``.1D``.

    Notes
    -----
    Requires module scipy to export in .mat format.
    (Will require other modules to export in XLS-like format)

    Raises
    ------
    BrokenPipeError
        If somewhat an extension that is not supported passes all checks.
        (This should never happen)
    ImportError
        If scipy is not installed or cannot be found.
    NotImplementedError
        Spreadsheet output is not implemented yet.

    Returns
    -------
    0
        On a successful run

    See Also
    --------
    export_txt
    """
    has_ext, fname, ext_check = check_ext(EXT_ALL, fname, remove=True)

    if has_ext:
        if ext is None:
            ext = ext_check
        else:
            LGR.warning(
                f"Specified filename {fname}{ext_check} has an extension, but the "
                f"extension {ext} was specified. Forcing specified extension."
            )

    if ext in [None, ""]:
        LGR.warning(
            "Extension not specified, or specified extension not "
            "supported. Forcing export in TSV.GZ format."
        )
        ext = ".tsv.gz"
    elif ext.lower() in EXT_NIFTI:
        LGR.warning("Found nifti extension, exporting data in .1D instead")
        ext = ".1D"

    LGR.info(f"Exporting data into {fname}{ext}.")

    ext_lower = ext.lower()
    if ext_lower in EXT_MAT:
        try:
            # Import the function from the submodule: a bare ``import scipy``
            # does not guarantee that ``scipy.io`` is loaded.
            from scipy.io import savemat
        except ImportError as err:
            raise ImportError(
                "To export .mat files, scipy is required. Please install it."
            ) from err
        savemat(f"{fname}{ext}", {"data": data})
    elif ext_lower in EXT_XLS:
        raise NotImplementedError("Spreadsheet output is not implemented yet")
    elif ext_lower in EXT_1D:
        export_txt(data, fname, ext)
    else:
        raise BrokenPipeError(
            f"This should not have happened: {ext} was the selected extension."
        )

    return 0
# Maps each file-family label used in EXT_DICT to the function that loads
# files of that family.
LOADMAT_DICT = {
    "1D": load_txt,
    "xls": load_xls,
    "mat": load_mat,
    "nifti": load_nifti_get_mask,
}

# License notice, kept as a module-level string.
""" Copyright 2022, Stefano Moia. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """