Skip to content

API reference

The reference below is generated from the public Python API. Functions and classes inherit docstrings directly from the source code to keep behavior descriptions synchronized.

Core parcellator

volume

VolumetricParcellator

Base volumetric parcellator.

The parcellator assumes an integer-valued atlas where each non-background voxel stores the parcel identifier. Parcellation is performed by sampling a scalar map image and aggregating values inside each region. Resampling to the atlas grid is handled by default to keep atlas boundaries consistent across inputs.

Source code in src/parcellate/parcellation/volume.py
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
class VolumetricParcellator:
    """Base volumetric parcellator.

    The parcellator assumes an integer-valued atlas where each non-background
    voxel stores the parcel identifier. Parcellation is performed by sampling a
    scalar map image and aggregating values inside each region. Resampling to
    the atlas grid is handled by default to keep atlas boundaries consistent
    across inputs.
    """

    REQUIRED_LUT_COLUMNS: ClassVar[set[str]] = {"index", "label"}
    # Values are loader callables (see _load_mask), not strings.
    BUILTIN_STANDARD_MASKS: ClassVar[Mapping[str, Callable[..., nib.Nifti1Image]]] = {
        "gm": load_mni152_gm_mask,
        "wm": load_mni152_wm_mask,
        "brain": load_mni152_brain_mask,
    }

    def __init__(
        self,
        atlas_img: nib.Nifti1Image | str | Path,
        labels: Mapping[int, str] | Sequence[str] | None = None,
        lut: pd.DataFrame | str | Path | None = None,
        *,
        mask: nib.Nifti1Image | str | Path | None = None,
        mask_threshold: float = 0.0,
        atlas_threshold: float = 0.0,
        background_label: int = 0,
        resampling_target: Literal["data", "labels", "atlas", None] = "data",
        stat_functions: Mapping[str, Callable[..., float]] | None = None,
        stat_tier: str | None = None,
    ) -> None:
        """
        Initialize a volumetric parcellator

        Parameters
        ----------
        atlas_img : nib.Nifti1Image | str | Path
            The atlas image defining the parcellation.
        labels : Mapping[int, str] | Sequence[str] | None, optional
            Region labels mapping or sequence, by default None
        lut : pd.DataFrame | str | Path | None, optional
            Lookup table for region labels, by default None. Must include columns
            "index" and "label" following the BIDS standard.
        mask : nib.Nifti1Image | str | Path | None, optional
            Optional mask to apply to the atlas, by default None
        mask_threshold : float, optional
            Threshold for the mask image. Voxels with mask values strictly greater
            than this threshold are included; all others are excluded. Default is
            ``0.0``, which preserves the original behaviour of including every
            non-zero voxel. Use values in [0, 1] for probability maps (e.g.
            ``0.5`` to keep only voxels with >50 % gray-matter probability).
        atlas_threshold : float, optional
            Threshold for probabilistic (4D) atlas volumes. Voxels with probability
            strictly greater than this value are included in each region's mask.
            Default is ``0.0`` (any non-zero probability voxel is included). Only
            used when the atlas image is 4D. Ignored for 3D discrete atlases.
        background_label : int, optional
            Label value to treat as background, by default 0
        resampling_target : Literal["data", "labels", "atlas", None], optional
            Resampling target for input maps, by default "data". The values
            ``"labels"`` and ``"atlas"`` are synonyms: both resample input maps
            onto the atlas grid.
        stat_functions : Mapping[str, StatFunction] | None, optional
            Mapping of statistic names to functions, by default None.
            When provided, takes precedence over ``stat_tier``.
        stat_tier : str | None, optional
            Named statistics tier to use when ``stat_functions`` is ``None``.
            Valid values: ``"core"``, ``"extended"``, ``"diagnostic"``, ``"all"``.
            Defaults to ``None``, which selects all built-in statistics
            (equivalent to ``"diagnostic"``).
        """
        self.atlas_img = _load_nifti(atlas_img)
        self.lut = self._load_atlas_lut(lut) if lut is not None else None
        self.mask_threshold = float(mask_threshold)
        self.atlas_threshold = float(atlas_threshold)
        # mask_threshold must be set before _load_mask: built-in loaders take it.
        self.mask = self._load_mask(mask) if mask is not None else None
        self.background_label = int(background_label)
        self.resampling_target = resampling_target
        self._atlas_data = self._load_atlas_data()
        # A 4D atlas is treated as probabilistic (one volume per region).
        self._is_probabilistic = self._atlas_data.ndim == 4
        self._regions = self._build_regions(labels)
        self._stat_functions = self._prepare_stat_functions(stat_functions, stat_tier=stat_tier)
        self._fitted_scalar_id: str | int | None = None

    def _load_mask(self, mask: nib.Nifti1Image | str | Path) -> nib.Nifti1Image:
        """
        Load a mask image, supporting built-in standard masks.

        Parameters
        ----------
        mask : nib.Nifti1Image | str | Path
            Mask image to load. The strings ``"gm"``, ``"wm"`` and ``"brain"``
            select the built-in MNI152 masks.

        Returns
        -------
        nib.Nifti1Image
            Loaded mask image.
        """
        if isinstance(mask, str) and mask in self.BUILTIN_STANDARD_MASKS:
            return self.BUILTIN_STANDARD_MASKS[mask](threshold=self.mask_threshold)
        return _load_nifti(mask)

    def _get_labels(self, labels: Mapping[int, str] | Sequence[str] | None) -> list[int]:
        """
        Get labels from those required by the user and the ones from the lut/image

        Parameters
        ----------
        labels : Mapping[int, str] | Sequence[str] | None
            Labels provided by the user.

        Returns
        -------
        list[int]
            List of labels to use.
        """
        if labels is not None:
            if isinstance(labels, Mapping):
                return list(labels.keys())
            elif isinstance(labels, Sequence):
                # NOTE(review): a Sequence is returned as-is; callers are expected
                # to pass region IDs here, not label names — confirm against usage.
                return list(labels)
        if self.lut is not None:
            return self.lut["index"].tolist()
        if self._atlas_data.ndim == 4:
            # Probabilistic atlas: one region per 4th-axis volume, IDs are 1-based.
            return list(range(1, self._atlas_data.shape[3] + 1))
        return list(np.unique(self._atlas_data[self._atlas_data != self.background_label]).astype(int))

    def _load_atlas_lut(self, lut: pd.DataFrame | str | Path) -> pd.DataFrame:
        """
        Load atlas lookup table and make sure it contains required columns

        Parameters
        ----------
        lut : pd.DataFrame | str | Path
            Lookup table to load. Paths are read as tab-separated files.

        Returns
        -------
        pd.DataFrame
            Loaded lookup table.

        Raises
        ------
        MissingLUTColumnsError
            If required columns ("index", "label") are missing.
        """
        lut_df = lut if isinstance(lut, pd.DataFrame) else pd.read_csv(lut, sep="\t")
        required_columns = self.REQUIRED_LUT_COLUMNS
        if not required_columns.issubset(lut_df.columns):
            missing = required_columns - set(lut_df.columns)
            raise MissingLUTColumnsError(missing)

        return lut_df

    @property
    def regions(self) -> tuple[int, ...]:
        """Tuple of regions defined in the atlas."""
        return self._regions

    def _apply_mask_to_atlas(self) -> nib.Nifti1Image:
        """
        Apply masking to parcellation atlas.

        Returns
        -------
        nib.Nifti1Image
            Masked atlas image.
        """
        # Copy: get_fdata() returns nibabel's cached array, which must not be
        # mutated in place (it would silently alter self._prepared_atlas_img).
        atlas_data = np.array(self._prepared_atlas_img.get_fdata())
        mask_data = np.asarray(self._prepared_mask.get_fdata()) > self.mask_threshold
        if atlas_data.ndim == 4:
            # Probabilistic atlas: zero out probabilities outside the mask.
            atlas_data[~mask_data] = 0.0
        else:
            atlas_data[~mask_data] = self.background_label
        return nib.Nifti1Image(atlas_data, self._prepared_atlas_img.affine, self._prepared_atlas_img.header)

    def _prepare_map(
        self,
        source: nib.Nifti1Image,
        reference: nib.Nifti1Image,
        interpolation: str = "nearest",
    ) -> nib.Nifti1Image:
        """Resample source image to reference image grid if needed.

        Parameters
        ----------
        source : nib.Nifti1Image
            Source image to resample.
        reference : nib.Nifti1Image
            Reference image defining the target grid.
        interpolation : str, optional
            Interpolation mode passed to nilearn, by default "nearest".

        Returns
        -------
        nib.Nifti1Image
            Resampled image.
        """
        return resample_to_img(
            source,
            reference,
            interpolation=interpolation,
            force_resample=True,
            copy_header=True,
        )

    def _build_regions(self, labels: Mapping[int, str] | Sequence[str] | None) -> tuple[int, ...]:
        """
        Build region definitions from atlas data and optional labels.

        Parameters
        ----------
        labels : Mapping[int, str] | Sequence[str] | None
            Optional labels provided by the user.

        Returns
        -------
        tuple[int, ...]
            Sorted region identifiers, excluding the background label for
            discrete (3D) atlases.
        """
        atlas_ids = set(self._get_labels(labels))
        if not self._is_probabilistic:
            atlas_ids.discard(self.background_label)
        return tuple(sorted(atlas_ids))

    def _load_atlas_data(self) -> np.ndarray:
        """Load atlas voxel data, validating dimensionality (3D int or 4D float)."""
        atlas_data = np.asarray(self.atlas_img.get_fdata())
        if atlas_data.ndim == 3:
            return atlas_data.astype(int)
        elif atlas_data.ndim == 4:
            return atlas_data.astype(np.float32)
        raise AtlasShapeError(ndim=atlas_data.ndim)

    def _prepare_stat_functions(
        self,
        stat_functions: Mapping[str, Callable[..., float]] | None = None,
        *,
        fallback: Mapping[str, Callable[..., float]] | None = None,
        stat_tier: str | None = None,
    ) -> list[Statistic]:
        """
        Generate a list of summary statistics to describe each ROI.

        Parameters
        ----------
        stat_functions : Mapping[str, Callable[..., float]] | None
            Either a mapping of statistic names to functions or None to use defaults.
            When provided, takes precedence over ``stat_tier``.
        fallback : Mapping[str, Callable[..., float]] | None, optional
            Fallback statistics to use if stat_functions is None, by default None.
            Ignored when ``stat_tier`` is set.
        stat_tier : str | None, optional
            Named tier to select when ``stat_functions`` is ``None``.
            Valid values: ``"core"``, ``"extended"``, ``"diagnostic"``, ``"all"``.

        Returns
        -------
        list[Statistic]
            List of prepared statistics.

        Raises
        ------
        MissingStatisticalFunctionError
            If no statistical functions are provided, if an unknown tier name
            is supplied, or if ``stat_functions`` has an unsupported type.
        """
        if stat_functions is None:
            if stat_tier is not None:
                if stat_tier not in STATISTIC_TIERS:
                    valid = ", ".join(f'"{k}"' for k in STATISTIC_TIERS)
                    raise MissingStatisticalFunctionError(
                        message=f"Unknown stat_tier {stat_tier!r}. Valid tiers: {valid}."
                    )
                return STATISTIC_TIERS[stat_tier]
            if fallback is None:
                return BUILTIN_STATISTICS
            return fallback
        if isinstance(stat_functions, Mapping):
            prepared = [Statistic(name, func) for name, func in stat_functions.items()]
        elif isinstance(stat_functions, Sequence):
            if all(isinstance(s, Statistic) for s in stat_functions):
                prepared = list(stat_functions)
            else:
                raise MissingStatisticalFunctionError(message="Statistic sequence must contain Statistic instances.")
        else:
            raise MissingStatisticalFunctionError(
                message=f"stat_functions must be a Mapping or Sequence, got {type(stat_functions).__name__}"
            )

        # `not prepared` already covers the empty case; no length check needed.
        if not prepared:
            raise MissingStatisticalFunctionError()
        return prepared

    def fit(self, scalar_img: nib.Nifti1Image | str | Path) -> None:
        """
        Fit the parcellator to a scalar image.

        Resamples the atlas, the scalar image and (if set) the mask onto a
        common reference grid selected by ``resampling_target``, caching the
        resampled atlas and mask by reference-image identity.

        Parameters
        ----------
        scalar_img : nib.Nifti1Image | str | Path
            Scalar image to fit to.
        """
        self.scalar_img = _load_nifti(scalar_img)
        if self.resampling_target in ("labels", "atlas"):
            ref_img = self.atlas_img
            interpolation = "continuous"
        else:
            ref_img = self.scalar_img
            interpolation = "nearest"

        # Cache atlas resampling by reference image identity (optimization 4.2)
        ref_id = id(ref_img)
        if not hasattr(self, "_cached_atlas_ref_id") or self._cached_atlas_ref_id != ref_id:
            atlas_interpolation = "continuous" if self._is_probabilistic else "nearest"
            self._prepared_atlas_img = self._prepare_map(self.atlas_img, ref_img, interpolation=atlas_interpolation)
            self._cached_atlas_ref_id = ref_id
            # Reset mask cache when atlas cache is invalidated
            if hasattr(self, "_cached_mask_ref_id"):
                delattr(self, "_cached_mask_ref_id")

        self._prepared_scalar_img = self._prepare_map(self.scalar_img, ref_img, interpolation=interpolation)

        # Cache mask resampling by reference image identity (optimization 4.1)
        if self.mask is not None:
            if not hasattr(self, "_cached_mask_ref_id") or self._cached_mask_ref_id != ref_id:
                self._prepared_mask = self._prepare_map(self.mask, ref_img, interpolation="nearest")
                self._cached_mask_ref_id = ref_id
            self._prepared_atlas_img = self._apply_mask_to_atlas()

        self.ref_img = ref_img
        # Remember which scalar image was fitted so transform() can reuse the
        # cached resampled scalar: paths compare by string, images by identity.
        if isinstance(scalar_img, (str, Path)):
            self._fitted_scalar_id = str(scalar_img)
        else:
            self._fitted_scalar_id = id(scalar_img)

    def transform(self, scalar_img: str | Path | nib.Nifti1Image) -> pd.DataFrame:
        """
        Apply the parcellation to the fitted scalar image.

        Parameters
        ----------
        scalar_img : str | Path | nib.Nifti1Image
            Scalar image to parcellate. If it is the same image (or path) that
            was passed to ``fit``, the cached resampled scalar is reused;
            otherwise the image is resampled to the fitted reference grid.

        Returns
        -------
        pd.DataFrame
            DataFrame containing parcellation statistics for each region.

        Raises
        ------
        ParcellatorNotFittedError
            If ``fit`` has not been called before ``transform``.
        """
        if not hasattr(self, "_prepared_atlas_img") or not hasattr(self, "_prepared_scalar_img"):
            raise ParcellatorNotFittedError()

        current_scalar_id: str | int = str(scalar_img) if isinstance(scalar_img, (str, Path)) else id(scalar_img)

        if self._fitted_scalar_id == current_scalar_id:
            prepared_scalar_img = self._prepared_scalar_img
        else:
            prepared_scalar_img = self._prepare_map(
                _load_nifti(scalar_img),
                self.ref_img,
                interpolation="continuous",
            )

        scalar_data = np.asarray(prepared_scalar_img.get_fdata())
        if self._is_probabilistic:
            atlas_data = np.asarray(self._prepared_atlas_img.get_fdata())
        else:
            atlas_data = np.asarray(self._prepared_atlas_img.get_fdata()).astype(int)
        if self.lut is not None:
            result = self.lut.copy()
        else:
            result = pd.DataFrame({"index": self._regions, "label": [str(r) for r in self._regions]})

        valid_region_ids = set(result["index"])
        stats_data = {}

        for region_id in self._regions:
            if region_id not in valid_region_ids:
                logger.warning("Region ID %d not found in LUT; skipping.", region_id)
                continue
            if self._is_probabilistic:
                vol_idx = region_id - 1  # 1-based LUT index → 0-based volume index
                prob_map = atlas_data[:, :, :, vol_idx]
                parcel_mask = prob_map > self.atlas_threshold
            else:
                parcel_mask = atlas_data == region_id
            parcel_values = scalar_data[parcel_mask]

            region_stats = {}
            for stat in self._stat_functions:
                stat_name = stat.name
                stat_func = stat.function
                # Image-aware statistics (e.g. volume) also need the scalar image.
                if stat.requires_image:
                    stat_value = stat_func(parcel_values, prepared_scalar_img)
                else:
                    stat_value = stat_func(parcel_values)
                region_stats[stat_name] = stat_value
            stats_data[region_id] = region_stats

        stats_df = pd.DataFrame.from_dict(stats_data, orient="index")
        stats_df.index.name = "index"
        result = result.merge(stats_df, on="index", how="left")

        return result

regions property

Tuple of regions defined in the atlas.

__init__(atlas_img, labels=None, lut=None, *, mask=None, mask_threshold=0.0, atlas_threshold=0.0, background_label=0, resampling_target='data', stat_functions=None, stat_tier=None)

Initialize a volumetric parcellator

Parameters:

Name Type Description Default
atlas_img Nifti1Image | str | Path

The atlas image defining the parcellation.

required
labels Mapping[int, str] | Sequence[str] | None

Region labels mapping or sequence, by default None

None
lut DataFrame | str | Path | None

Lookup table for region labels, by default None. Must include columns "index" and "label" following the BIDS standard.

None
mask Nifti1Image | str | Path | None

Optional mask to apply to the atlas, by default None

None
mask_threshold float

Threshold for the mask image. Voxels with mask values strictly greater than this threshold are included; all others are excluded. Default is 0.0, which preserves the original behaviour of including every non-zero voxel. Use values in [0, 1] for probability maps (e.g. 0.5 to keep only voxels with >50 % gray-matter probability).

0.0
atlas_threshold float

Threshold for probabilistic (4D) atlas volumes. Voxels with probability strictly greater than this value are included in each region's mask. Default is 0.0 (any non-zero probability voxel is included). Only used when the atlas image is 4D. Ignored for 3D discrete atlases.

0.0
background_label int

Label value to treat as background, by default 0

0
resampling_target Literal['data', 'labels', None]

Resampling target for input maps, by default "data"

'data'
stat_functions Mapping[str, StatFunction] | None

Mapping of statistic names to functions, by default None. When provided, takes precedence over stat_tier.

None
stat_tier str | None

Named statistics tier to use when stat_functions is None. Valid values: "core", "extended", "diagnostic", "all". Defaults to None, which selects all built-in statistics (equivalent to "diagnostic").

None
Source code in src/parcellate/parcellation/volume.py
def __init__(
    self,
    atlas_img: nib.Nifti1Image | str | Path,
    labels: Mapping[int, str] | Sequence[str] | None = None,
    lut: pd.DataFrame | str | Path | None = None,
    *,
    mask: nib.Nifti1Image | str | Path | None = None,
    mask_threshold: float = 0.0,
    atlas_threshold: float = 0.0,
    background_label: int = 0,
    resampling_target: Literal["data", "labels", "atlas", None] = "data",
    stat_functions: Mapping[str, Callable[..., float]] | None = None,
    stat_tier: str | None = None,
) -> None:
    """
    Initialize a volumetric parcellator

    Parameters
    ----------
    atlas_img : nib.Nifti1Image | str | Path
        The atlas image defining the parcellation.
    labels : Mapping[int, str] | Sequence[str] | None, optional
        Region labels mapping or sequence, by default None
    lut : pd.DataFrame | str | Path | None, optional
        Lookup table for region labels, by default None. Must include columns
        "index" and "label" following the BIDS standard.
    mask : nib.Nifti1Image | str | Path | None, optional
        Optional mask to apply to the atlas, by default None
    mask_threshold : float, optional
        Threshold for the mask image. Voxels with mask values strictly greater
        than this threshold are included; all others are excluded. Default is
        ``0.0``, which preserves the original behaviour of including every
        non-zero voxel. Use values in [0, 1] for probability maps (e.g.
        ``0.5`` to keep only voxels with >50 % gray-matter probability).
    atlas_threshold : float, optional
        Threshold for probabilistic (4D) atlas volumes. Voxels with probability
        strictly greater than this value are included in each region's mask.
        Default is ``0.0`` (any non-zero probability voxel is included). Only
        used when the atlas image is 4D. Ignored for 3D discrete atlases.
    background_label : int, optional
        Label value to treat as background, by default 0
    resampling_target : Literal["data", "labels", "atlas", None], optional
        Resampling target for input maps, by default "data". The values
        ``"labels"`` and ``"atlas"`` both resample onto the atlas grid.
    stat_functions : Mapping[str, StatFunction] | None, optional
        Mapping of statistic names to functions, by default None.
        When provided, takes precedence over ``stat_tier``.
    stat_tier : str | None, optional
        Named statistics tier to use when ``stat_functions`` is ``None``.
        Valid values: ``"core"``, ``"extended"``, ``"diagnostic"``, ``"all"``.
        Defaults to ``None``, which selects all built-in statistics
        (equivalent to ``"diagnostic"``).
    """
    self.atlas_img = _load_nifti(atlas_img)
    self.lut = self._load_atlas_lut(lut) if lut is not None else None
    # Thresholds must be set before _load_mask: built-in mask loaders take them.
    self.mask_threshold = float(mask_threshold)
    self.atlas_threshold = float(atlas_threshold)
    self.mask = self._load_mask(mask) if mask is not None else None
    self.background_label = int(background_label)
    self.resampling_target = resampling_target
    self._atlas_data = self._load_atlas_data()
    # A 4D atlas is treated as probabilistic (one volume per region).
    self._is_probabilistic = self._atlas_data.ndim == 4
    self._regions = self._build_regions(labels)
    self._stat_functions = self._prepare_stat_functions(stat_functions, stat_tier=stat_tier)
    self._fitted_scalar_id: str | int | None = None

fit(scalar_img)

Fit the parcellator to a scalar image.

Parameters:

Name Type Description Default
scalar_img Nifti1Image | str | Path

Scalar image to fit to.

required
Source code in src/parcellate/parcellation/volume.py
def fit(self, scalar_img: nib.Nifti1Image | str | Path) -> None:
    """
    Fit the parcellator to a scalar image.

    Resamples the atlas, the scalar image and (if set) the mask onto a
    common reference grid selected by ``resampling_target``, caching the
    resampled atlas and mask by reference-image identity so repeated fits
    against the same grid skip redundant resampling.

    Parameters
    ----------
    scalar_img : nib.Nifti1Image | str | Path
        Scalar image to fit to.
    """
    self.scalar_img = _load_nifti(scalar_img)
    # "labels"/"atlas": sample on the atlas grid; otherwise on the data grid.
    if self.resampling_target in ("labels", "atlas"):
        ref_img = self.atlas_img
        interpolation = "continuous"
    else:
        ref_img = self.scalar_img
        interpolation = "nearest"

    # Cache atlas resampling by reference image identity (optimization 4.2)
    ref_id = id(ref_img)
    if not hasattr(self, "_cached_atlas_ref_id") or self._cached_atlas_ref_id != ref_id:
        # Probabilistic (4D) atlases need continuous interpolation; discrete
        # label volumes must stay integer-valued, so use nearest-neighbour.
        atlas_interpolation = "continuous" if self._is_probabilistic else "nearest"
        self._prepared_atlas_img = self._prepare_map(self.atlas_img, ref_img, interpolation=atlas_interpolation)
        self._cached_atlas_ref_id = ref_id
        # Reset mask cache when atlas cache is invalidated
        if hasattr(self, "_cached_mask_ref_id"):
            delattr(self, "_cached_mask_ref_id")

    self._prepared_scalar_img = self._prepare_map(self.scalar_img, ref_img, interpolation=interpolation)

    # Cache mask resampling by reference image identity (optimization 4.1)
    if self.mask is not None:
        if not hasattr(self, "_cached_mask_ref_id") or self._cached_mask_ref_id != ref_id:
            self._prepared_mask = self._prepare_map(self.mask, ref_img, interpolation="nearest")
            self._cached_mask_ref_id = ref_id
        self._prepared_atlas_img = self._apply_mask_to_atlas()

    self.ref_img = ref_img
    # Remember which scalar was fitted so transform() can reuse the cached
    # resampled scalar: paths compare by string, in-memory images by identity.
    if isinstance(scalar_img, (str, Path)):
        self._fitted_scalar_id = str(scalar_img)
    else:
        self._fitted_scalar_id = id(scalar_img)

transform(scalar_img)

Apply the parcellation to the fitted scalar image.

Returns:

Type Description
DataFrame

DataFrame containing parcellation statistics for each region.

Source code in src/parcellate/parcellation/volume.py
def transform(self, scalar_img: str | Path | nib.Nifti1Image) -> pd.DataFrame:
    """
    Apply the parcellation to the fitted scalar image.

    Parameters
    ----------
    scalar_img : str | Path | nib.Nifti1Image
        Scalar image to parcellate. If it is the same image (or path) that
        was passed to ``fit``, the cached resampled scalar is reused;
        otherwise the image is resampled to the fitted reference grid.

    Returns
    -------
    pd.DataFrame
        DataFrame containing parcellation statistics for each region.

    Raises
    ------
    ParcellatorNotFittedError
        If ``fit`` has not been called before ``transform``.
    """
    if not hasattr(self, "_prepared_atlas_img") or not hasattr(self, "_prepared_scalar_img"):
        raise ParcellatorNotFittedError()

    # Same identity scheme as fit(): path string for paths, id() for images.
    current_scalar_id: str | int = str(scalar_img) if isinstance(scalar_img, (str, Path)) else id(scalar_img)

    if self._fitted_scalar_id == current_scalar_id:
        prepared_scalar_img = self._prepared_scalar_img
    else:
        prepared_scalar_img = self._prepare_map(
            _load_nifti(scalar_img),
            self.ref_img,
            interpolation="continuous",
        )

    scalar_data = np.asarray(prepared_scalar_img.get_fdata())
    if self._is_probabilistic:
        atlas_data = np.asarray(self._prepared_atlas_img.get_fdata())
    else:
        atlas_data = np.asarray(self._prepared_atlas_img.get_fdata()).astype(int)
    # The output table starts from the LUT when available; otherwise region
    # IDs double as labels.
    if self.lut is not None:
        result = self.lut.copy()
    else:
        result = pd.DataFrame({"index": self._regions, "label": [str(r) for r in self._regions]})

    valid_region_ids = set(result["index"])
    stats_data = {}

    for region_id in self._regions:
        if region_id not in valid_region_ids:
            logger.warning("Region ID %d not found in LUT; skipping.", region_id)
            continue
        if self._is_probabilistic:
            vol_idx = region_id - 1  # 1-based LUT index → 0-based volume index
            prob_map = atlas_data[:, :, :, vol_idx]
            parcel_mask = prob_map > self.atlas_threshold
        else:
            parcel_mask = atlas_data == region_id
        parcel_values = scalar_data[parcel_mask]

        region_stats = {}
        for stat in self._stat_functions:
            stat_name = stat.name
            stat_func = stat.function
            # Image-aware statistics (e.g. volume) also need the scalar image.
            if stat.requires_image:
                stat_value = stat_func(parcel_values, prepared_scalar_img)
            else:
                stat_value = stat_func(parcel_values)
            region_stats[stat_name] = stat_value
        stats_data[region_id] = region_stats

    stats_df = pd.DataFrame.from_dict(stats_data, orient="index")
    stats_df.index.name = "index"
    result = result.merge(stats_df, on="index", how="left")

    return result

Metrics

base

Statistic dataclass

Container for a parcellation statistic.

Source code in src/parcellate/metrics/base.py
@dataclass
class Statistic:
    """Container for a parcellation statistic."""

    # Statistic name; used to key the computed value in result tables.
    name: str
    # Callable that computes the statistic from an array of parcel values.
    function: Callable[..., Any]
    # When True, the scalar image is passed to `function` as a second argument.
    requires_image: bool = False
volume

A battery of volumetric parcellation statistics.

volume(parcel_values, scalar_img)

Compute the actual tissue volume within a mask using modulated images.

Parameters:

Name Type Description Default
parcel_values ndarray

The modulated tissue segment values within the ROI.

required
scalar_img Nifti1Image

The modulated tissue segment (e.g., mwp1.nii).

required
Source code in src/parcellate/metrics/volume.py
def volume(parcel_values: np.ndarray, scalar_img: nib.Nifti1Image) -> float:
    """Compute the actual tissue volume within a mask using modulated images.

    Parameters
    ----------
    parcel_values : np.ndarray
        The modulated tissue segment values within the ROI.
    scalar_img : nib.Nifti1Image
        The modulated tissue segment (e.g., mwp1.nii).
    """
    # Calculate the volume of a single voxel in mm^3
    voxel_sizes = scalar_img.header.get_zooms()[:3]
    voxel_volume = np.prod(voxel_sizes)

    # Correct step: Sum the intensities within the mask
    # This represents the sum of tissue fractions/volume units
    tissue_sum = np.nansum(parcel_values)

    # Total volume in mm^3
    total_volume = tissue_sum * voxel_volume

    return float(total_volume)

voxel_count(parcel_values)

Compute the count of non-zero voxels in a parcel.

This function counts the number of voxels in a parcel that have non-zero and non-NaN values. This is useful for modulated tissue maps (e.g., CAT12's mwp* files) where zero values indicate no tissue present.

Parameters:

Name Type Description Default
parcel_values ndarray

An array of scalar values for voxels within the parcel.

required

Returns:

Type Description
int

The number of non-zero, non-NaN voxels in the parcel.

Source code in src/parcellate/metrics/volume.py
def voxel_count(parcel_values: np.ndarray) -> int:
    """Compute the count of non-zero voxels in a parcel.

    This function counts the number of voxels in a parcel that have
    non-zero and non-NaN values. This is useful for modulated tissue
    maps (e.g., CAT12's mwp* files) where zero values indicate no
    tissue present.

    Parameters
    ----------
    parcel_values : np.ndarray
        An array of scalar values for voxels within the parcel.

    Returns
    -------
    int
        The number of non-zero, non-NaN voxels in the parcel.
    """
    values = np.asarray(parcel_values, dtype=float)
    # BUG FIX: `astype(bool)` treats NaN as True, so NaN voxels were counted
    # despite the documented contract. Exclude NaN explicitly.
    valid = ~np.isnan(values) & (values != 0)
    return int(np.count_nonzero(valid))

z_filtered_mean(values, z_thresh=3.0)

Compute the mean of values after applying a z-score filter.

Parameters:

Name Type Description Default
values ndarray

The array of values to filter and compute the mean from.

required
z_thresh float

The z-score threshold for filtering, by default 3.0.

3.0

Returns:

Type Description
float

The mean of the filtered values.

Source code in src/parcellate/metrics/volume.py
def z_filtered_mean(values: np.ndarray, z_thresh: float = 3.0) -> float:
    """Compute the mean of values after applying a z-score filter.

    Parameters
    ----------
    values : np.ndarray
        The array of values to filter and compute the mean from.
    z_thresh : float, optional
        The z-score threshold for filtering, by default 3.0.

    Returns
    -------
    float
        The mean of the filtered values; falls back to the unfiltered mean
        when the filter rejects every value.
    """
    kept = _z_score_filter(values, z_thresh)
    source = values if kept.size == 0 else kept
    return float(np.nanmean(source))

z_filtered_std(values, z_thresh=3.0)

Compute the standard deviation of values after applying a z-score filter.

Parameters:

Name Type Description Default
values ndarray

The array of values to filter and compute the standard deviation from.

required
z_thresh float

The z-score threshold for filtering, by default 3.0.

3.0

Returns:

Type Description
float

The standard deviation of the filtered values.

Source code in src/parcellate/metrics/volume.py
def z_filtered_std(values: np.ndarray, z_thresh: float = 3.0) -> float:
    """Standard deviation of ``values`` after removing z-score outliers.

    Parameters
    ----------
    values : np.ndarray
        The array of values to filter.
    z_thresh : float, optional
        The z-score cutoff used by the filter, by default 3.0.

    Returns
    -------
    float
        NaN-aware standard deviation of the surviving values; falls back
        to the unfiltered NaN-aware standard deviation when the filter
        removes everything.
    """
    # All values identical: spread is zero with or without filtering.
    if np.nanstd(values) == 0:
        return 0.0
    kept = _z_score_filter(values, z_thresh)
    source = values if kept.size == 0 else kept
    return float(np.nanstd(source))

iqr_filtered_mean(values, factor=1.5)

Compute the mean of values after applying an interquartile range (IQR) filter.

Parameters:

Name Type Description Default
values ndarray

The array of values to filter and compute the mean from.

required
factor float

The IQR factor for filtering, by default 1.5.

1.5

Returns:

Type Description
float

The mean of the filtered values.

Source code in src/parcellate/metrics/volume.py
def iqr_filtered_mean(values: np.ndarray, factor: float = 1.5) -> float:
    """Mean of ``values`` after discarding IQR-fence outliers.

    Parameters
    ----------
    values : np.ndarray
        The array of values to filter and average.
    factor : float, optional
        IQR multiplier defining the fences, by default 1.5.

    Returns
    -------
    float
        NaN-aware mean of the surviving values; falls back to the
        unfiltered NaN-aware mean when nothing survives the filter.
    """
    kept = _iqr_filter(values, factor)
    source = values if kept.size == 0 else kept
    return float(np.nanmean(source))

iqr_filtered_std(values, factor=1.5)

Compute the standard deviation of values after applying an interquartile range (IQR) filter.

Parameters:

Name Type Description Default
values ndarray

The array of values to filter and compute the standard deviation from.

required
factor float

The IQR factor for filtering, by default 1.5.

1.5

Returns:

Type Description
float

The standard deviation of the filtered values.

Source code in src/parcellate/metrics/volume.py
def iqr_filtered_std(values: np.ndarray, factor: float = 1.5) -> float:
    """Standard deviation of ``values`` after discarding IQR-fence outliers.

    Parameters
    ----------
    values : np.ndarray
        The array of values to filter.
    factor : float, optional
        IQR multiplier defining the fences, by default 1.5.

    Returns
    -------
    float
        NaN-aware standard deviation of the surviving values; falls back
        to the unfiltered NaN-aware standard deviation when nothing
        survives the filter.
    """
    kept = _iqr_filter(values, factor)
    source = values if kept.size == 0 else kept
    return float(np.nanstd(source))

robust_mean(values)

Compute the robust mean of values using median and MAD.

Parameters:

Name Type Description Default
values ndarray

The array of values to compute the robust mean from.

required

Returns:

Type Description
float

The robust mean of the values.

Source code in src/parcellate/metrics/volume.py
def robust_mean(values: np.ndarray) -> float:
    """Robust mean of ``values`` via a median/MAD filter.

    Parameters
    ----------
    values : np.ndarray
        The array of values to summarize.

    Returns
    -------
    float
        NaN-aware mean of the MAD-filtered values; falls back to the
        NaN-aware median of the raw input when the filter removes
        everything.
    """
    kept = _robust_filter(values)
    if kept.size > 0:
        return float(np.nanmean(kept))
    # Everything was filtered out: use the NaN-aware median instead.
    return float(np.nanmedian(values))

robust_std(values)

Compute the robust standard deviation of values using median and MAD.

Parameters:

Name Type Description Default
values ndarray

The array of values to compute the robust standard deviation from.

required

Returns:

Type Description
float

The robust standard deviation of the values.

Source code in src/parcellate/metrics/volume.py
def robust_std(values: np.ndarray) -> float:
    """Robust standard deviation of ``values`` via a median/MAD filter.

    Parameters
    ----------
    values : np.ndarray
        The array of values to summarize.

    Returns
    -------
    float
        NaN-aware standard deviation of the MAD-filtered values; falls
        back to the unfiltered NaN-aware standard deviation when the
        filter removes everything.
    """
    kept = _robust_filter(values)
    if kept.size > 0:
        return float(np.nanstd(kept))
    return float(np.nanstd(values))

mad_median(values)

Compute the median absolute deviation (MAD) of values.

Parameters:

Name Type Description Default
values ndarray

The array of values to compute the MAD from.

required

Returns:

Type Description
float

The MAD of the values.

Source code in src/parcellate/metrics/volume.py
def mad_median(values: np.ndarray) -> float:
    """Median absolute deviation (MAD) of ``values``.

    Parameters
    ----------
    values : np.ndarray
        The array of values to summarize.

    Returns
    -------
    float
        The NaN-aware median of absolute deviations from the NaN-aware
        median.
    """
    center = np.nanmedian(values)
    deviations = np.abs(values - center)
    return float(np.nanmedian(deviations))

volsum(values)

Compute the sum of values.

Parameters:

Name Type Description Default
values ndarray

The array of values to compute the sum from.

required

Returns:

Type Description
float

The sum of the values.

Source code in src/parcellate/metrics/volume.py
def volsum(values: np.ndarray) -> float:
    """Sum of ``values``, ignoring NaNs.

    Parameters
    ----------
    values : np.ndarray
        The array of values to sum.

    Returns
    -------
    float
        The NaN-aware sum of the values.
    """
    total = np.nansum(values)
    return float(total)

cv(values)

Classical coefficient of variation (CV = SD / mean).

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Coefficient of variation. Higher values indicate more relative variability.

Notes

Sensitive to mean approaching zero. For metrics that can be zero or negative, consider using robust_cv instead.

Typical values for neuroimaging: - Cortical thickness: CV ~ 0.10 - FA: CV ~ 0.15 - MD: CV ~ 0.20

Source code in src/parcellate/metrics/volume.py
def cv(values):
    """
    Classical coefficient of variation (CV = SD / mean).

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Coefficient of variation. Higher values indicate more relative variability.

    Notes
    -----
    Sensitive to mean approaching zero. For metrics that can be zero or negative,
    consider using robust_cv instead.

    Typical values for neuroimaging:
    - Cortical thickness: CV ~ 0.10
    - FA: CV ~ 0.15
    - MD: CV ~ 0.20
    """
    # Epsilon keeps the ratio finite when the mean is exactly zero.
    denom = np.mean(values) + 1e-10
    return np.std(values) / denom

robust_cv(values)

Robust coefficient of variation (IQR / median).

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Robust coefficient of variation. Less sensitive to outliers than classical CV.

Notes

Recommended when outliers are present or suspected. More stable than classical CV for distributions with heavy tails.

Source code in src/parcellate/metrics/volume.py
def robust_cv(values):
    """
    Robust coefficient of variation (IQR / median).

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Robust coefficient of variation. Less sensitive to outliers than classical CV.

    Notes
    -----
    Recommended when outliers are present or suspected. More stable than classical
    CV for distributions with heavy tails.
    """
    spread = iqr(values)
    # Epsilon keeps the ratio finite when the median is exactly zero.
    center = np.median(values) + 1e-10
    return spread / center

quartile_dispersion(values)

Quartile coefficient of dispersion.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Quartile dispersion coefficient: (Q3 - Q1) / (Q3 + Q1)

Notes

Scale-free measure of dispersion. Bounded between 0 and 1. Less affected by extreme values than CV.

Source code in src/parcellate/metrics/volume.py
def quartile_dispersion(values):
    """
    Quartile coefficient of dispersion.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Quartile dispersion coefficient: (Q3 - Q1) / (Q3 + Q1)

    Notes
    -----
    Scale-free measure of dispersion. Bounded between 0 and 1.
    Less affected by extreme values than CV. Returns NaN when no finite
    values remain.
    """
    finite = np.asarray(values)
    finite = finite[np.isfinite(finite)]
    if finite.size == 0:
        return float("nan")
    lo, hi = np.percentile(finite, [25, 75])
    # Epsilon guards against a zero denominator (Q1 + Q3 == 0).
    return (hi - lo) / (hi + lo + 1e-10)

skewness(values)

Skewness (third standardized moment).

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Skewness value. 0 = symmetric, >0 = right tail, <0 = left tail.

Notes

Problematic if |skewness| > 0.5.

Neuroimaging context: - Cortical thickness often shows negative skew (-0.2 to -0.8) due to partial volume effects - FA often shows positive skew (0.3 to 1.2) due to crossing fibers

Source code in src/parcellate/metrics/volume.py
def skewness(values):
    """
    Skewness (third standardized moment).

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Skewness value. 0 = symmetric, >0 = right tail, <0 = left tail.

    Notes
    -----
    Problematic if |skewness| > 0.5.

    Neuroimaging context:
    - Cortical thickness often shows negative skew (-0.2 to -0.8) due to
      partial volume effects
    - FA often shows positive skew (0.3 to 1.2) due to crossing fibers
    """
    stat = skew(values, nan_policy="omit")
    return float(stat)

excess_kurtosis(values)

Excess kurtosis (fourth standardized moment minus 3).

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Excess kurtosis. 0 = normal, >0 = heavy tails, <0 = light tails.

Notes

Problematic if |kurtosis| > 1.0.

Positive kurtosis indicates outlier-prone distributions. Common in diffusion metrics due to partial volume effects with CSF.

Source code in src/parcellate/metrics/volume.py
def excess_kurtosis(values):
    """
    Excess kurtosis (fourth standardized moment minus 3).

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Excess kurtosis. 0 = normal, >0 = heavy tails, <0 = light tails.

    Notes
    -----
    Problematic if |kurtosis| > 1.0.

    Positive kurtosis indicates outlier-prone distributions. Common in diffusion
    metrics due to partial volume effects with CSF.
    """
    stat = scipy_kurtosis(values, nan_policy="omit")
    return float(stat)

abs_skewness(values)

Absolute value of skewness.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Absolute skewness. Useful for categorizing without caring about direction.

Source code in src/parcellate/metrics/volume.py
def abs_skewness(values):
    """
    Absolute value of skewness.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Absolute skewness. Useful for categorizing without caring about direction.
    """
    magnitude = np.abs(skew(values, nan_policy="omit"))
    return float(magnitude)

abs_excess_kurtosis(values)

Absolute value of excess kurtosis.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Absolute excess kurtosis.

Source code in src/parcellate/metrics/volume.py
def abs_excess_kurtosis(values):
    """
    Absolute value of excess kurtosis.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Absolute excess kurtosis.
    """
    magnitude = np.abs(scipy_kurtosis(values, nan_policy="omit"))
    return float(magnitude)

bimodality_coefficient(values)

Bimodality coefficient (Pfister et al. 2013).

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Bimodality coefficient. Values > 0.555 suggest bimodal/multimodal distribution.

Notes

Calculated as: BC = (skew² + 1) / (kurtosis + 3(n-1)²/((n-2)(n-3)))

Critical for cortical thickness ROIs that span gyri and sulci, which often show bimodal distributions.

References

Pfister R, Schwarz KA, Janczyk M, Dale R, Freeman JB (2013). Good things peak in pairs: a note on the bimodality coefficient. Front Psychol 4:700.

Source code in src/parcellate/metrics/volume.py
def bimodality_coefficient(values):
    """
    Bimodality coefficient (Pfister et al. 2013).

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Bimodality coefficient. Values > 0.555 suggest bimodal/multimodal distribution.
        NaN when fewer than 4 values are supplied.

    Notes
    -----
    Calculated as: BC = (skew² + 1) / (kurtosis + 3(n-1)²/((n-2)(n-3)))

    Critical for cortical thickness ROIs that span gyri and sulci, which often
    show bimodal distributions.

    References
    ----------
    Pfister R, Schwarz KA, Janczyk M, Dale R, Freeman JB (2013).
    Good things peak in pairs: a note on the bimodality coefficient.
    Front Psychol 4:700.
    """
    n = len(values)
    # The sample-size correction needs n >= 4 to avoid division by zero.
    if n < 4:
        return np.nan

    # NOTE(review): n counts NaNs even though the moments omit them — confirm intended.
    g1 = skew(values, nan_policy="omit")
    raw_kurtosis = scipy_kurtosis(values, nan_policy="omit") + 3  # excess -> raw

    correction = 3 * ((n - 1) ** 2) / ((n - 2) * (n - 3))
    return float((g1**2 + 1) / (raw_kurtosis + correction))

prop_outliers_2sd(values)

Proportion of values beyond ±2 standard deviations from mean.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Proportion of outliers (0 to 1). Expected ~0.046 for normal distribution.

Notes

Problematic if > 0.10 (10%).

Source code in src/parcellate/metrics/volume.py
def prop_outliers_2sd(values):
    """
    Proportion of values beyond ±2 standard deviations from mean.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Proportion of outliers (0 to 1). Expected ~0.046 for normal distribution.

    Notes
    -----
    Problematic if > 0.10 (10%).
    """
    center = np.mean(values)
    spread = np.std(values)
    return np.mean(np.abs(values - center) > 2 * spread)

prop_outliers_3sd(values)

Proportion of values beyond ±3 standard deviations from mean.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Proportion of outliers (0 to 1). Expected ~0.003 for normal distribution.

Notes

Problematic if > 0.02 (2%). High values suggest segmentation errors or image quality issues.

Source code in src/parcellate/metrics/volume.py
def prop_outliers_3sd(values):
    """
    Proportion of values beyond ±3 standard deviations from mean.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Proportion of outliers (0 to 1). Expected ~0.003 for normal distribution.

    Notes
    -----
    Problematic if > 0.02 (2%).
    High values suggest segmentation errors or image quality issues.
    """
    center = np.mean(values)
    spread = np.std(values)
    return np.mean(np.abs(values - center) > 3 * spread)

prop_outliers_iqr(values)

Proportion of Tukey fence outliers (beyond Q1 − 1.5×IQR or Q3 + 1.5×IQR).

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Proportion of outliers (0 to 1). Expected ~0.007 for normal distribution.

Notes

More robust than SD-based outlier detection. This is the standard boxplot definition of outliers. Problematic if > 0.05 (5%).

Source code in src/parcellate/metrics/volume.py
def prop_outliers_iqr(values):
    """
    Proportion of Tukey fence outliers (beyond Q1 - 1.5*IQR or Q3 + 1.5*IQR).

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Proportion of outliers (0 to 1). Expected ~0.007 for normal distribution.
        NaN when no finite values remain.

    Notes
    -----
    More robust than SD-based outlier detection. This is the standard boxplot
    definition of outliers. Problematic if > 0.05 (5%).
    """
    finite = np.asarray(values)
    finite = finite[np.isfinite(finite)]
    if finite.size == 0:
        return float("nan")
    q1, q3 = np.percentile(finite, [25, 75])
    margin = 1.5 * (q3 - q1)
    outside = (finite < q1 - margin) | (finite > q3 + margin)
    return float(np.mean(outside))

left_tail_mass(values)

Proportion of values in left tail (below mean - 2*SD).

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Left tail mass. Expected ~0.023 for normal distribution.

Source code in src/parcellate/metrics/volume.py
def left_tail_mass(values):
    """
    Proportion of values in left tail (below mean - 2*SD).

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Left tail mass. Expected ~0.023 for normal distribution.
    """
    center, spread = np.mean(values), np.std(values)
    cutoff = center - 2 * spread
    return np.mean(values < cutoff)

right_tail_mass(values)

Proportion of values in right tail (above mean + 2*SD).

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Right tail mass. Expected ~0.023 for normal distribution.

Source code in src/parcellate/metrics/volume.py
def right_tail_mass(values):
    """
    Proportion of values in right tail (above mean + 2*SD).

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Right tail mass. Expected ~0.023 for normal distribution.
    """
    center, spread = np.mean(values), np.std(values)
    cutoff = center + 2 * spread
    return np.mean(values > cutoff)

tail_asymmetry(values)

Difference between right and left tail masses.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Tail asymmetry. >0 = heavier right tail, <0 = heavier left tail.

Notes

Expected ~0 for symmetric distributions. Problematic if |asymmetry| > 0.02. Helps identify which tail is causing distributional problems.

Source code in src/parcellate/metrics/volume.py
def tail_asymmetry(values):
    """
    Difference between right and left tail masses.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Tail asymmetry. >0 = heavier right tail, <0 = heavier left tail.

    Notes
    -----
    Expected ~0 for symmetric distributions. Problematic if |asymmetry| > 0.02.
    Helps identify which tail is causing distributional problems.
    """
    right = right_tail_mass(values)
    left = left_tail_mass(values)
    return right - left

excess_tail_mass(values)

Total excess tail mass beyond what's expected for normal distribution.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Excess tail mass. >0 indicates heavy tails.

Notes

For normal distribution, total tail mass (beyond ±2SD) = 4.6%. Excess tail mass = observed - expected. Problematic if > 0.02 (2% excess).

Source code in src/parcellate/metrics/volume.py
def excess_tail_mass(values):
    """
    Total excess tail mass beyond what's expected for normal distribution.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Excess tail mass. >0 indicates heavy tails.

    Notes
    -----
    For normal distribution, total tail mass (beyond ±2SD) = 4.6%.
    Excess tail mass = observed - expected.
    Problematic if > 0.02 (2% excess).
    """
    expected_per_tail = 0.023  # normal-distribution mass beyond 2 SD, each side
    observed = left_tail_mass(values) + right_tail_mass(values)
    return observed - 2 * expected_per_tail

dagostino_k2(values)

D'Agostino-Pearson K² statistic for normality test.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values (requires n ≥ 20)

required

Returns:

Type Description
float

K² test statistic (chi-squared distributed with 2 df)

Notes

Returns NaN if n < 20. Higher values indicate stronger deviation from normality.

See also

dagostino_p : Corresponding p-value

Source code in src/parcellate/metrics/volume.py
def dagostino_k2(values):
    """
    D'Agostino-Pearson K² statistic for normality test.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values (requires n ≥ 20)

    Returns
    -------
    float
        K² test statistic (chi-squared distributed with 2 df). NaN if n < 20.

    Notes
    -----
    Higher values indicate stronger deviation from normality.

    See also
    --------
    dagostino_p : Corresponding p-value
    """
    # The test is unreliable for small samples; bail out early.
    if len(values) < 20:
        return np.nan
    stat, _pval = normaltest(values)
    return stat

dagostino_p(values)

D'Agostino-Pearson p-value for normality test.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values (requires n ≥ 20)

required

Returns:

Type Description
float

p-value. Values < 0.05 reject null hypothesis of normality.

Notes

Returns NaN if n < 20. Combines tests of skewness and kurtosis. More computationally efficient than Shapiro-Wilk for large samples.

Source code in src/parcellate/metrics/volume.py
def dagostino_p(values):
    """
    D'Agostino-Pearson p-value for normality test.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values (requires n ≥ 20)

    Returns
    -------
    float
        p-value. Values < 0.05 reject null hypothesis of normality.
        NaN if n < 20.

    Notes
    -----
    Combines tests of skewness and kurtosis. More computationally efficient
    than Shapiro-Wilk for large samples.
    """
    # The test is unreliable for small samples; bail out early.
    if len(values) < 20:
        return np.nan
    _stat, pval = normaltest(values)
    return pval

log_dagostino_k2(values)

Log-transformed D'Agostino K² statistic.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values (requires n ≥ 20)

required

Returns:

Type Description
float

log(K²). Useful for visualization when K² spans many orders of magnitude.

Source code in src/parcellate/metrics/volume.py
def log_dagostino_k2(values):
    """
    Log-transformed D'Agostino K² statistic.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values (requires n ≥ 20)

    Returns
    -------
    float
        log(K²). Useful for visualization when K² spans many orders of magnitude.
        NaN propagates from dagostino_k2 (e.g. n < 20).
    """
    stat = dagostino_k2(values)
    if np.isnan(stat):
        return np.nan
    # Epsilon keeps the log finite when the statistic is exactly zero.
    return np.log(stat + 1e-10)

shapiro_w(values)

Shapiro-Wilk W statistic for normality test.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values (requires 3 ≤ n ≤ 5000)

required

Returns:

Type Description
float

W statistic (ranges 0 to 1). Values close to 1 indicate normality.

Notes

Returns NaN if n < 3 or n > 5000 or if test fails. Most powerful normality test but computationally expensive for large samples. Problematic if W < 0.95.

See also

shapiro_p : Corresponding p-value

Source code in src/parcellate/metrics/volume.py
def shapiro_w(values):
    """
    Shapiro-Wilk W statistic for normality test.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values (requires 3 ≤ n ≤ 5000)

    Returns
    -------
    float
        W statistic (ranges 0 to 1). Values close to 1 indicate normality.
        NaN if n < 3, n > 5000, or the test itself fails.

    Notes
    -----
    Most powerful normality test but computationally expensive for large samples.
    Problematic if W < 0.95.

    See also
    --------
    shapiro_p : Corresponding p-value
    """
    n = len(values)
    if n < 3 or n > 5000:
        return np.nan
    try:
        stat, _pval = shapiro(values)
    except Exception:
        # Degenerate input (e.g. constant data) can make the test blow up.
        return np.nan
    return stat

shapiro_p(values)

Shapiro-Wilk p-value for normality test.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values (requires 3 ≤ n ≤ 5000)

required

Returns:

Type Description
float

p-value. Values < 0.05 reject null hypothesis of normality.

Notes

Returns NaN if n < 3 or n > 5000 or if test fails.

Source code in src/parcellate/metrics/volume.py
def shapiro_p(values):
    """
    Shapiro-Wilk p-value for normality test.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values (requires 3 ≤ n ≤ 5000)

    Returns
    -------
    float
        p-value. Values < 0.05 reject null hypothesis of normality.
        NaN if n < 3, n > 5000, or the test itself fails.
    """
    n = len(values)
    if n < 3 or n > 5000:
        return np.nan
    try:
        _stat, pval = shapiro(values)
    except Exception:
        # Degenerate input (e.g. constant data) can make the test blow up.
        return np.nan
    return pval

qq_correlation(values)

Correlation coefficient from Q-Q plot (sample vs. theoretical normal quantiles).

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

R value from Q-Q plot (ranges -1 to 1). Values close to 1 indicate normality.

Notes

Quantifies the linearity of the Q-Q plot. Problematic if R² < 0.95.

See also

qq_r_squared : R² version (more commonly reported)

Source code in src/parcellate/metrics/volume.py
def qq_correlation(values):
    """
    Correlation coefficient from Q-Q plot (sample vs. theoretical normal quantiles).

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        R value from Q-Q plot (ranges -1 to 1). Values close to 1 indicate normality.

    Notes
    -----
    Quantifies the linearity of the Q-Q plot. Problematic if R² < 0.95.

    See also
    --------
    qq_r_squared : R² version (more commonly reported)
    """
    # probplot returns ((osm, osr), (slope, intercept, r)); only r is needed.
    _quantiles, fit_params = probplot(values, dist="norm", fit=True)
    return fit_params[2]

qq_r_squared(values)

R² from Q-Q plot regression.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

R² (ranges 0 to 1). Values close to 1 indicate good fit to normal distribution.

Notes

Problematic if R² < 0.95. Single-number summary of Q-Q plot linearity.

Source code in src/parcellate/metrics/volume.py
def qq_r_squared(values):
    """
    R² from Q-Q plot regression.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        R² (ranges 0 to 1). Values close to 1 indicate good fit to normal distribution.

    Notes
    -----
    Problematic if R² < 0.95. Single-number summary of Q-Q plot linearity.
    """
    return qq_correlation(values) ** 2

histogram_entropy(values, bins='auto')

Shannon entropy of histogram.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required
bins int or str

Number of histogram bins or binning strategy. Default 'auto'.

'auto'

Returns:

Type Description
float

Entropy in bits. Higher values indicate more uniform/spread out distributions.

Notes

Useful for comparing distributional complexity across ROIs. Not directly interpretable for normality assessment.

Source code in src/parcellate/metrics/volume.py
def histogram_entropy(values, bins="auto"):
    """
    Shannon entropy of histogram.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values. Any array-like is accepted; the
        input is converted to a float ndarray first (previously a plain
        Python list raised TypeError on the boolean-mask indexing step).
    bins : int or str, optional
        Number of histogram bins or binning strategy. Default 'auto'.

    Returns
    -------
    float
        Entropy in bits. Higher values indicate more uniform/spread out
        distributions. NaN when no non-NaN values remain.

    Notes
    -----
    Useful for comparing distributional complexity across ROIs.
    Not directly interpretable for normality assessment.
    """
    arr = np.asarray(values, dtype=float)
    valid_values = arr[~np.isnan(arr)]
    if valid_values.size == 0:
        return np.nan

    hist, _ = np.histogram(valid_values, bins=bins, density=True)
    hist = hist / hist.sum()  # Normalize to probabilities
    hist = hist[hist > 0]  # Remove zero bins
    return float(entropy(hist, base=2))

percentile_5(values)

5th percentile.

Source code in src/parcellate/metrics/volume.py
def percentile_5(values):
    """5th percentile, ignoring NaNs."""
    result = np.nanpercentile(values, q=5)
    return float(result)

percentile_10(values)

10th percentile.

Source code in src/parcellate/metrics/volume.py
def percentile_10(values):
    """10th percentile, ignoring NaNs."""
    result = np.nanpercentile(values, q=10)
    return float(result)

percentile_25(values)

25th percentile (Q1).

Source code in src/parcellate/metrics/volume.py
def percentile_25(values):
    """25th percentile (Q1), ignoring NaNs."""
    result = np.nanpercentile(values, q=25)
    return float(result)

percentile_50(values)

50th percentile (median).

Source code in src/parcellate/metrics/volume.py
def percentile_50(values):
    """50th percentile (median), ignoring NaNs."""
    result = np.nanpercentile(values, q=50)
    return float(result)

percentile_75(values)

75th percentile (Q3).

Source code in src/parcellate/metrics/volume.py
def percentile_75(values):
    """75th percentile (Q3), ignoring NaNs."""
    result = np.nanpercentile(values, q=75)
    return float(result)

percentile_90(values)

90th percentile.

Source code in src/parcellate/metrics/volume.py
def percentile_90(values):
    """90th percentile, ignoring NaNs."""
    result = np.nanpercentile(values, q=90)
    return float(result)

percentile_95(values)

95th percentile.

Source code in src/parcellate/metrics/volume.py
def percentile_95(values):
    """95th percentile, ignoring NaNs."""
    result = np.nanpercentile(values, q=95)
    return float(result)

is_strongly_skewed(values, threshold=0.5)

Whether distribution is strongly skewed.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required
threshold float

Absolute skewness threshold. Default 0.5.

0.5

Returns:

Type Description
bool

True if |skewness| > threshold

Source code in src/parcellate/metrics/volume.py
def is_strongly_skewed(values, threshold=0.5):
    """
    Whether distribution is strongly skewed.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values
    threshold : float, optional
        Absolute skewness threshold. Default 0.5.

    Returns
    -------
    bool
        True if |skewness| > threshold
    """
    magnitude = abs_skewness(values)
    return magnitude > threshold

is_heavy_tailed(values, threshold=1.0)

Whether distribution has heavy tails.

Parameters:

Name Type Description Default
values array - like

Within-ROI voxel/vertex values

required
threshold float

Absolute excess kurtosis threshold. Default 1.0.

1.0

Returns:

Type Description
bool

True if |excess_kurtosis| > threshold

Source code in src/parcellate/metrics/volume.py
def is_heavy_tailed(values, threshold=1.0):
    """
    Check whether the value distribution has heavy tails.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values
    threshold : float, optional
        Absolute excess kurtosis threshold. Default 1.0.

    Returns
    -------
    bool
        True if |excess_kurtosis| > threshold
    """
    tail_weight = abs_excess_kurtosis(values)
    return tail_weight > threshold

is_bimodal(values, threshold=0.555)

Whether distribution appears bimodal.

Parameters:

Name Type Description Default
values array-like

Within-ROI voxel/vertex values

required
threshold float

Bimodality coefficient threshold. Default 0.555.

0.555

Returns:

Type Description
bool

True if bimodality_coefficient > threshold

Source code in src/parcellate/metrics/volume.py
def is_bimodal(values, threshold=0.555):
    """
    Check whether the value distribution appears bimodal.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values
    threshold : float, optional
        Bimodality coefficient threshold. Default 0.555.

    Returns
    -------
    bool
        True if bimodality_coefficient > threshold
    """
    bc = bimodality_coefficient(values)
    return bc > threshold

has_outliers(values, threshold=0.01)

Whether distribution has excessive outliers.

Parameters:

Name Type Description Default
values array-like

Within-ROI voxel/vertex values

required
threshold float

Proportion threshold for 3SD outliers. Default 0.01 (1%).

0.01

Returns:

Type Description
bool

True if proportion of 3SD outliers > threshold

Source code in src/parcellate/metrics/volume.py
def has_outliers(values, threshold=0.01):
    """
    Check whether the distribution contains an excessive share of outliers.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values
    threshold : float, optional
        Proportion threshold for 3SD outliers. Default 0.01 (1%).

    Returns
    -------
    bool
        True if proportion of 3SD outliers > threshold
    """
    outlier_fraction = prop_outliers_3sd(values)
    return outlier_fraction > threshold

fails_normality(values)

Whether distribution fails normality tests.

Parameters:

Name Type Description Default
values array-like

Within-ROI voxel/vertex values

required

Returns:

Type Description
bool

True if Shapiro-Wilk p < 0.05 (if available), else D'Agostino p < 0.05

Notes

Prioritizes Shapiro-Wilk (more powerful) when n ≤ 5000. Falls back to D'Agostino-Pearson for larger samples.

Source code in src/parcellate/metrics/volume.py
def fails_normality(values):
    """
    Whether distribution fails normality tests.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    bool
        True if Shapiro-Wilk p < 0.05 (if available), else D'Agostino p < 0.05

    Notes
    -----
    Shapiro-Wilk is preferred (more powerful) when it yields a p-value
    (n ≤ 5000); D'Agostino-Pearson is the fallback for larger samples.
    When neither test is applicable, the distribution is not flagged.
    """
    # Try the tests in order of preference; the first usable p-value decides.
    for p_value_fn in (shapiro_p, dagostino_p):
        p = p_value_fn(values)
        if not np.isnan(p):
            return p < 0.05
    return False  # no applicable test available

coverage(values)

Proportion of non-NaN values in the parcel.

Parameters:

Name Type Description Default
values array-like

Within-ROI voxel/vertex values

required

Returns:

Type Description
float

Coverage proportion (0 to 1). Higher values indicate more complete data.

Source code in src/parcellate/metrics/volume.py
def coverage(values):
    """
    Proportion of non-NaN values in the parcel.

    Parameters
    ----------
    values : array-like
        Within-ROI voxel/vertex values

    Returns
    -------
    float
        Coverage proportion (0 to 1). Higher values indicate more complete
        data. NaN when the parcel contains no values.
    """
    arr = np.asarray(values, dtype=float)
    if arr.size == 0:
        return float("nan")
    # Count over arr.size (not len(values)) so multi-dimensional input
    # still yields a proportion in [0, 1], and cast to a builtin float
    # for consistency with the other metrics in this module.
    return float(np.count_nonzero(~np.isnan(arr)) / arr.size)

Utilities

image

Utility functions for image handling