Vectorized grouped (nan)quantile (#329) · xarray-contrib/flox@c966323

Commit c966323

Vectorized grouped (nan)quantile (#329)
* Vectorized grouped (nan)quantile
* Choose by default
* Add fallback logging
* Fix quantile when NaNs are present
* Support median, nanmedian
* micro-optimization
* Fix typing
* notnull optimization
* Some optimization
* Minor cleanup
* Add test for non-nan skipping agg with nans
* Fix test
1 parent 561378d commit c966323

File tree: 6 files changed (+168, −28 lines)

flox/aggregate_flox.py

Lines changed: 104 additions & 17 deletions
@@ -2,25 +2,103 @@

 import numpy as np

-from .xrutils import isnull
+from .xrutils import is_scalar, isnull, notnull


-def _prepare_for_flox(group_idx, array):
+def _prepare_for_flox(group_idx, array, lexsort):
     """
     Sort the input array once to save time.
     """
     assert array.shape[-1] == group_idx.shape[0]
-    issorted = (group_idx[:-1] <= group_idx[1:]).all()
-    if issorted:
-        ordered_array = array
+
+    if lexsort:
+        # lexsort allows us to sort by label AND array value
+        # numpy's quantile uses partition, which could be a big win
+        # IF we can figure out how to do that.
+        # This trick was snagged from scipy.ndimage.median() :)
+        labels_broadcast = np.broadcast_to(group_idx, array.shape)
+        idxs = np.lexsort((array, labels_broadcast), axis=-1)
+        ordered_array = np.take_along_axis(array, idxs, axis=-1)
+        group_idx = np.take_along_axis(group_idx, idxs[(0,) * (idxs.ndim - 1) + (...,)], axis=-1)
     else:
-        perm = group_idx.argsort(kind="stable")
-        group_idx = group_idx[..., perm]
-        ordered_array = array[..., perm]
+        issorted = (group_idx[:-1] <= group_idx[1:]).all()
+        if issorted:
+            ordered_array = array
+        else:
+            perm = group_idx.argsort(kind="stable")
+            group_idx = group_idx[..., perm]
+            ordered_array = array[..., perm]
     return group_idx, ordered_array


-def _np_grouped_op(group_idx, array, op, axis=-1, size=None, fill_value=None, dtype=None, out=None):
+def _lerp(a, b, *, t, dtype, out=None):
+    """
+    COPIED from numpy.
+
+    Compute the linear interpolation weighted by gamma on each point of
+    two arrays of the same shape.
+
+    a : array_like
+        Left bound.
+    b : array_like
+        Right bound.
+    t : array_like
+        The interpolation weight.
+    """
+    if out is None:
+        out = np.empty_like(a, dtype=dtype)
+    diff_b_a = np.subtract(b, a)
+    # asanyarray is a stop-gap until gh-13105
+    np.add(a, diff_b_a * t, out=out)
+    np.subtract(b, diff_b_a * (1 - t), out=out, where=t >= 0.5)
+    return out
+
+
+def quantile_(array, inv_idx, *, q, axis, skipna, dtype=None, out=None):
+    inv_idx = np.concatenate((inv_idx, [array.shape[-1]]))
+
+    if skipna:
+        sizes = np.add.reduceat(notnull(array), inv_idx[:-1], axis=axis)
+    else:
+        sizes = np.reshape(np.diff(inv_idx), (1,) * (array.ndim - 1) + (inv_idx.size - 1,))
+        nanmask = isnull(np.take_along_axis(array, sizes - 1, axis=axis))
+
+    qin = q
+    q = np.atleast_1d(qin)
+    q = np.reshape(q, (len(q),) + (1,) * array.ndim)
+
+    # This is numpy's method="linear"
+    # TODO: could support all the interpolations here
+    virtual_index = q * (sizes - 1) + inv_idx[:-1]
+
+    is_scalar_q = is_scalar(qin)
+    if is_scalar_q:
+        virtual_index = virtual_index.squeeze(axis=0)
+        idxshape = array.shape[:-1] + (sizes.shape[-1],)
+        a_ = array
+    else:
+        idxshape = (q.shape[0],) + array.shape[:-1] + (sizes.shape[-1],)
+        a_ = np.broadcast_to(array, (q.shape[0],) + array.shape)
+
+    # Broadcast to (num quantiles, ..., num labels)
+    lo_ = np.floor(virtual_index, casting="unsafe", out=np.empty(idxshape, dtype=np.int64))
+    hi_ = np.ceil(virtual_index, casting="unsafe", out=np.empty(idxshape, dtype=np.int64))
+
+    # get bounds
+    loval = np.take_along_axis(a_, lo_, axis=axis)
+    hival = np.take_along_axis(a_, hi_, axis=axis)
+
+    # TODO: could support all the interpolations here
+    gamma = np.broadcast_to(virtual_index, idxshape) - lo_
+    result = _lerp(loval, hival, t=gamma, out=out, dtype=dtype)
+    if not skipna and np.any(nanmask):
+        result[..., nanmask] = np.nan
+    return result
+
+
+def _np_grouped_op(
+    group_idx, array, op, axis=-1, size=None, fill_value=None, dtype=None, out=None, **kwargs
+):
     """
     most of this code is from shoyer's gist
     https://gist.github.com/shoyer/f538ac78ae904c936844
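
The essence of the lexsort trick plus the index arithmetic in quantile_, as a minimal self-contained sketch (all names below are illustrative, not part of the diff): sort once by (label, value) so that every group becomes a sorted contiguous run; any quantile is then two gathers and one linear interpolation.

    import numpy as np

    values = np.array([9.0, 1.0, 5.0, 3.0, 7.0, 2.0])
    labels = np.array([1, 0, 1, 0, 1, 0])

    # the last key passed to lexsort is the primary sort key
    idxs = np.lexsort((values, labels))
    svals, slabs = values[idxs], labels[idxs]  # [1, 2, 3, 5, 7, 9], [0, 0, 0, 1, 1, 1]

    inv_idx = np.searchsorted(slabs, np.unique(slabs))        # start of each group's run
    sizes = np.diff(np.concatenate([inv_idx, [len(svals)]]))  # elements per group

    q = 0.5
    virtual_index = q * (sizes - 1) + inv_idx  # numpy's method="linear"
    lo = np.floor(virtual_index).astype(np.int64)
    hi = np.ceil(virtual_index).astype(np.int64)
    gamma = virtual_index - lo
    medians = svals[lo] * (1 - gamma) + svals[hi] * gamma
    # medians -> array([2., 7.]), matching np.quantile applied to each group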
@@ -38,16 +116,21 @@ def _np_grouped_op(group_idx, array, op, axis=-1, size=None, fill_value=None, dt
         dtype = array.dtype

     if out is None:
-        out = np.full(array.shape[:-1] + (size,), fill_value=fill_value, dtype=dtype)
+        q = kwargs.get("q", None)
+        if q is None:
+            out = np.full(array.shape[:-1] + (size,), fill_value=fill_value, dtype=dtype)
+        else:
+            nq = len(np.atleast_1d(q))
+            out = np.full((nq,) + array.shape[:-1] + (size,), fill_value=fill_value, dtype=dtype)

     if (len(uniques) == size) and (uniques == np.arange(size, like=array)).all():
         # The previous version of this if condition
         # ((uniques[1:] - uniques[:-1]) == 1).all():
         # does not work when group_idx is [1, 2], for example.
         # This happens during binning
-        op.reduceat(array, inv_idx, axis=axis, dtype=dtype, out=out)
+        op(array, inv_idx, axis=axis, dtype=dtype, out=out, **kwargs)
     else:
-        out[..., uniques] = op.reduceat(array, inv_idx, axis=axis, dtype=dtype)
+        out[..., uniques] = op(array, inv_idx, axis=axis, dtype=dtype, **kwargs)

     return out

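Note the refactor above: `op` is no longer a ufunc but any callable with a `ufunc.reduceat`-style signature, `op(array, inv_idx, axis=..., dtype=..., out=...)`, so `np.add.reduceat` and `partial(quantile_, ...)` share one call site while `q` rides along in `**kwargs`. The reduceat convention being mimicked, with toy numbers (not from the diff):

    import numpy as np

    array = np.array([1.0, 2.0, 3.0, 10.0, 20.0])
    inv_idx = np.array([0, 3])  # start offset of each group's run

    np.add.reduceat(array, inv_idx)  # -> array([ 6., 30.]): sums over [0:3] and [3:]
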
@@ -65,14 +148,18 @@ def _nan_grouped_op(group_idx, array, func, fillna, *args, **kwargs):
     return result


-sum = partial(_np_grouped_op, op=np.add)
+sum = partial(_np_grouped_op, op=np.add.reduceat)
 nansum = partial(_nan_grouped_op, func=sum, fillna=0)
-prod = partial(_np_grouped_op, op=np.multiply)
+prod = partial(_np_grouped_op, op=np.multiply.reduceat)
 nanprod = partial(_nan_grouped_op, func=prod, fillna=1)
-max = partial(_np_grouped_op, op=np.maximum)
+max = partial(_np_grouped_op, op=np.maximum.reduceat)
 nanmax = partial(_nan_grouped_op, func=max, fillna=-np.inf)
-min = partial(_np_grouped_op, op=np.minimum)
+min = partial(_np_grouped_op, op=np.minimum.reduceat)
 nanmin = partial(_nan_grouped_op, func=min, fillna=np.inf)
+quantile = partial(_np_grouped_op, op=partial(quantile_, skipna=False))
+nanquantile = partial(_np_grouped_op, op=partial(quantile_, skipna=True))
+median = partial(_np_grouped_op, op=partial(quantile_, q=0.5, skipna=False))
+nanmedian = partial(_np_grouped_op, op=partial(quantile_, q=0.5, skipna=True))
 # TODO: all, any

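A hypothetical end-to-end sketch of the new aggregations through flox's public API (values are illustrative, and the exact output shape depends on the surrounding machinery; treat this as a usage illustration, not a specification):

    import numpy as np
    from flox.core import groupby_reduce

    array = np.array([9.0, 1.0, 5.0, 3.0, 7.0, 2.0])
    by = np.array([1, 0, 1, 0, 1, 0])

    # grouped medians, one per label; expect [2., 7.] for labels [0, 1]
    result, groups = groupby_reduce(array, by, func="median", engine="flox")

    # grouped quantiles take q via finalize_kwargs
    result, groups = groupby_reduce(
        array, by, func="nanquantile", finalize_kwargs={"q": 0.9}, engine="flox"
    )
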
@@ -99,7 +186,7 @@ def nansum_of_squares(group_idx, array, *, axis=-1, size=None, fill_value=None,


 def nanlen(group_idx, array, *args, **kwargs):
-    return sum(group_idx, (~isnull(array)).astype(int), *args, **kwargs)
+    return sum(group_idx, (notnull(array)).astype(int), *args, **kwargs)


 def mean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):

flox/aggregate_npg.py

Lines changed: 9 additions & 4 deletions
@@ -8,6 +8,11 @@ def _get_aggregate(engine):
     return npg.aggregate_numpy if engine == "numpy" else npg.aggregate_numba


+def _casting_wrapper(func, grp, dtype):
+    """Used for generic aggregates. The group is passed as dtype=object; cast back to the original dtype to avoid subtle bugs."""
+    return func(grp.astype(dtype))
+
+
 def sum_of_squares(
     group_idx,
     array,
@@ -106,7 +111,7 @@ def median(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dty
     return npg.aggregate_numpy.aggregate(
         group_idx,
         array,
-        func=np.median,
+        func=partial(_casting_wrapper, np.median, dtype=array.dtype),
         axis=axis,
         size=size,
         fill_value=fill_value,
@@ -118,7 +123,7 @@ def nanmedian(group_idx, array, engine, *, axis=-1, size=None, fill_value=None,
     return npg.aggregate_numpy.aggregate(
         group_idx,
         array,
-        func=np.nanmedian,
+        func=partial(_casting_wrapper, np.nanmedian, dtype=array.dtype),
         axis=axis,
         size=size,
         fill_value=fill_value,
@@ -130,7 +135,7 @@ def quantile(group_idx, array, engine, *, q, axis=-1, size=None, fill_value=None
     return npg.aggregate_numpy.aggregate(
         group_idx,
         array,
-        func=partial(np.quantile, q=q),
+        func=partial(_casting_wrapper, partial(np.quantile, q=q), dtype=array.dtype),
         axis=axis,
         size=size,
         fill_value=fill_value,
@@ -142,7 +147,7 @@ def nanquantile(group_idx, array, engine, *, q, axis=-1, size=None, fill_value=N
     return npg.aggregate_numpy.aggregate(
         group_idx,
         array,
-        func=partial(np.nanquantile, q=q),
+        func=partial(_casting_wrapper, partial(np.nanquantile, q=q), dtype=array.dtype),
         axis=axis,
         size=size,
         fill_value=fill_value,

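Why `_casting_wrapper` is needed, sketched standalone: numpy_groupies' generic-aggregate path hands each group to `func` as an object-dtype array, and NaN-aware numpy reductions do not accept object dtype.

    import numpy as np

    grp = np.array([1.0, np.nan, 3.0], dtype=object)
    # np.nanmedian(grp) raises TypeError: ufunc 'isnan' not supported for object arrays
    np.nanmedian(grp.astype(np.float64))  # -> 2.0
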
flox/aggregations.py

Lines changed: 6 additions & 0 deletions
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import copy
+import logging
 import warnings
 from functools import partial
 from typing import TYPE_CHECKING, Any, Callable, Literal, TypedDict
@@ -16,6 +17,9 @@
 OptionalFuncTuple = tuple[Callable | str | None, ...]


+logger = logging.getLogger("flox")
+
+
 def _is_arg_reduction(func: str | Aggregation) -> bool:
     if isinstance(func, str) and func in ["argmin", "argmax", "nanargmax", "nanargmin"]:
         return True
@@ -62,6 +66,7 @@ def generic_aggregate(
         try:
             method = getattr(aggregate_flox, func)
         except AttributeError:
+            logger.debug(f"Couldn't find {func} for engine='flox'. Falling back to numpy")
             method = get_npg_aggregation(func, engine="numpy")

     elif engine == "numbagg":
@@ -78,6 +83,7 @@ def generic_aggregate(
             else:
                 method = getattr(aggregate_numbagg, func)
         except AttributeError:
+            logger.debug(f"Couldn't find {func} for engine='numbagg'. Falling back to numpy")
             method = get_npg_aggregation(func, engine="numpy")

     elif engine in ["numpy", "numba"]:

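The fallback messages above go to the stdlib logger named "flox"; a sketch of how a user would surface them:

    import logging

    logging.basicConfig()
    logging.getLogger("flox").setLevel(logging.DEBUG)
    # reductions that fall back now log, e.g.
    # "Couldn't find <func> for engine='flox'. Falling back to numpy"
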
flox/core.py

Lines changed: 17 additions & 4 deletions
@@ -36,7 +36,13 @@
     generic_aggregate,
 )
 from .cache import memoize
-from .xrutils import is_duck_array, is_duck_dask_array, isnull, module_available
+from .xrutils import (
+    is_duck_array,
+    is_duck_dask_array,
+    isnull,
+    module_available,
+    notnull,
+)

 if module_available("numpy", minversion="2.0.0"):
     from numpy.lib.array_utils import (  # type: ignore[import-not-found]
@@ -46,6 +52,7 @@
     from numpy.core.numeric import normalize_axis_tuple  # type: ignore[attr-defined]

 HAS_NUMBAGG = module_available("numbagg", minversion="0.3.0")
+_LEXSORT_FOR_FLOX = ["quantile", "nanquantile", "median", "nanmedian"]

 if TYPE_CHECKING:
     try:
@@ -156,7 +163,7 @@ def _get_expected_groups(by: T_By, sort: bool) -> T_ExpectIndex:
     if is_duck_dask_array(by):
         raise ValueError("Please provide expected_groups if not grouping by a numpy array.")
     flatby = by.reshape(-1)
-    expected = pd.unique(flatby[~isnull(flatby)])
+    expected = pd.unique(flatby[notnull(flatby)])
     return _convert_expected_groups_to_index((expected,), isbin=(False,), sort=sort)[0]


@@ -953,7 +960,9 @@ def chunk_reduce(
     if engine == "flox":
         # is_arg_reduction = any("arg" in f for f in func if isinstance(f, str))
         # if not is_arg_reduction:
-        group_idx, array = _prepare_for_flox(group_idx, array)
+        group_idx, array = _prepare_for_flox(
+            group_idx, array, lexsort=any(f in _LEXSORT_FOR_FLOX for f in funcs)
+        )

     final_array_shape += results["groups"].shape
     final_groups_shape += results["groups"].shape
@@ -1095,7 +1104,7 @@ def _find_unique_groups(x_chunk) -> np.ndarray:
     from dask.utils import deepmap

     unique_groups = _unique(np.asarray(tuple(flatten(deepmap(listify_groups, x_chunk)))))
-    unique_groups = unique_groups[~isnull(unique_groups)]
+    unique_groups = unique_groups[notnull(unique_groups)]

     if len(unique_groups) == 0:
         unique_groups = np.array([np.nan])
@@ -1959,6 +1968,10 @@ def _choose_engine(by, agg: Aggregation):

     not_arg_reduce = not _is_arg_reduction(agg)

+    if agg.name in _LEXSORT_FOR_FLOX:
+        logger.info(f"_choose_engine: Choosing 'flox' since {agg.name}")
+        return "flox"
+
     # numbagg only supports nan-skipping reductions
     # without dtype specified
     has_blockwise_nan_skipping = (agg.chunk[0] is None and "nan" in agg.name) or any(

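Sketch of the `_choose_engine` change in action: when the caller does not pass `engine=`, the quantile family is now always routed to the flox engine, since only that path prepares the lexsorted layout these aggregations need. Toy values:

    import numpy as np
    from flox.core import groupby_reduce

    array = np.arange(6, dtype=np.float64)
    by = np.array([0, 0, 0, 1, 1, 1])

    # no engine= given: _choose_engine short-circuits to "flox" for nanmedian
    result, groups = groupby_reduce(array, by, func="nanmedian")
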
flox/xrutils.py

Lines changed: 14 additions & 0 deletions
@@ -100,6 +100,20 @@ def is_scalar(value: Any, include_0d: bool = True) -> bool:
     )


+def notnull(data):
+    if not is_duck_array(data):
+        data = np.asarray(data)
+
+    scalar_type = data.dtype.type
+    if issubclass(scalar_type, (np.bool_, np.integer, np.character, np.void)):
+        # these types cannot represent missing values
+        return np.ones_like(data, dtype=bool)
+    else:
+        out = isnull(data)
+        np.logical_not(out, out=out)
+        return out
+
+
 def isnull(data):
     if not is_duck_array(data):
         data = np.asarray(data)

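What the dtype shortcut in notnull buys, sketched: for dtypes that cannot represent missing values, the elementwise check is skipped entirely.

    import numpy as np
    from flox.xrutils import notnull

    notnull(np.array([1, 2, 3]))           # integer dtype -> array([ True,  True,  True]), no NaN scan
    notnull(np.array([1.0, np.nan, 3.0]))  # float dtype   -> array([ True, False,  True])
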
tests/test_core.py

Lines changed: 18 additions & 3 deletions
@@ -261,6 +261,9 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine):
         fill_value = None
         tolerance = None

+    # for constructing expected
+    array_func = _get_array_func(func)
+
     for kwargs in finalize_kwargs:
         flox_kwargs = dict(func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value)
         with np.errstate(invalid="ignore", divide="ignore"):
@@ -280,7 +283,6 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine):
                 array_[..., nanmask] = np.nan
                 expected = getattr(np, func_)(array_, axis=-1, **kwargs)
             else:
-                array_func = _get_array_func(func)
                 expected = array_func(array_[..., ~nanmask], axis=-1, **kwargs)
             for _ in range(nby):
                 expected = np.expand_dims(expected, -1)
@@ -290,15 +292,28 @@ def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine):
                 flox_kwargs["method"] = "blockwise"

             actual, *groups = groupby_reduce(array, *by, **flox_kwargs)
-            assert actual.ndim == (array.ndim + nby - 1)
-            assert expected.ndim == (array.ndim + nby - 1)
+            assert actual.ndim == expected.ndim == (array.ndim + nby - 1)
             expected_groups = tuple(np.array([idx + 1.0]) for idx in range(nby))
             for actual_group, expect in zip(groups, expected_groups):
                 assert_equal(actual_group, expect)
             if "arg" in func:
                 assert actual.dtype.kind == "i"
             assert_equal(expected, actual, tolerance)

+            if "nan" not in func and "arg" not in func:
+                # test non-NaN skipping behaviour when NaNs are present
+                nanned = array_.copy()
+                # remove nans in by to reduce complexity
+                # We are checking for consistent behaviour with NaNs in array
+                by_ = tuple(np.nan_to_num(b, nan=np.nanmin(b)) for b in by)
+                nanned[[1, 4, 5], ...] = np.nan
+                nanned.reshape(-1)[0] = np.nan
+                actual, *_ = groupby_reduce(nanned, *by_, **flox_kwargs)
+                expected_0 = array_func(nanned, axis=-1, **kwargs)
+                for _ in range(nby):
+                    expected_0 = np.expand_dims(expected_0, -1)
+                assert_equal(expected_0, actual, tolerance)
+
             if not has_dask or chunks is None or func in BLOCKWISE_FUNCS:
                 continue

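The semantics the new test block pins down, in plain numpy: non-NaN-skipping reductions must propagate NaN, while their nan-prefixed counterparts skip it.

    import numpy as np

    np.median(np.array([1.0, np.nan, 3.0]))     # -> nan (NaN propagates)
    np.nanmedian(np.array([1.0, np.nan, 3.0]))  # -> 2.0 (NaN skipped)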