@@ -3941,6 +3941,39 @@ def percentile(a,
3941
3941
3942
3942
.. versionchanged:: 1.9.0
3943
3943
A tuple of axes is supported
3944
+ weights: array_like, optional
3945
+ An array of weights associated with the values in `a`.
3946
+
3947
+ Weights cannot
3948
+
3949
+ * be negative
3950
+ * sum to 0
3951
+
3952
+ However, they can be
3953
+
3954
+ * 0, as long as they do not sum to 0
3955
+ * less than 1. In this case, all weights are re-normalized by
3956
+ the lowest non-zero weight prior to computation.
3957
+
3958
+ The renormalization ensures that no weight is less than 1, and that
3959
+ the weights can have a frequency interpretation.
3960
+
3961
+ A weight of 1 means a repetition of one for the corresponding value in
3962
+ the data array. That means the sum of weights along `axis` is the
3963
+ total count of values along that axis. The evaluation of weighted
3964
+ quantiles therefore amounts to computing normal quantiles in weight
3965
+ space.
3966
+
3967
+ Weights will be broadcasted to the shape of a, then reduced as done
3968
+ via _ureduce().
3969
+
3970
+ * if wgts.shape == a.shape, simply return ureduced wgts.
3971
+ * if scalar, broadcast to shape of a, followed by _ureduce.
3972
+ * for 1-D or N-D array, must make sure that its size matches that
3973
+ of `a` along the axis dims, so can be reshaped. Hence if
3974
+ a.shape == (2, 3, 4, 5, 6), axis == (1, 3), then weights must
3975
+ be of size 15, and will be reshaped to (1, 3, 1, 5, 1) before
3976
+ broadcasting to the shape of `a`, followed by _ureduce.
3944
3977
out : ndarray, optional
3945
3978
Alternative output array in which to place the result. It must
3946
3979
have the same shape and buffer length as the expected output,
@@ -4241,6 +4274,39 @@ def quantile(a,
4241
4274
axis : {int, tuple of int, None}, optional
4242
4275
Axis or axes along which the quantiles are computed. The default is
4243
4276
to compute the quantile(s) along a flattened version of the array.
4277
+ weights: array_like, optional
4278
+ An array of weights associated with the values in `a`.
4279
+
4280
+ Weights cannot
4281
+
4282
+ * be negative
4283
+ * sum to 0
4284
+
4285
+ However, they can be
4286
+
4287
+ * 0, as long as they do not sum to 0
4288
+ * less than 1. In this case, all weights are re-normalized by
4289
+ the lowest non-zero weight prior to computation.
4290
+
4291
+ The renormalization ensures that no weight is less than 1, and that
4292
+ the weights can have a frequency interpretation.
4293
+
4294
+ A weight of 1 means a repetition of one for the corresponding value in
4295
+ the data array. That means the sum of weights along `axis` is the
4296
+ total count of values along that axis. The evaluation of weighted
4297
+ quantiles therefore amounts to computing normal quantiles in weight
4298
+ space.
4299
+
4300
+ Weights will be broadcasted to the shape of a, then reduced as done
4301
+ via _ureduce().
4302
+
4303
+ * if wgts.shape == a.shape, simply return ureduced wgts.
4304
+ * if scalar, broadcast to shape of a, followed by _ureduce.
4305
+ * for 1-D or N-D array, must make sure that its size matches that
4306
+ of `a` along the axis dims, so can be reshaped. Hence if
4307
+ a.shape == (2, 3, 4, 5, 6), axis == (1, 3), then weights must
4308
+ be of size 15, and will be reshaped to (1, 3, 1, 5, 1) before
4309
+ broadcasting to the shape of `a`, followed by _ureduce.
4244
4310
out : ndarray, optional
4245
4311
Alternative output array in which to place the result. It must have
4246
4312
the same shape and buffer length as the expected output, but the
@@ -4502,6 +4568,11 @@ def _quantile_unchecked(a,
4502
4568
def _validate_and_ureduce_weights (a , axis , wgts ):
4503
4569
"""Ensure quantile weights are valid.
4504
4570
4571
+ A weight of 1 means a repetition of one for the corresponding value in
4572
+ the data array. That means the sum of weights along `axis` is the total
4573
+ count of values along that axis, based on which the quantiles are
4574
+ evaluated.
4575
+
4505
4576
Weights cannot:
4506
4577
* be negative
4507
4578
* sum to 0
@@ -4544,10 +4615,16 @@ def _validate_and_ureduce_weights(a, axis, wgts):
4544
4615
# broadcast weights to the shape of array a.
4545
4616
if wgts .shape == a .shape : # no need to broadcast
4546
4617
pass
4547
- elif wgts .size == 1 :
4618
+ elif wgts .size == 1 : # straight-forwardly broadcastable
4619
+ wgts = np .broadcast_to (wgts , a .shape )
4620
+ elif (wgts .ndim == a .ndim and
4621
+ all (d1 == d2 or d1 == 1 for d1 , d2 in zip (wgts .shape , a .shape ))):
4622
+ # Same # of dims, but some are length==1. Broadcastable.
4548
4623
wgts = np .broadcast_to (wgts , a .shape )
4549
4624
elif (wgts .size == functools .reduce (lambda x , y : x * y ,
4550
4625
[a .shape [d ] for d in dims ])):
4626
+ # wgts might be wrong shape, but has right size to match the data dim.
4627
+ # Reshape to match shape of dims, with some length==1, before broadcast
4551
4628
to_shape = tuple (a .shape [d ] if d in dims else 1 for d in range (a .ndim ))
4552
4629
wgts = wgts .reshape (to_shape )
4553
4630
wgts = np .broadcast_to (wgts , a .shape )
@@ -4565,6 +4642,22 @@ def _validate_and_ureduce_weights(a, axis, wgts):
4565
4642
# Obtain a weights array of the same shape as ureduced a
4566
4643
wgts = _ureduce (wgts , func = lambda x , ** kwargs : x , axis = dims )
4567
4644
4645
+ # Now check/renormalize weights if any is (0, 1)
4646
+ def _normalize (v ):
4647
+ inds = v > 0
4648
+ if (v [inds ] < 1 ).any ():
4649
+ vec = v .copy ()
4650
+ vec [inds ] = vec [inds ] / vec [inds ].min () # renormalization
4651
+ return vec
4652
+ else :
4653
+ return v
4654
+
4655
+ # perform normalization along reduced axis
4656
+ if len (dims ) > 1 :
4657
+ wgts = np .apply_along_axis (_normalize , - 1 , wgts )
4658
+ else :
4659
+ wgts = np .apply_along_axis (_normalize , dims [0 ], wgts )
4660
+
4568
4661
return wgts
4569
4662
4570
4663
@@ -4824,91 +4917,99 @@ def _quantile(
4824
4917
supports_nans = (
4825
4918
np .issubdtype (arr .dtype , np .inexact ) or arr .dtype .kind in 'Mm' )
4826
4919
4827
- if weights is not None : # weights is the same shape as arr
4920
+ if weights is not None : # weights should be the same shape as arr
4828
4921
# first move data to axis=-1 for np.vectorize, which runs on axis=-1
4829
4922
if axis != - 1 :
4830
4923
arr = np .moveaxis (arr , axis , destination = - 1 )
4831
4924
weights = np .moveaxis (weights , axis , destination = - 1 )
4832
- values_count = arr .shape [- 1 ]
4833
-
4834
- # Now check/renormalize weights if any is (0, 1)
4835
- def _normalize (v ):
4836
- inds = v > 0
4837
- if (v [inds ] < 1 ).any ():
4838
- vec = v .copy ()
4839
- vec [inds ] = vec [inds ] / vec [inds ].min () # renormalization
4840
- return vec
4841
- else :
4842
- return v
4843
- weights = np .apply_along_axis (_normalize , - 1 , weights )
4844
-
4845
- # values with weight=0 are made nan and later sorted to end of array
4846
- if (weights == 0 ).any ():
4847
- weights [weights == 0 ] = np .nan
4848
-
4849
- def _sort_by_index (vector , vec_indices ):
4850
- return vector [vec_indices ]
4851
- # this func vectorizes sort along axis
4852
- n = values_count
4853
- arraysort = np .vectorize (_sort_by_index ,
4854
- signature = f"({ n } ),({ n } )->({ n } )" )
4855
- # compute the sorted array. nans will be sorted to the end.
4856
- ind_sorted = np .argsort (arr , axis = - 1 )
4857
- arr = arraysort (arr , ind_sorted )
4858
- # need to align weights to now-sorted arr, hence np.vectorize is used
4859
- weights = arraysort (weights , ind_sorted )
4860
4925
4926
+ # identical treatment to slices_having_nans as no-weight case
4861
4927
if supports_nans :
4862
- slices_having_nans = np .isnan (
4863
- take (arr , indices = - 1 , axis = - 1 )
4864
- )
4928
+ slices_having_nans = np .isnan (arr .sum (axis = - 1 ))
4865
4929
else :
4866
4930
slices_having_nans = None
4867
4931
4868
- # convert weights to virtual indexes.
4869
- def _map_weights_to_indexes (wgts , quantiles , hard_indexes ):
4870
- # NOTE 1-D function used within apply_along_axis() might be slower,
4871
- # but probably incurs less memory footprint.
4872
- wgts_cumsum = wgts .cumsum ()
4873
- n = wgts_cumsum [- 1 ] # sum along axis replaces n in Hyndman paper.
4932
+ # Vectorized function to compute weighted quantiles.
4933
+ # Each array value occupies a range within weight space, bounded
4934
+ # by its left/right_weight_bound. If a weight_space_index falls within
4935
+ # those two bounds, the quantile will just take on that array value.
4936
+ # If the weight_space_index falls between neighboring ranges, that
4937
+ # means its quantile value must fall between neighboring array values
4938
+ # as well, so the gamma (computed in weight space) is used to
4939
+ # interpolate between those neighboring array values.
4940
+ def _get_weighted_quantile_values (arr1d , wgts1d ):
4941
+ # first deal with sorting
4942
+ # values with weight=0 are made nan and sorted to end of array
4943
+ if (wgts1d == 0 ).any ():
4944
+ arr1d = arr1d .astype (float ) # allow np.nan to weight=0 cells
4945
+ arr1d [wgts1d == 0 ] = np .nan
4946
+ sort_indexes = np .argsort (arr1d )
4947
+ arr1d = arr1d [sort_indexes ]
4948
+ wgts1d = wgts1d [sort_indexes ]
4949
+
4950
+ wgts1d = wgts1d [~ np .isnan (arr1d )] # only deal with non-nan values
4951
+ arr1d = arr1d [~ np .isnan (arr1d )]
4952
+ # weights are a proxy to values count
4953
+ wgts1d_cumsum = wgts1d .cumsum ()
4954
+ n = wgts1d_cumsum [- 1 ] # sum along axis replaces n in Hyndman.
4874
4955
weight_space_indexes = method ["get_virtual_index" ](n , quantiles )
4875
- left_weight_bound = np .roll (wgts_cumsum , 1 )
4876
- left_weight_bound [0 ] = 0
4877
- right_weight_bound = wgts_cumsum - 1
4878
- indexes = np .zeros (2 * hard_indexes .size )
4879
- weights = indexes .copy ()
4880
- indexes [0 ::2 ] = hard_indexes
4881
- indexes [1 ::2 ] = hard_indexes
4882
- weights [0 ::2 ] = left_weight_bound
4883
- weights [1 ::2 ] = right_weight_bound
4884
- return np .interp (weight_space_indexes , weights , indexes )
4885
-
4886
- hard_indexes = np .arange (values_count )
4887
-
4888
- virtual_indexes = np .apply_along_axis (_map_weights_to_indexes ,
4889
- - 1 , weights ,
4890
- quantiles = quantiles ,
4891
- hard_indexes = hard_indexes )
4892
- # unlike the no-weights case, virtual_indexes here can be N-D,
4893
- # with different indexes along every DATA_AXIS slice.
4894
- # also the use of np.interp means virtual_indexes is of type float.
4895
- previous_indexes , next_indexes = _get_indexes (arr , virtual_indexes ,
4896
- values_count )
4897
4956
4898
- # this function becomes vectorized
4899
- def _get_values_from_indexes (arr , virtual_indexes ,
4900
- previous_indexes , next_indexes ):
4901
- previous = take (arr , previous_indexes )
4902
- next = take (arr , next_indexes )
4903
- gamma = _get_gamma (virtual_indexes , previous_indexes , method )
4957
+ # each weight occupies a range in weight space w/ left/right bounds
4958
+ left_weight_bound = np .roll (wgts1d_cumsum , 1 )
4959
+ left_weight_bound [0 ] = 0 # left-most weight bound fixed at 0
4960
+ right_weight_bound = wgts1d_cumsum - 1
4961
+
4962
+ # now construct a mapping from weight bounds to real indexes
4963
+ # for example, arr1d=[1, 2] & wgts1d=[2, 3] ->
4964
+ # -> real_indexes=[0, 0, 1, 1] & w_index_bounds=[0, 1, 2, 4]
4965
+ indexes = np .arange (arr1d .size )
4966
+ real_indexes = np .zeros (2 * indexes .size )
4967
+ w_index_bounds = real_indexes .copy ()
4968
+ real_indexes [0 ::2 ] = indexes # real indexes
4969
+ real_indexes [1 ::2 ] = indexes
4970
+ w_index_bounds [0 ::2 ] = left_weight_bound # weight index bounds
4971
+ w_index_bounds [1 ::2 ] = right_weight_bound
4972
+
4973
+ # first define previous_w_indexes/next_w_indexes as the indexes
4974
+ # within w_index_bounds whose values sandwich weight_space_indexes.
4975
+ # so if w_index_bounds=[0, 1, 2, 4] and weight_space_index=3.5,
4976
+ # then previous_w_indexes = 2 and next_w_indexes = 3
4977
+ previous_w_indexes = np .searchsorted (w_index_bounds ,
4978
+ weight_space_indexes ,
4979
+ side = "right" ) - 1
4980
+ # leverage _get_index() to deal with out-of-bound indices
4981
+ previous_w_indexes , next_w_indexes = \
4982
+ _get_indexes (w_index_bounds , previous_w_indexes ,
4983
+ len (w_index_bounds ))
4984
+ # now redefine previous_w_indexes/next_w_indexes as the weight
4985
+ # space indexes that neighbor weight_space_indexes.
4986
+ previous_w_indexes = w_index_bounds [previous_w_indexes ]
4987
+ next_w_indexes = w_index_bounds [next_w_indexes ]
4988
+
4989
+ # map all weight space indexes to real indexes, then compute gamma
4990
+ previous_indexes = \
4991
+ np .interp (previous_w_indexes , w_index_bounds , real_indexes )
4992
+ next_indexes = \
4993
+ np .interp (next_w_indexes , w_index_bounds , real_indexes )
4994
+ indexes = \
4995
+ np .interp (weight_space_indexes , w_index_bounds , real_indexes )
4996
+
4997
+ # method-dependent gammas determine interpolation scheme between
4998
+ # neighboring values, and are computed in weight space.
4999
+ gamma = _get_gamma (indexes , previous_indexes , method )
5000
+
5001
+ previous = take (arr1d , previous_indexes .astype (int ))
5002
+ next = take (arr1d , next_indexes .astype (int ))
4904
5003
return _lerp (previous , next , gamma , out = out )
4905
5004
4906
- m = virtual_indexes .shape [- 1 ]
4907
- get_values_from_indexes = \
4908
- np .vectorize (_get_values_from_indexes ,
4909
- signature = f"({ n } ),({ m } ),({ m } ),({ m } )->({ m } )" )
4910
- result = get_values_from_indexes (arr , virtual_indexes ,
4911
- previous_indexes , next_indexes )
5005
+ n = arr .shape [- 1 ] if arr .ndim else 1 # same as values_count
5006
+ m = quantiles .shape [- 1 ] if quantiles .ndim else ""
5007
+
5008
+ get_weighted_quantile_values = \
5009
+ np .vectorize (_get_weighted_quantile_values ,
5010
+ signature = f"({ n } ),({ n } )->({ m } )" )
5011
+ result = get_weighted_quantile_values (arr , weights )
5012
+
4912
5013
# now move data to DATA_AXIS to be consistent with no-weights case
4913
5014
result = np .moveaxis (result , - 1 , destination = 0 )
4914
5015
@@ -4935,7 +5036,7 @@ def _get_values_from_indexes(arr, virtual_indexes,
4935
5036
4936
5037
def _take_sorted_index_values (arr , method , virtual_indexes , values_count ,
4937
5038
supports_nans , out ):
4938
-
5039
+ """Evaluate quantile values for all cases without weights."""
4939
5040
if np .issubdtype (virtual_indexes .dtype , np .integer ):
4940
5041
# No interpolation needed, take the points along axis
4941
5042
if supports_nans :
0 commit comments