80
80
81
81
"""
82
82
83
- __all__ = [ 'StatisticsError' , 'NormalDist' , 'quantiles' ,
84
- 'pstdev' , 'pvariance' , 'stdev' , 'variance' ,
85
- 'median' , 'median_low' , 'median_high' , 'median_grouped' ,
86
- 'mean' , 'mode' , 'multimode' , 'harmonic_mean' , 'fmean' ,
87
- 'geometric_mean' ,
88
- ]
83
+ __all__ = [
84
+ 'NormalDist' ,
85
+ 'StatisticsError' ,
86
+ 'fmean' ,
87
+ 'geometric_mean' ,
88
+ 'harmonic_mean' ,
89
+ 'mean' ,
90
+ 'median' ,
91
+ 'median_grouped' ,
92
+ 'median_high' ,
93
+ 'median_low' ,
94
+ 'mode' ,
95
+ 'multimode' ,
96
+ 'pstdev' ,
97
+ 'pvariance' ,
98
+ 'quantiles' ,
99
+ 'stdev' ,
100
+ 'variance' ,
101
+ ]
89
102
90
103
import math
91
104
import numbers
@@ -304,16 +317,16 @@ def mean(data):
304
317
assert count == n
305
318
return _convert (total / n , T )
306
319
320
+
307
321
def fmean (data ):
308
- """ Convert data to floats and compute the arithmetic mean.
322
+ """Convert data to floats and compute the arithmetic mean.
309
323
310
324
This runs faster than the mean() function and it always returns a float.
311
325
The result is highly accurate but not as perfect as mean().
312
326
If the input dataset is empty, it raises a StatisticsError.
313
327
314
328
>>> fmean([3.5, 4.0, 5.25])
315
329
4.25
316
-
317
330
"""
318
331
try :
319
332
n = len (data )
@@ -332,6 +345,7 @@ def count(iterable):
332
345
except ZeroDivisionError :
333
346
raise StatisticsError ('fmean requires at least one data point' ) from None
334
347
348
+
335
349
def geometric_mean (data ):
336
350
"""Convert data to floats and compute the geometric mean.
337
351
@@ -350,6 +364,7 @@ def geometric_mean(data):
350
364
raise StatisticsError ('geometric mean requires a non-empty dataset '
351
365
' containing positive numbers' ) from None
352
366
367
+
353
368
def harmonic_mean (data ):
354
369
"""Return the harmonic mean of data.
355
370
@@ -547,23 +562,23 @@ def mode(data):
547
562
548
563
549
564
def multimode (data ):
550
- """ Return a list of the most frequently occurring values.
551
-
552
- Will return more than one result if there are multiple modes
553
- or an empty list if *data* is empty.
565
+ """Return a list of the most frequently occurring values.
554
566
555
- >>> multimode('aabbbbbbbbcc')
556
- ['b']
557
- >>> multimode('aabbbbccddddeeffffgg')
558
- ['b', 'd', 'f']
559
- >>> multimode('')
560
- []
567
+ Will return more than one result if there are multiple modes
568
+ or an empty list if *data* is empty.
561
569
570
+ >>> multimode('aabbbbbbbbcc')
571
+ ['b']
572
+ >>> multimode('aabbbbccddddeeffffgg')
573
+ ['b', 'd', 'f']
574
+ >>> multimode('')
575
+ []
562
576
"""
563
577
counts = Counter (iter (data )).most_common ()
564
578
maxcount , mode_items = next (groupby (counts , key = itemgetter (1 )), (0 , []))
565
579
return list (map (itemgetter (0 ), mode_items ))
566
580
581
+
567
582
# Notes on methods for computing quantiles
568
583
# ----------------------------------------
569
584
#
@@ -601,7 +616,7 @@ def multimode(data):
601
616
# external packages can be used for anything more advanced.
602
617
603
618
def quantiles (dist , / , * , n = 4 , method = 'exclusive' ):
604
- ''' Divide *dist* into *n* continuous intervals with equal probability.
619
+ """ Divide *dist* into *n* continuous intervals with equal probability.
605
620
606
621
Returns a list of (n - 1) cut points separating the intervals.
607
622
@@ -616,7 +631,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
616
631
If *method* is set to *inclusive*, *dist* is treated as population
617
632
data. The minimum value is treated as the 0th percentile and the
618
633
maximum value is treated as the 100th percentile.
619
- '''
634
+ """
620
635
if n < 1 :
621
636
raise StatisticsError ('n must be at least 1' )
622
637
if hasattr (dist , 'inv_cdf' ):
@@ -646,6 +661,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
646
661
return result
647
662
raise ValueError (f'Unknown method: { method !r} ' )
648
663
664
+
649
665
# === Measures of spread ===
650
666
651
667
# See http://mathworld.wolfram.com/Variance.html
@@ -805,59 +821,64 @@ def pstdev(data, mu=None):
805
821
except AttributeError :
806
822
return math .sqrt (var )
807
823
824
+
808
825
## Normal Distribution #####################################################
809
826
810
827
class NormalDist :
811
- ' Normal distribution of a random variable'
828
+ " Normal distribution of a random variable"
812
829
# https://en.wikipedia.org/wiki/Normal_distribution
813
830
# https://en.wikipedia.org/wiki/Variance#Properties
814
831
815
- __slots__ = {'_mu' : 'Arithmetic mean of a normal distribution' ,
816
- '_sigma' : 'Standard deviation of a normal distribution' }
832
+ __slots__ = {
833
+ '_mu' : 'Arithmetic mean of a normal distribution' ,
834
+ '_sigma' : 'Standard deviation of a normal distribution' ,
835
+ }
817
836
818
837
def __init__ (self , mu = 0.0 , sigma = 1.0 ):
819
- ' NormalDist where mu is the mean and sigma is the standard deviation.'
838
+ " NormalDist where mu is the mean and sigma is the standard deviation."
820
839
if sigma < 0.0 :
821
840
raise StatisticsError ('sigma must be non-negative' )
822
841
self ._mu = mu
823
842
self ._sigma = sigma
824
843
825
844
@classmethod
826
845
def from_samples (cls , data ):
827
- ' Make a normal distribution instance from sample data.'
846
+ " Make a normal distribution instance from sample data."
828
847
if not isinstance (data , (list , tuple )):
829
848
data = list (data )
830
849
xbar = fmean (data )
831
850
return cls (xbar , stdev (data , xbar ))
832
851
833
852
def samples (self , n , * , seed = None ):
834
- ' Generate *n* samples for a given mean and standard deviation.'
853
+ " Generate *n* samples for a given mean and standard deviation."
835
854
gauss = random .gauss if seed is None else random .Random (seed ).gauss
836
855
mu , sigma = self ._mu , self ._sigma
837
856
return [gauss (mu , sigma ) for i in range (n )]
838
857
839
858
def pdf (self , x ):
840
- ' Probability density function. P(x <= X < x+dx) / dx'
859
+ " Probability density function. P(x <= X < x+dx) / dx"
841
860
variance = self ._sigma ** 2.0
842
861
if not variance :
843
862
raise StatisticsError ('pdf() not defined when sigma is zero' )
844
- return exp ((x - self ._mu )** 2.0 / (- 2.0 * variance )) / sqrt (tau * variance )
863
+ return exp ((x - self ._mu )** 2.0 / (- 2.0 * variance )) / sqrt (tau * variance )
845
864
846
865
def cdf (self , x ):
847
- ' Cumulative distribution function. P(X <= x)'
866
+ " Cumulative distribution function. P(X <= x)"
848
867
if not self ._sigma :
849
868
raise StatisticsError ('cdf() not defined when sigma is zero' )
850
869
return 0.5 * (1.0 + erf ((x - self ._mu ) / (self ._sigma * sqrt (2.0 ))))
851
870
852
871
def inv_cdf (self , p ):
853
- ''' Inverse cumulative distribution function. x : P(X <= x) = p
872
+ """ Inverse cumulative distribution function. x : P(X <= x) = p
854
873
855
- Finds the value of the random variable such that the probability of the
856
- variable being less than or equal to that value equals the given probability.
874
+ Finds the value of the random variable such that the probability of
875
+ the variable being less than or equal to that value equals the given
876
+ probability.
857
877
858
- This function is also called the percent point function or quantile function.
859
- '''
860
- if (p <= 0.0 or p >= 1.0 ):
878
+ This function is also called the percent point function or quantile
879
+ function.
880
+ """
881
+ if p <= 0.0 or p >= 1.0 :
861
882
raise StatisticsError ('p must be in the range 0.0 < p < 1.0' )
862
883
if self ._sigma <= 0.0 :
863
884
raise StatisticsError ('cdf() not defined when sigma at or below zero' )
@@ -933,7 +954,7 @@ def inv_cdf(self, p):
933
954
return self ._mu + (x * self ._sigma )
934
955
935
956
def overlap (self , other ):
936
- ''' Compute the overlapping coefficient (OVL) between two normal distributions.
957
+ """ Compute the overlapping coefficient (OVL) between two normal distributions.
937
958
938
959
Measures the agreement between two normal probability distributions.
939
960
Returns a value between 0.0 and 1.0 giving the overlapping area in
@@ -943,7 +964,7 @@ def overlap(self, other):
943
964
>>> N2 = NormalDist(3.2, 2.0)
944
965
>>> N1.overlap(N2)
945
966
0.8035050657330205
946
- '''
967
+ """
947
968
# See: "The overlapping coefficient as a measure of agreement between
948
969
# probability distributions and point estimation of the overlap of two
949
970
# normal densities" -- Henry F. Inman and Edwin L. Bradley Jr
@@ -968,87 +989,87 @@ def overlap(self, other):
968
989
969
990
@property
970
991
def mean (self ):
971
- ' Arithmetic mean of the normal distribution.'
992
+ " Arithmetic mean of the normal distribution."
972
993
return self ._mu
973
994
974
995
@property
975
996
def stdev (self ):
976
- ' Standard deviation of the normal distribution.'
997
+ " Standard deviation of the normal distribution."
977
998
return self ._sigma
978
999
979
1000
@property
980
1001
def variance (self ):
981
- ' Square of the standard deviation.'
1002
+ " Square of the standard deviation."
982
1003
return self ._sigma ** 2.0
983
1004
984
1005
def __add__ (x1 , x2 ):
985
- ''' Add a constant or another NormalDist instance.
1006
+ """ Add a constant or another NormalDist instance.
986
1007
987
1008
If *other* is a constant, translate mu by the constant,
988
1009
leaving sigma unchanged.
989
1010
990
1011
If *other* is a NormalDist, add both the means and the variances.
991
1012
Mathematically, this works only if the two distributions are
992
1013
independent or if they are jointly normally distributed.
993
- '''
1014
+ """
994
1015
if isinstance (x2 , NormalDist ):
995
1016
return NormalDist (x1 ._mu + x2 ._mu , hypot (x1 ._sigma , x2 ._sigma ))
996
1017
return NormalDist (x1 ._mu + x2 , x1 ._sigma )
997
1018
998
1019
def __sub__ (x1 , x2 ):
999
- ''' Subtract a constant or another NormalDist instance.
1020
+ """ Subtract a constant or another NormalDist instance.
1000
1021
1001
1022
If *other* is a constant, translate by the constant mu,
1002
1023
leaving sigma unchanged.
1003
1024
1004
1025
If *other* is a NormalDist, subtract the means and add the variances.
1005
1026
Mathematically, this works only if the two distributions are
1006
1027
independent or if they are jointly normally distributed.
1007
- '''
1028
+ """
1008
1029
if isinstance (x2 , NormalDist ):
1009
1030
return NormalDist (x1 ._mu - x2 ._mu , hypot (x1 ._sigma , x2 ._sigma ))
1010
1031
return NormalDist (x1 ._mu - x2 , x1 ._sigma )
1011
1032
1012
1033
def __mul__ (x1 , x2 ):
1013
- ''' Multiply both mu and sigma by a constant.
1034
+ """ Multiply both mu and sigma by a constant.
1014
1035
1015
1036
Used for rescaling, perhaps to change measurement units.
1016
1037
Sigma is scaled with the absolute value of the constant.
1017
- '''
1038
+ """
1018
1039
return NormalDist (x1 ._mu * x2 , x1 ._sigma * fabs (x2 ))
1019
1040
1020
1041
def __truediv__ (x1 , x2 ):
1021
- ''' Divide both mu and sigma by a constant.
1042
+ """ Divide both mu and sigma by a constant.
1022
1043
1023
1044
Used for rescaling, perhaps to change measurement units.
1024
1045
Sigma is scaled with the absolute value of the constant.
1025
- '''
1046
+ """
1026
1047
return NormalDist (x1 ._mu / x2 , x1 ._sigma / fabs (x2 ))
1027
1048
1028
1049
def __pos__ (x1 ):
1029
- ' Return a copy of the instance.'
1050
+ " Return a copy of the instance."
1030
1051
return NormalDist (x1 ._mu , x1 ._sigma )
1031
1052
1032
1053
def __neg__ (x1 ):
1033
- ' Negates mu while keeping sigma the same.'
1054
+ " Negates mu while keeping sigma the same."
1034
1055
return NormalDist (- x1 ._mu , x1 ._sigma )
1035
1056
1036
1057
__radd__ = __add__
1037
1058
1038
1059
def __rsub__ (x1 , x2 ):
1039
- ' Subtract a NormalDist from a constant or another NormalDist.'
1060
+ " Subtract a NormalDist from a constant or another NormalDist."
1040
1061
return - (x1 - x2 )
1041
1062
1042
1063
__rmul__ = __mul__
1043
1064
1044
1065
def __eq__(x1, x2):
    """Two NormalDist objects are equal if their mu and sigma are both equal.

    Returns NotImplemented when *x2* is not a NormalDist, so that Python
    can try the reflected comparison on the other operand.
    """
    if not isinstance(x2, NormalDist):
        return NotImplemented
    # Compare x1's own sigma against x2's.  The earlier expression used
    # x2._sigma on both sides, so instances that differed only in sigma
    # compared equal, which was inconsistent with __hash__.
    return (x1._mu, x1._sigma) == (x2._mu, x2._sigma)
1049
1070
1050
1071
def __hash__ (self ):
1051
- ' NormalDist objects hash equal if their mu and sigma are both equal.'
1072
+ " NormalDist objects hash equal if their mu and sigma are both equal."
1052
1073
return hash ((self ._mu , self ._sigma ))
1053
1074
1054
1075
def __repr__ (self ):
0 commit comments