36
36
import matplotlib .ticker as mticker
37
37
import matplotlib .transforms as mtransforms
38
38
import matplotlib .tri as mtri
39
-
40
39
from matplotlib .container import BarContainer , ErrorbarContainer , StemContainer
41
40
42
41
iterable = cbook .iterable
@@ -5470,14 +5469,15 @@ def xywhere(xs, ys, mask):
5470
5469
5471
5470
return errorbar_container # (l0, caplines, barcols)
5472
5471
5473
- def boxplot (self , x , notch = 0 , sym = 'b+' , vert = 1 , whis = 1.5 ,
5472
+ def boxplot (self , x , notch = False , sym = 'b+' , vert = True , whis = 1.5 ,
5474
5473
positions = None , widths = None , patch_artist = False ,
5475
- bootstrap = None ):
5474
+ bootstrap = None , usermedians = None , conf_intervals = None ):
5476
5475
"""
5477
5476
Call signature::
5478
5477
5479
- boxplot(x, notch=0, sym='+', vert=1, whis=1.5,
5480
- positions=None, widths=None, patch_artist=False)
5478
+ boxplot(x, notch=False, sym='b+', vert=True, whis=1.5,
5479
+ positions=None, widths=None, patch_artist=False,
5480
+ bootstrap=None, usermedians=None, conf_intervals=None)
5481
5481
5482
5482
Make a box and whisker plot for each column of *x* or each
5483
5483
vector in sequence *x*. The box extends from the lower to
@@ -5490,59 +5490,110 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
5490
5490
*x* :
5491
5491
Array or a sequence of vectors.
5492
5492
5493
- *notch* : [ 0 (default) | 1 ]
5494
- If 0, produce a rectangular box plot.
5495
- If 1, produce a notched box plot
5493
+ *notch* : [ False (default) | True ]
5494
+ If False (default), produces a rectangular box plot.
5495
+ If True, will produce a notched box plot
5496
5496
5497
- *sym* :
5498
- (default 'b+') is the default symbol for flier points.
5497
+ *sym* : [ default 'b+' ]
5498
+ The default symbol for flier points.
5499
5499
Enter an empty string ('') if you don't want to show fliers.
5500
5500
5501
- *vert* : [1 (default) | 0]
5502
- If 1, make the boxes vertical.
5503
- If 0, make horizontal boxes. (Odd, but kept for compatibility
5504
- with MATLAB boxplots)
5501
+ *vert* : [ False | True (default) ]
5502
+ If True (default), makes the boxes vertical.
5503
+ If False, makes horizontal boxes.
5505
5504
5506
- *whis* : ( default 1.5)
5507
- Defines the length of the whiskers as
5508
- a function of the inner quartile range. They extend to the
5509
- most extreme data point within ( ``whis*(75%-25%)`` ) data range.
5505
+ *whis* : [ default 1.5 ]
5506
+ Defines the length of the whiskers as a function of the inner
5507
+ quartile range. They extend to the most extreme data point
5508
+ within ( ``whis*(75%-25%)`` ) data range.
5510
5509
5511
5510
*bootstrap* : [ *None* (default) | integer ]
5512
5511
Specifies whether to bootstrap the confidence intervals
5513
- around the median for notched boxplots. If *None*, no
5514
- bootstrapping is performed, and notches are calculated
5515
- using a Gaussian-based asymptotic approximation
5516
- (see McGill, R., Tukey, J.W., and Larsen, W.A.,
5517
- 1978, and Kendall and Stuart, 1967). Otherwise, bootstrap
5518
- specifies the number of times to bootstrap the median to
5519
- determine its 95% confidence intervals. Values between 1000
5520
- and 10000 are recommended.
5521
-
5522
- *positions* : (default 1,2,...,n)
5523
- Sets the horizontal positions of
5524
- the boxes. The ticks and limits are automatically set to match
5525
- the positions.
5526
-
5527
- *widths* : [ scalar | array ]
5528
- Either a scalar or a vector to set the width of each box.
5529
- The default is 0.5, or ``0.15*(distance between extreme
5530
- positions)`` if that is smaller.
5531
-
5532
- *patch_artist* : boolean
5533
- If *False* (default), produce boxes with the
5534
- :class:`~matplotlib.lines.Line2D` artist.
5535
- If *True*, produce boxes with the
5536
- :class:`~matplotlib.patches.Patch` artist.
5512
+ around the median for notched boxplots. If bootstrap==None,
5513
+ no bootstrapping is performed, and notches are calculated
5514
+ using a Gaussian-based asymptotic approximation (see McGill, R.,
5515
+ Tukey, J.W., and Larsen, W.A., 1978, and Kendall and Stuart,
5516
+ 1967). Otherwise, bootstrap specifies the number of times to
5517
+ bootstrap the median to determine its 95% confidence intervals.
5518
+ Values between 1000 and 10000 are recommended.
5519
+
5520
+ *usermedians* : [ default None ]
5521
+ An array or sequence whose first dimension (or length) is
5522
+ compatible with *x*. This overrides the medians computed by
5523
+ matplotlib for each element of *usermedians* that is not None.
5524
+ When an element of *usermedians* == None, the median will be
5525
+ computed directly as normal.
5526
+
5527
+ *conf_intervals* : [ default None ]
5528
+ Array or sequence whose first dimension (or length) is compatible
5529
+ with *x* and whose second dimension is 2. When the current element
5530
+ of *conf_intervals* is not None, the notch locations computed by
5531
+ matplotlib are overridden (assuming notch is True). When an element of
5532
+ *conf_intervals* is None, boxplot computes notches using the method
5533
+ specified by the other kwargs (e.g. *bootstrap*).
5534
+
5535
+ *positions* : [ default 1,2,...,n ]
5536
+ Sets the horizontal positions of the boxes. The ticks and limits
5537
+ are automatically set to match the positions.
5538
+
5539
+ *widths* : [ default 0.5 ]
5540
+ Either a scalar or a vector that sets the width of each box. The
5541
+ default is 0.5, or ``0.15*(distance between extreme positions)``
5542
+ if that is smaller.
5543
+
5544
+ *patch_artist* : [ False (default) | True ]
5545
+ If False produces boxes with the Line2D artist
5546
+ If True produces boxes with the Patch artist
5537
5547
5538
5548
Returns a dictionary mapping each component of the boxplot
5539
- to a list of the :class:`~matplotlib.lines.Line2D`
5540
- instances created (unless *patch_artist* was *True*. See above.).
5549
+ to a list of the :class:`matplotlib.lines.Line2D`
5550
+ instances created. That dictionary has the following keys
5551
+ (assuming vertical boxplots):
5552
+ boxes: the main body of the boxplot showing the quartiles
5553
+ and the median's confidence intervals if enabled.
5554
+ medians: horizontal lines at the median of each box.
5555
+ whiskers: the vertical lines extending to the most extreme,
5556
+ non-outlier data points.
5557
+ caps: the horizontal lines at the ends of the whiskers.
5558
+ fliers: points representing data that extend beyond the
5559
+ whiskers (outliers).
5560
+
5541
5561
5542
5562
**Example:**
5543
5563
5544
5564
.. plot:: pyplots/boxplot_demo.py
5545
5565
"""
5566
+ def bootstrapMedian (data , N = 5000 ):
5567
+ # determine 95% confidence intervals of the median
5568
+ M = len (data )
5569
+ percentile = [2.5 ,97.5 ]
5570
+ estimate = np .zeros (N )
5571
+ for n in range (N ):
5572
+ bsIndex = np .random .random_integers (0 ,M - 1 ,M )
5573
+ bsData = data [bsIndex ]
5574
+ estimate [n ] = mlab .prctile (bsData , 50 )
5575
+ CI = mlab .prctile (estimate , percentile )
5576
+ return CI
5577
+
5578
+ def computeConfInterval (data , med , iq , bootstrap ):
5579
+ if bootstrap is not None :
5580
+ # Do a bootstrap estimate of notch locations.
5581
+ # get conf. intervals around median
5582
+ CI = bootstrapMedian (data , N = bootstrap )
5583
+ notch_min = CI [0 ]
5584
+ notch_max = CI [1 ]
5585
+ else :
5586
+ # Estimate notch locations using Gaussian-based
5587
+ # asymptotic approximation.
5588
+ #
5589
+ # For discussion: McGill, R., Tukey, J.W.,
5590
+ # and Larsen, W.A. (1978) "Variations of
5591
+ # Boxplots", The American Statistician, 32:12-16.
5592
+ N = len (data )
5593
+ notch_min = med - 1.57 * iq / np .sqrt (N )
5594
+ notch_max = med + 1.57 * iq / np .sqrt (N )
5595
+ return notch_min , notch_max
5596
+
5546
5597
if not self ._hold : self .cla ()
5547
5598
holdStatus = self ._hold
5548
5599
whiskers , caps , boxes , medians , fliers = [], [], [], [], []
@@ -5568,6 +5619,38 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
5568
5619
x = [x ]
5569
5620
col = len (x )
5570
5621
5622
+ # sanitize user-input medians
5623
+ msg1 = "usermedians must either be a list/tuple or a 1d array"
5624
+ msg2 = "usermedians' length must be compatible with x"
5625
+ if usermedians is not None :
5626
+ if hasattr (usermedians , 'shape' ):
5627
+ if len (usermedians .shape ) != 1 :
5628
+ raise ValueError (msg1 )
5629
+ elif usermedians .shape [0 ] != col :
5630
+ raise ValueError (msg2 )
5631
+ elif len (usermedians ) != col :
5632
+ raise ValueError (msg2 )
5633
+
5634
+ #sanitize user-input confidence intervals
5635
+ msg1 = "conf_intervals must either be a list of tuples or a 2d array"
5636
+ msg2 = "conf_intervals' length must be compatible with x"
5637
+ msg3 = "each conf_interval, if specificied, must have two values"
5638
+ if conf_intervals is not None :
5639
+ if hasattr (conf_intervals , 'shape' ):
5640
+ if len (conf_intervals .shape ) != 2 :
5641
+ raise ValueError (msg1 )
5642
+ elif conf_intervals .shape [0 ] != col :
5643
+ raise ValueError (msg2 )
5644
+ elif conf_intervals .shape [1 ] == 2 :
5645
+ raise ValueError (msg3 )
5646
+ else :
5647
+ if len (conf_intervals ) != col :
5648
+ raise ValueError (msg2 )
5649
+ for ci in conf_intervals :
5650
+ if ci is not None and len (ci ) != 2 :
5651
+ raise ValueError (msg3 )
5652
+
5653
+
5571
5654
# get some plot info
5572
5655
if positions is None :
5573
5656
positions = range (1 , col + 1 )
@@ -5579,14 +5662,21 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
5579
5662
5580
5663
# loop through columns, adding each to plot
5581
5664
self .hold (True )
5582
- for i ,pos in enumerate (positions ):
5665
+ for i , pos in enumerate (positions ):
5583
5666
d = np .ravel (x [i ])
5584
5667
row = len (d )
5585
5668
if row == 0 :
5586
5669
# no data, skip this position
5587
5670
continue
5671
+
5588
5672
# get median and quartiles
5589
5673
q1 , med , q3 = mlab .prctile (d ,[25 ,50 ,75 ])
5674
+
5675
+ # replace with input medians if available
5676
+ if usermedians is not None :
5677
+ if usermedians [i ] is not None :
5678
+ med = usermedians [i ]
5679
+
5590
5680
# get high extreme
5591
5681
iq = q3 - q1
5592
5682
hi_val = q3 + whis * iq
@@ -5626,42 +5716,16 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
5626
5716
# get y location for median
5627
5717
med_y = [med , med ]
5628
5718
5629
- # calculate 'regular' plot
5630
- if notch == 0 :
5631
- # make our box vectors
5632
- box_x = [box_x_min , box_x_max , box_x_max , box_x_min , box_x_min ]
5633
- box_y = [q1 , q1 , q3 , q3 , q1 ]
5634
- # make our median line vectors
5635
- med_x = [box_x_min , box_x_max ]
5636
5719
# calculate 'notch' plot
5637
- else :
5638
- if bootstrap is not None :
5639
- # Do a bootstrap estimate of notch locations.
5640
- def bootstrapMedian (data , N = 5000 ):
5641
- # determine 95% confidence intervals of the median
5642
- M = len (data )
5643
- percentile = [2.5 ,97.5 ]
5644
- estimate = np .zeros (N )
5645
- for n in range (N ):
5646
- bsIndex = np .random .random_integers (0 ,M - 1 ,M )
5647
- bsData = data [bsIndex ]
5648
- estimate [n ] = mlab .prctile (bsData , 50 )
5649
- CI = mlab .prctile (estimate , percentile )
5650
- return CI
5651
-
5652
- # get conf. intervals around median
5653
- CI = bootstrapMedian (d , N = bootstrap )
5654
- notch_max = CI [1 ]
5655
- notch_min = CI [0 ]
5720
+ if notch :
5721
+ # conf. intervals from user, if available
5722
+ if conf_intervals is not None and conf_intervals [i ] is not None :
5723
+ notch_max = np .max (conf_intervals [i ])
5724
+ notch_min = np .min (conf_intervals [i ])
5656
5725
else :
5657
- # Estimate notch locations using Gaussian-based
5658
- # asymptotic approximation.
5659
- #
5660
- # For discussion: McGill, R., Tukey, J.W.,
5661
- # and Larsen, W.A. (1978) "Variations of
5662
- # Boxplots", The American Statistician, 32:12-16.
5663
- notch_max = med + 1.57 * iq / np .sqrt (row )
5664
- notch_min = med - 1.57 * iq / np .sqrt (row )
5726
+ notch_min , notch_max = computeConfInterval (d , med , iq ,
5727
+ bootstrap )
5728
+
5665
5729
# make our notched box vectors
5666
5730
box_x = [box_x_min , box_x_max , box_x_max , cap_x_max , box_x_max ,
5667
5731
box_x_max , box_x_min , box_x_min , cap_x_min , box_x_min ,
@@ -5671,6 +5735,13 @@ def bootstrapMedian(data, N=5000):
5671
5735
# make our median line vectors
5672
5736
med_x = [cap_x_min , cap_x_max ]
5673
5737
med_y = [med , med ]
5738
+ # calculate 'regular' plot
5739
+ else :
5740
+ # make our box vectors
5741
+ box_x = [box_x_min , box_x_max , box_x_max , box_x_min , box_x_min ]
5742
+ box_y = [q1 , q1 , q3 , q3 , q1 ]
5743
+ # make our median line vectors
5744
+ med_x = [box_x_min , box_x_max ]
5674
5745
5675
5746
def to_vc (xs ,ys ):
5676
5747
# convert arguments to verts and codes
@@ -5720,12 +5791,13 @@ def dopatch(xs,ys):
5720
5791
boxes .extend (dopatch (box_x , box_y ))
5721
5792
else :
5722
5793
boxes .extend (doplot (box_x , box_y , 'b-' ))
5794
+
5723
5795
medians .extend (doplot (med_x , med_y , median_color + '-' ))
5724
5796
fliers .extend (doplot (flier_hi_x , flier_hi , sym ,
5725
5797
flier_lo_x , flier_lo , sym ))
5726
5798
5727
5799
# fix our axes/ticks up a little
5728
- if 1 == vert :
5800
+ if vert :
5729
5801
setticks , setlim = self .set_xticks , self .set_xlim
5730
5802
else :
5731
5803
setticks , setlim = self .set_yticks , self .set_ylim
0 commit comments