DOC: MEP12 docstring for the histogram examples · matplotlib/matplotlib@bb8143a · GitHub
[go: up one dir, main page]

Skip to content

Commit bb8143a

Browse files
committed
DOC: MEP12 docstring for the histogram examples
1 parent 2ecdc6b commit bb8143a

File tree

5 files changed

+99
-43
lines changed

5 files changed

+99
-43
lines changed
Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,64 @@
1-
"""
2-
Demo of the histogram (hist) function used to plot a cumulative distribution.
1+
"""Demo of using histograms to plot a cumulative distribution.
2+
3+
This shows how to plot a cumulative, normalized histogram as a
4+
step function as a means of visualizing the empirical cumulative
5+
distribution function (CDF) of a sample. We also use the `mlab`
6+
module to show the theoretical CDF.
7+
8+
A couple of other options to the `hist` function are demonstrated.
9+
Namely, we use the `normed` parameter to normalize the histogram and
10+
a couple of different options to the `cumulative` parameter. Normalizing
11+
a histogram means that the counts within each bin are scaled such that
12+
the area under the histogram sums to 1. Since we're showing the
13+
cumulative histogram, the max value at the end of the series is 1.
14+
The `normed` parameter takes a boolean value.
15+
16+
The `cumulative` kwarg is a little more nuanced. Like `normed`, you can
17+
pass it True or False, but you can also pass it -1 and that will
18+
reverse the distribution. In engineering, CDFs where `cumulative` is
19+
simply True are sometimes called "non-exceedance" curves. In other words, you
20+
can look at the y-value to get the probability of non-exceedance. For example,
21+
the value of 225 on the x-axis corresponds to about 0.85 on the y-axis,
22+
so there's an 85% chance that an observation in the sample does not
23+
exceed 225.
24+
25+
Conversely, setting `cumulative` to -1, as is done in the last series
26+
for this example, creates an "exceedance" curve.
327
428
"""
29+
530
import numpy as np
631
import matplotlib.pyplot as plt
732
from matplotlib import mlab
833

34+
np.random.seed(0)
935

1036
mu = 200
1137
sigma = 25
1238
n_bins = 50
13-
x = mu + sigma*np.random.randn(10000)
39+
x = mu + sigma*np.random.randn(100)
1440

15-
n, bins, patches = plt.hist(x, n_bins, normed=1,
16-
histtype='step', cumulative=True)
41+
fig, ax = plt.subplots(figsize=(8, 4))
42+
43+
# plot the cumulative histogram
44+
n, bins, patches = ax.hist(x, n_bins, normed=1, histtype='step',
45+
cumulative=True, label='Empirical')
1746

1847
# Add a line showing the expected distribution.
1948
y = mlab.normpdf(bins, mu, sigma).cumsum()
2049
y /= y[-1]
21-
plt.plot(bins, y, 'k--', linewidth=1.5)
50+
51+
ax.plot(bins, y, 'k--', linewidth=1.5, label='Theoretical')
2252

2353
# Overlay a reversed cumulative histogram.
24-
plt.hist(x, bins=bins, normed=1, histtype='step', cumulative=-1)
54+
ax.hist(x, bins=bins, normed=1, histtype='step', cumulative=-1,
55+
label='Reversed emp.')
2556

26-
plt.grid(True)
27-
plt.title('cumulative step')
57+
# tidy up the figure
58+
ax.grid(True)
59+
ax.legend(loc='right')
60+
ax.set_title('Cumulative step histograms')
61+
ax.set_xlabel('Annual rainfall (mm)')
62+
ax.set_ylabel('Likelihood of occurance')
2863

2964
plt.show()
Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,41 @@
1-
"""
2-
Demo of the histogram (hist) function with a few features.
1+
"""Demo of the histogram (hist) function with a few features.
32
4-
In addition to the basic histogram, this demo shows a few optional features:
3+
In addition to the basic histogram, this demo shows a few optional
4+
features:
55
66
* Setting the number of data bins
7-
* The ``normed`` flag, which normalizes bin heights so that the integral of
8-
the histogram is 1. The resulting histogram is a probability density.
7+
* The ``normed`` flag, which normalizes bin heights so that the
8+
integral of the histogram is 1. The resulting histogram is an
9+
approximation of the probability density function.
910
* Setting the face color of the bars
1011
* Setting the opacity (alpha value).
11-
1212
"""
13+
1314
import numpy as np
1415
import matplotlib.mlab as mlab
1516
import matplotlib.pyplot as plt
1617

18+
np.random.seed(0)
1719

1820
# example data
1921
mu = 100 # mean of distribution
2022
sigma = 15 # standard deviation of distribution
21-
x = mu + sigma * np.random.randn(10000)
23+
x = mu + sigma * np.random.randn(437)
2224

2325
num_bins = 50
26+
27+
fig, ax = plt.subplots()
28+
2429
# the histogram of the data
25-
n, bins, patches = plt.hist(x, num_bins, normed=1)
30+
n, bins, patches = ax.hist(x, num_bins, normed=1)
31+
2632
# add a 'best fit' line
2733
y = mlab.normpdf(bins, mu, sigma)
28-
plt.plot(bins, y, '--')
29-
plt.xlabel('Smarts')
30-
plt.ylabel('Probability density')
31-
plt.title(r'Histogram of IQ: $\mu=100$, $\sigma=15$')
34+
ax.plot(bins, y, '--')
35+
ax.set_xlabel('Smarts')
36+
ax.set_ylabel('Probability density')
37+
ax.set_title(r'Histogram of IQ: $\mu=100$, $\sigma=15$')
3238

3339
# Tweak spacing to prevent clipping of ylabel
34-
plt.subplots_adjust(left=0.15)
40+
fig.tight_layout()
3541
plt.show()
Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
1-
"""
2-
Demo of the histogram (hist) function with different ``histtype`` settings.
1+
"""Demo of the histogram function's different ``histtype`` settings.
32
43
* Histogram with step curve that has a color fill.
5-
* Histogram with with unequal bin widths.
4+
* Histogram with custom and unequal bin widths.
65
76
"""
7+
88
import numpy as np
99
import matplotlib.pyplot as plt
1010

11+
np.random.seed(0)
1112

1213
mu = 200
1314
sigma = 25
14-
x = mu + sigma*np.random.randn(10000)
15+
x = np.random.normal(mu, sigma, size=100)
1516

1617
fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(8, 4))
1718

@@ -23,5 +24,5 @@
2324
ax1.hist(x, bins, normed=1, histtype='bar', rwidth=0.8)
2425
ax1.set_title('unequal bins')
2526

26-
plt.tight_layout()
27+
fig.tight_layout()
2728
plt.show()
Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,23 @@
1-
"""
2-
Demo of the histogram (hist) function with multiple data sets.
1+
"""Demo of the histogram (hist) function with multiple data sets.
32
43
Plot histogram with multiple sample sets and demonstrate:
54
65
* Use of legend with multiple sample sets
76
* Stacked bars
8-
* Step curve with a color fill
7+
* Step curve with no fill
98
* Data sets of different sample sizes
109
"""
10+
1111
import numpy as np
1212
import matplotlib.pyplot as plt
1313

14+
np.random.seed(0)
1415

1516
n_bins = 10
1617
x = np.random.randn(1000, 3)
1718

1819
fig, axes = plt.subplots(nrows=2, ncols=2)
19-
ax0, ax1, ax2, ax3 = axes.flat
20+
ax0, ax1, ax2, ax3 = axes.flatten()
2021

2122
colors = ['red', 'tan', 'lime']
2223
ax0.hist(x, n_bins, normed=1, histtype='bar', color=colors, label=colors)
@@ -26,13 +27,13 @@
2627
ax1.hist(x, n_bins, normed=1, histtype='bar', stacked=True)
2728
ax1.set_title('stacked bar')
2829

29-
ax2.hist(x, n_bins, histtype='step', stacked=True, fill=True)
30-
ax2.set_title('stepfilled')
30+
ax2.hist(x, n_bins, histtype='step', stacked=True, fill=False)
31+
ax2.set_title('step (unfilled)')
3132

3233
# Make a multiple-histogram of data-sets with different length.
3334
x_multi = [np.random.randn(n) for n in [10000, 5000, 2000]]
3435
ax3.hist(x_multi, n_bins, histtype='bar')
3536
ax3.set_title('different sample sizes')
3637

37-
plt.tight_layout()
38+
fig.tight_layout()
3839
plt.show()

examples/statistics/multiple_histograms_side_by_side.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,49 @@
1-
2-
"""
3-
Demo of how to produce multiple histograms side by side
1+
"""Demo of how to produce multiple histograms side by side
2+
3+
This example plots horizontal histograms of different samples along
4+
a categorical x-axis. Additionally, the histograms are plotted to
5+
be symmetrical about their x-position, thus making them very similar
6+
to violin plots.
7+
8+
To make this highly specialized plot, we can't use the standard `hist`
9+
method. Instead we use `barh` to draw the horizontal bars directly. The
10+
vertical positions and lengths of the bars are computed via the
11+
`np.histogram` function. The histograms for all the samples are
12+
computed using the same range (min and max values) and number of bins,
13+
so that the bins for each sample are in the same vertical positions.
414
"""
515

616
import numpy as np
717
import matplotlib.pyplot as plt
818

19+
np.random.seed(0)
920
number_of_bins = 20
1021

1122
# An example of three data sets to compare
12-
number_of_data_points = 1000
23+
number_of_data_points = 387
1324
labels = ["A", "B", "C"]
1425
data_sets = [np.random.normal(0, 1, number_of_data_points),
1526
np.random.normal(6, 1, number_of_data_points),
1627
np.random.normal(-3, 1, number_of_data_points)]
1728

1829
# Computed quantities to aid plotting
1930
hist_range = (np.min(data_sets), np.max(data_sets))
20-
binned_data_sets = [np.histogram(d, range=hist_range, bins=number_of_bins)[0]
21-
for d in data_sets]
31+
binned_data_sets = [
32+
np.histogram(d, range=hist_range, bins=number_of_bins)[0]
33+
for d in data_sets
34+
]
2235
binned_maximums = np.max(binned_data_sets, axis=1)
2336
x_locations = np.arange(0, sum(binned_maximums), np.max(binned_maximums))
2437

2538
# The bin_edges are the same for all of the histograms
2639
bin_edges = np.linspace(hist_range[0], hist_range[1], number_of_bins + 1)
27-
centers = .5 * (bin_edges + np.roll(bin_edges, 1))[:-1]
40+
centers = 0.5 * (bin_edges + np.roll(bin_edges, 1))[:-1]
2841
heights = np.diff(bin_edges)
2942

3043
# Cycle through and plot each histogram
31-
ax = plt.subplot(111)
44+
fig, ax = plt.subplots()
3245
for x_loc, binned_data in zip(x_locations, binned_data_sets):
33-
lefts = x_loc - .5 * binned_data
46+
lefts = x_loc - 0.5 * binned_data
3447
ax.barh(centers, binned_data, height=heights, left=lefts)
3548

3649
ax.set_xticks(x_locations)

0 commit comments

Comments
 (0)
0