Cleanup stock sample data. · matplotlib/matplotlib@c7b1b3c · GitHub
[go: up one dir, main page]

Skip to content
8000

Commit c7b1b3c

Browse files
committed
Cleanup stock sample data.
AAPL and INTC are no longer required without stock_demo.py, plus there are 3 copies of AAPL data. Convert datetime.date to np.datetime64 to increase portability of the files.
1 parent 4dd8b7b commit c7b1b3c

File tree

14 files changed

+49
-6134
lines changed

14 files changed

+49
-6134
lines changed

doc/api/api_changes.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,20 @@ or keyword argument (``hold=False``) to change that behavior,
5050
explicitly clear the axes or figure as needed prior to subsequent
5151
plotting commands.
5252

53+
Cleanup of stock sample data
54+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55+
56+
The sample data of stocks has been cleaned up to remove redundancies and
57+
increase portability. The ``AAPL.dat.gz``, ``INTC.dat.gz`` and ``aapl.csv``
58+
files have been removed entirely and will also no longer be available from
59+
`matplotlib.cbook.get_sample_data`. If a CSV file is required, we suggest using
60+
the ``msft.csv`` that continues to be shipped in the sample data. If a NumPy
61+
binary file is acceptable, we suggest using one of the following two new files.
62+
The ``aapl.npy.gz`` and ``goog.npy`` files have been replaced by ``aapl.npz``
63+
and ``goog.npz``, wherein the first column's type has changed from
64+
`datetime.date` to `np.datetime64` for better portability across Python
65+
versions. Note that matplotlib does not fully support `np.datetime64` as yet.
66+
5367

5468
`Artist.update` has return value
5569
--------------------------------

examples/api/date_demo.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,31 +24,26 @@
2424
months = mdates.MonthLocator() # every month
2525
yearsFmt = mdates.DateFormatter('%Y')
2626

27-
# load a numpy record array from yahoo csv data with fields date,
28-
# open, close, volume, adj_close from the mpl-data/example directory.
29-
# The record array stores python datetime.date as an object array in
30-
# the date column
31-
datafile = cbook.get_sample_data('goog.npy')
32-
try:
33-
# Python3 cannot load python2 .npy files with datetime(object) arrays
34-
# unless the encoding is set to bytes. However this option was
35-
# not added until numpy 1.10 so this example will only work with
36-
# python 2 or with numpy 1.10 and later.
37-
r = np.load(datafile, encoding='bytes').view(np.recarray)
38-
except TypeError:
39-
r = np.load(datafile).view(np.recarray)
27+
# Load a numpy record array from yahoo csv data with fields date, open, close,
28+
# volume, adj_close from the mpl-data/example directory. The record array
29+
# stores the date as an np.datetime64 with a day unit ('D') in the date column.
30+
with cbook.get_sample_data('goog.npz') as datafile:
31+
r = np.load(datafile)['price_data'].view(np.recarray)
32+
# Matplotlib works better with datetime.date than np.datetime64, but the
33+
# latter is more portable.
34+
date = r.date.astype('O')
4035

4136
fig, ax = plt.subplots()
42-
ax.plot(r.date, r.adj_close)
37+
ax.plot(date, r.adj_close)
4338

4439

4540
# format the ticks
4641
ax.xaxis.set_major_locator(years)
4742
ax.xaxis.set_major_formatter(yearsFmt)
4843
ax.xaxis.set_minor_locator(months)
4944

50-
datemin = datetime.date(r.date.min().year, 1, 1)
51-
datemax = datetime.date(r.date.max().year + 1, 1, 1)
45+
datemin = datetime.date(date.min().year, 1, 1)
46+
datemax = datetime.date(date.max().year + 1, 1, 1)
5247
ax.set_xlim(datemin, datemax)
5348

5449

examples/api/date_index_formatter.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,23 @@
1010
from __future__ import print_function
1111
import numpy as np
1212
import matplotlib.pyplot as plt
13-
import matplotlib.mlab as mlab
1413
import matplotlib.cbook as cbook
1514
import matplotlib.ticker as ticker
1615

17-
datafile = cbook.get_sample_data('aapl.csv', asfileobj=False)
18-
print('loading %s' % datafile)
19-
r = mlab.csv2rec(datafile)
20-
21-
r.sort()
16+
# Load a numpy record array from yahoo csv data with fields date, open, close,
17+
# volume, adj_close from the mpl-data/example directory. The record array
18+
# stores the date as an np.datetime64 with a day unit ('D') in the date column.
19+
with cbook.get_sample_data('goog.npz') as datafile:
20+
r = np.load(datafile)['price_data'].view(np.recarray)
2221
r = r[-30:] # get the last 30 days
23-
22+
# Matplotlib works better with datetime.date than np.datetime64, but the
23+
# latter is more portable.
24+
date = r.date.astype('O')
2425

2526
# first we'll do it the default way, with gaps on weekends
2627
fig, axes = plt.subplots(ncols=2, figsize=(8, 4))
2728
ax = axes[0]
28-
ax.plot(r.date, r.adj_close, 'o-')
29+
ax.plot(date, r.adj_close, 'o-')
2930
ax.set_title("Default")
3031
fig.autofmt_xdate()
3132

@@ -36,7 +37,7 @@
3637

3738
def format_date(x, pos=None):
3839
thisind = np.clip(int(x + 0.5), 0, N - 1)
39-
return r.date[thisind].strftime('%Y-%m-%d')
40+
return date[thisind].strftime('%Y-%m-%d')
4041

4142
ax = axes[1]
4243
ax.plot(ind, r.adj_close, 'o-')

examples/misc/rec_groupby_demo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import matplotlib.mlab as mlab
44
import matplotlib.cbook as cbook
55

6-
datafile = cbook.get_sample_data('aapl.csv', asfileobj=False)
6+
datafile = cbook.get_sample_data('msft.csv', asfileobj=False)
77
print('loading', datafile)
88
r = mlab.csv2rec(datafile)
99
r.sort()

examples/misc/rec_join_demo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import matplotlib.mlab as mlab
44
import matplotlib.cbook as cbook
55

6-
datafile = cbook.get_sample_data('aapl.csv', asfileobj=False)
6+
datafile = cbook.get_sample_data('msft.csv', asfileobj=False)
77
print('loading', datafile)
88
r = mlab.csv2rec(datafile)
99

examples/pylab_examples/centered_ticklabels.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,15 @@
1919
import matplotlib.pyplot as plt
2020

2121
# load some financial data; apple's stock price
22-
fh = cbook.get_sample_data('aapl.npy.gz')
23-
try:
24-
# Python3 cannot load python2 .npy files with datetime(object) arrays
25-
# unless the encoding is set to bytes. However this option was
26-
# not added until numpy 1.10 so this example will only work with
27-
# python 2 or with numpy 1.10 and later.
28-
r = np.load(fh, encoding='bytes')
29-
except TypeError:
30-
r = np.load(fh)
31-
fh.close()
22+
with cbook.get_sample_data('aapl.npz') as fh:
23+
r = np.load(fh)['price_data'].view(np.recarray)
3224
r = r[-250:] # get the last 250 days
25+
# Matplotlib works better with datetime.date than np.datetime64, but the
26+
# latter is more portable.
27+
date = r.date.astype('O')
3328

3429
fig, ax = plt.subplots()
35-
ax.plot(r.date, r.adj_close)
30+
ax.plot(date, r.adj_close)
3631

3732
ax.xaxis.set_major_locator(dates.MonthLocator())
3833
ax.xaxis.set_minor_locator(dates.MonthLocator(bymonthday=15))
@@ -46,5 +41,5 @@
4641
tick.label1.set_horizontalalignment('center')
4742

4843
imid = len(r)//2
49-
ax.set_xlabel(str(r.date[imid].year))
44+
ax.set_xlabel(str(date[imid].year))
5045
plt.show()

examples/pylab_examples/scatter_demo2.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,11 @@
55
import matplotlib.pyplot as plt
66
import matplotlib.cbook as cbook
77

8-
# Load a numpy record array from yahoo csv data with fields date,
9-
# open, close, volume, adj_close from the mpl-data/example directory.
10-
# The record array stores python datetime.date as an object array in
11-
# the date column
12-
datafile = cbook.get_sample_data('goog.npy')
13-
try:
14-
# Python3 cannot load python2 .npy files with datetime(object) arrays
15-
# unless the encoding is set to bytes. However this option was
16-
# not added until numpy 1.10 so this example will only work with
17-
# python 2 or with numpy 1.10 and later
18-
price_data = np.load(datafile, encoding='bytes').view(np.recarray)
19-
except TypeError:
20-
price_data = np.load(datafile).view(np.recarray)
8+
# Load a numpy record array from yahoo csv data with fields date, open, close,
9+
# volume, adj_close from the mpl-data/example directory. The record array
10+
# stores the date as an np.datetime64 with a day unit ('D') in the date column.
11+
with cbook.get_sample_data('goog.npz') as datafile:
12+
price_data = np.load(datafile)['price_data'].view(np.recarray)
2113
price_data = price_data[-250:] # get the most recent 250 trading days
2214

2315
delta1 = np.diff(price_data.adj_close)/price_data.adj_close[:-1]
Binary file not shown.
-85.6 KB
Binary file not shown.

0 commit comments

Comments
 (0)
0