Merge pull request #16288 from anntzer/csv2rec · matplotlib/matplotlib@d14ad5f · GitHub

Commit d14ad5f

Merge pull request #16288 from anntzer/csv2rec
Remove the private, unused _csv2rec.
2 parents 8971ab0 + 0efaba6 commit d14ad5f

File tree: 4 files changed, +8 −362 lines changed

doc/faq/howto_faq.rst

Lines changed: 7 additions & 31 deletions

@@ -336,37 +336,13 @@ setting in the right subplots.
 Skip dates where there is no data
 ---------------------------------
 
-When plotting time series, e.g., financial time series, one often wants
-to leave out days on which there is no data, e.g., weekends. By passing
-in dates on the x-xaxis, you get large horizontal gaps on periods when
-there is not data. The solution is to pass in some proxy x-data, e.g.,
-evenly sampled indices, and then use a custom formatter to format
-these as dates. The example below shows how to use an 'index formatter'
-to achieve the desired plot::
-
-    import numpy as np
-    import matplotlib.pyplot as plt
-    import matplotlib.mlab as mlab
-    import matplotlib.ticker as ticker
-
-    r = mlab.csv2rec('../data/aapl.csv')
-    r.sort()
-    r = r[-30:]  # get the last 30 days
-
-    N = len(r)
-    ind = np.arange(N)  # the evenly spaced plot indices
-
-    def format_date(x, pos=None):
-        thisind = np.clip(int(x+0.5), 0, N-1)
-        return r.date[thisind].strftime('%Y-%m-%d')
-
-    fig = plt.figure()
-    ax = fig.add_subplot(111)
-    ax.plot(ind, r.adj_close, 'o-')
-    ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_date))
-    fig.autofmt_xdate()
-
-    plt.show()
+When plotting time series, e.g., financial time series, one often wants to
+leave out days on which there is no data, e.g., weekends. By passing in
+dates on the x-xaxis, you get large horizontal gaps on periods when there
+is not data. The solution is to pass in some proxy x-data, e.g., evenly
+sampled indices, and then use a custom formatter to format these as dates.
+:doc:`/gallery/text_labels_and_annotations/date_index_formatter` demonstrates
+how to use an 'index formatter' to achieve the desired plot.
 
 .. _howto-set-zorder:
 
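The FAQ now links to the gallery instead of embedding code. For reference, a minimal, self-contained sketch of the same 'index formatter' trick, using synthetic data in place of the removed aapl.csv/mlab.csv2rec pair (the dates and prices below are made up), would look roughly like:

    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib.ticker as ticker

    # Synthetic stand-in for the old CSV: the last 30 business days of prices.
    days = np.arange('2020-01-01', '2020-02-15', dtype='datetime64[D]')
    dates = np.array([d for d in days if np.is_busday(d)][-30:])
    prices = 100 + np.cumsum(np.random.randn(len(dates)))

    N = len(dates)
    ind = np.arange(N)  # evenly spaced proxy x-values, one per trading day

    def format_date(x, pos=None):
        # Map a proxy index back to the real date for the tick label.
        thisind = np.clip(int(x + 0.5), 0, N - 1)
        return str(dates[thisind])

    fig, ax = plt.subplots()
    ax.plot(ind, prices, 'o-')
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_date))
    fig.autofmt_xdate()
    plt.show()

Plotting against ind rather than dates is what removes the weekend gaps; the formatter only changes the labels.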

lib/matplotlib/mlab.py

Lines changed: 0 additions & 281 deletions

@@ -53,7 +53,6 @@
     Apply a window along a given axis
 """
 
-import csv
 import functools
 from numbers import Number
 
@@ -985,286 +984,6 @@ def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning,
     return Cxy, f
 
 
-def _csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
-             converterd=None, names=None, missing='', missingd=None,
-             use_mrecords=False, dayfirst=False, yearfirst=False):
-    """
-    Load data from comma/space/tab delimited file in *fname* into a
-    numpy record array and return the record array.
-
-    If *names* is *None*, a header row is required to automatically
-    assign the recarray names. The headers will be lower cased,
-    spaces will be converted to underscores, and illegal attribute
-    name characters removed. If *names* is not *None*, it is a
-    sequence of names to use for the column names. In this case, it
-    is assumed there is no header row.
-
-
-    - *fname*: can be a filename or a file handle. Support for gzipped
-      files is automatic, if the filename ends in '.gz'
-
-    - *comments*: the character used to indicate the start of a comment
-      in the file, or *None* to switch off the removal of comments
-
-    - *skiprows*: is the number of rows from the top to skip
-
-    - *checkrows*: is the number of rows to check to validate the column
-      data type. When set to zero all rows are validated.
-
-    - *converterd*: if not *None*, is a dictionary mapping column number or
-      munged column name to a converter function.
-
-    - *names*: if not None, is a list of header names. In this case, no
-      header will be read from the file
-
-    - *missingd* is a dictionary mapping munged column names to field values
-      which signify that the field does not contain actual data and should
-      be masked, e.g., '0000-00-00' or 'unused'
-
-    - *missing*: a string whose value signals a missing field regardless of
-      the column it appears in
-
-    - *use_mrecords*: if True, return an mrecords.fromrecords record array if
-      any of the data are missing
-
-    - *dayfirst*: default is False so that MM-DD-YY has precedence over
-      DD-MM-YY. See
-      http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
-      for further information.
-
-    - *yearfirst*: default is False so that MM-DD-YY has precedence over
-      YY-MM-DD. See
-      http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
-      for further information.
-
-    If no rows are found, *None* is returned
-    """
-
-    if converterd is None:
-        converterd = dict()
-
-    if missingd is None:
-        missingd = {}
-
-    import dateutil.parser
-    import datetime
-
-    fh = cbook.to_filehandle(fname)
-
-    delimiter = str(delimiter)
-
-    class FH:
-        """
-        For space-delimited files, we want different behavior than
-        comma or tab. Generally, we want multiple spaces to be
-        treated as a single separator, whereas with comma and tab we
-        want multiple commas to return multiple (empty) fields. The
-        join/strip trick below effects this.
-        """
-        def __init__(self, fh):
-            self.fh = fh
-
-        def close(self):
-            self.fh.close()
-
-        def seek(self, arg):
-            self.fh.seek(arg)
-
-        def fix(self, s):
-            return ' '.join(s.split())
-
-        def __next__(self):
-            return self.fix(next(self.fh))
-
-        def __iter__(self):
-            for line in self.fh:
-                yield self.fix(line)
-
-    if delimiter == ' ':
-        fh = FH(fh)
-
-    reader = csv.reader(fh, delimiter=delimiter)
-
-    def process_skiprows(reader):
-        if skiprows:
-            for i, row in enumerate(reader):
-                if i >= (skiprows-1):
-                    break
-
-        return fh, reader
-
-    process_skiprows(reader)
-
-    def ismissing(name, val):
-        """Return whether the value val in column name should be masked."""
-        return val == missing or val == missingd.get(name) or val == ''
-
-    def with_default_value(func, default):
-        def newfunc(name, val):
-            if ismissing(name, val):
-                return default
-            else:
-                return func(val)
-        return newfunc
-
-    def mybool(x):
-        if x == 'True':
-            return True
-        elif x == 'False':
-            return False
-        else:
-            raise ValueError('invalid bool')
-
-    dateparser = dateutil.parser.parse
-
-    def mydateparser(x):
-        # try and return a datetime object
-        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)
-        return d
-
-    mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1))
-
-    myfloat = with_default_value(float, np.nan)
-    myint = with_default_value(int, -1)
-    mystr = with_default_value(str, '')
-    mybool = with_default_value(mybool, None)
-
-    def mydate(x):
-        # try and return a date object
-        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)
-
-        if d.hour > 0 or d.minute > 0 or d.second > 0:
-            raise ValueError('not a date')
-        return d.date()
-    mydate = with_default_value(mydate, datetime.date(1, 1, 1))
-
-    def get_func(name, item, func):
-        # promote functions in this order
-        funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr]
-        for func in funcs[funcs.index(func):]:
-            try:
-                func(name, item)
-            except Exception:
-                continue
-            return func
-        raise ValueError('Could not find a working conversion function')
-
-    # map column names that clash with builtins -- TODO - extend this list
-    itemd = {
-        'return': 'return_',
-        'file': 'file_',
-        'print': 'print_',
-        }
-
-    def get_converters(reader, comments):
-
-        converters = None
-        i = 0
-        for row in reader:
-            if (len(row) and comments is not None and
-                    row[0].startswith(comments)):
-                continue
-            if i == 0:
-                converters = [mybool]*len(row)
-            if checkrows and i > checkrows:
-                break
-            i += 1
-
-            for j, (name, item) in enumerate(zip(names, row)):
-                func = converterd.get(j)
-                if func is None:
-                    func = converterd.get(name)
-                if func is None:
-                    func = converters[j]
-                    if len(item.strip()):
-                        func = get_func(name, item, func)
-                else:
-                    # how should we handle custom converters and defaults?
-                    func = with_default_value(func, None)
-                converters[j] = func
-        return converters
-
-    # Get header and remove invalid characters
-    needheader = names is None
-
-    if needheader:
-        for row in reader:
-            if (len(row) and comments is not None and
-                    row[0].startswith(comments)):
-                continue
-            headers = row
-            break
-
-        # remove these chars
-        delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""")
-        delete.add('"')
-
-        names = []
-        seen = dict()
-        for i, item in enumerate(headers):
-            item = item.strip().lower().replace(' ', '_')
-            item = ''.join([c for c in item if c not in delete])
-            if not len(item):
-                item = 'column%d' % i
-
-            item = itemd.get(item, item)
-            cnt = seen.get(item, 0)
-            if cnt > 0:
-                names.append(item + '_%d' % cnt)
-            else:
-                names.append(item)
-            seen[item] = cnt+1
-
-    else:
-        if isinstance(names, str):
-            names = [n.strip() for n in names.split(',')]
-
-    # get the converter functions by inspecting checkrows
-    converters = get_converters(reader, comments)
-    if converters is None:
-        raise ValueError('Could not find any valid data in CSV file')
-
-    # reset the reader and start over
-    fh.seek(0)
-    reader = csv.reader(fh, delimiter=delimiter)
-    process_skiprows(reader)
-
-    if needheader:
-        while True:
-            # skip past any comments and consume one line of column header
-            row = next(reader)
-            if (len(row) and comments is not None and
-                    row[0].startswith(comments)):
-                continue
-            break
-
-    # iterate over the remaining rows and convert the data to date
-    # objects, ints, or floats as appropriate
-    rows = []
-    rowmasks = []
-    for i, row in enumerate(reader):
-        if not len(row):
-            continue
-        if comments is not None and row[0].startswith(comments):
-            continue
-        # Ensure that the row returned always has the same nr of elements
-        row.extend([''] * (len(converters) - len(row)))
-        rows.append([func(name, val)
-                     for func, name, val in zip(converters, names, row)])
-        rowmasks.append([ismissing(name, val)
-                         for name, val in zip(names, row)])
-    fh.close()
-
-    if not len(rows):
-        return None
-
-    if use_mrecords and np.any(rowmasks):
-        r = np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks)
-    else:
-        r = np.rec.fromrecords(rows, names=names)
-    return r
-
-
 class GaussianKDE:
     """
     Representation of a kernel-density estimate using Gaussian kernels.
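The removed helper hand-rolled CSV type inference (a bool → int → float → date → datetime → str promotion chain) and header munging. The commit names no replacement, but a hedged sketch of an equivalent load with plain NumPy, which also reads the header row and infers a per-column dtype, might be (data.csv and the adj_close column are hypothetical):

    import numpy as np

    # names=True turns the header row into field names; dtype=None asks
    # genfromtxt to infer each column's type, producing a structured array
    # much like the record array _csv2rec used to return.
    r = np.genfromtxt('data.csv', delimiter=',', dtype=None,
                      names=True, encoding='utf-8')
    print(r.dtype.names)    # column names munged from the header
    print(r['adj_close'])   # access a column by field name

pandas.read_csv offers the same plus real date parsing, which is closer to what _csv2rec did for date columns.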

lib/matplotlib/pyplot.py

Lines changed: 1 addition & 1 deletion

@@ -45,7 +45,7 @@
 from matplotlib.artist import Artist
 from matplotlib.axes import Axes, Subplot
 from matplotlib.projections import PolarAxes
-from matplotlib import mlab  # for _csv2rec, detrend_none, window_hanning
+from matplotlib import mlab  # for detrend_none, window_hanning
 from matplotlib.scale import get_scale_docs, get_scale_names
 
 from matplotlib import cm
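The import survives because, as the updated comment says, pyplot's spectral helpers still lean on mlab: window_hanning and detrend_none are the default windowing and detrending behaviors of functions like plt.psd. A small sketch passing them explicitly (the signal below is made up):

    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib import mlab

    # A noisy 5 Hz sine sampled at 100 Hz, just to have something to analyze.
    t = np.arange(0, 10, 0.01)
    x = np.sin(2 * np.pi * 5 * t) + 0.5 * np.random.randn(len(t))

    # Spelling out the window/detrend callables shows why pyplot keeps
    # importing mlab after _csv2rec's removal.
    plt.psd(x, NFFT=256, Fs=100,
            window=mlab.window_hanning, detrend=mlab.detrend_none)
    plt.show()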

0 commit comments