|
53 | 53 | Apply a window along a given axis
|
54 | 54 | """
|
55 | 55 |
|
56 |
| -import csv |
57 | 56 | import functools
|
58 | 57 | from numbers import Number
|
59 | 58 |
|
@@ -985,286 +984,6 @@ def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning,
|
985 | 984 | return Cxy, f
|
986 | 985 |
|
987 | 986 |
|
988 |
def _csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
             converterd=None, names=None, missing='', missingd=None,
             use_mrecords=False, dayfirst=False, yearfirst=False):
    """
    Load data from a comma/space/tab delimited file in *fname* into a
    numpy record array and return the record array.

    If *names* is *None*, a header row is required to automatically
    assign the recarray names.  The headers will be lower cased,
    spaces will be converted to underscores, and illegal attribute
    name characters removed.  If *names* is not *None*, it is a
    sequence of names to use for the column names.  In this case, it
    is assumed there is no header row.

    The file is read twice: a first pass picks a converter for every
    column, then the handle is rewound with ``seek(0)`` and a second pass
    converts the values.  The handle is always closed before returning,
    including when an exception is raised.

    - *fname*: a filename or a file handle; it is opened with
      ``cbook.to_filehandle``.

    - *comments*: the character used to indicate the start of a comment
      in the file, or *None* to switch off the removal of comments.  A row
      is treated as a comment when its first field starts with *comments*.

    - *skiprows*: the number of rows from the top to skip.

    - *checkrows*: the number of data rows to inspect when choosing the
      column converters.  When set to zero all rows are inspected.

    - *converterd*: if not *None*, a dictionary mapping column number or
      munged column name to a converter function taking the field string.
      Missing fields in such columns are converted to *None*.

    - *names*: if not *None*, a list of column names, or a single
      comma-separated string of names.  In this case, no header is read
      from the file.

    - *missing*: a string whose value signals a missing field regardless of
      the column it appears in.  Empty fields are always missing.

    - *missingd*: a dictionary mapping munged column names to field values
      which signify that the field does not contain actual data and should
      be masked, e.g., '0000-00-00' or 'unused'.

    - *use_mrecords*: if True, return an mrecords.fromrecords record array
      if any of the data are missing.

    - *dayfirst*: passed to ``dateutil.parser.parse``; the default False
      gives MM-DD-YY precedence over DD-MM-YY.

    - *yearfirst*: passed to ``dateutil.parser.parse``; the default False
      gives MM-DD-YY precedence over YY-MM-DD.

    Columns are auto-typed by trying, in order, bool ('True'/'False'), int,
    float, date, datetime and finally str; a column is promoted along that
    chain until every inspected value converts.  Missing values become
    None, -1, nan, date(1, 1, 1), datetime(1, 1, 1) and '' respectively.

    Raises ValueError if no header row (when one is required) or no data
    row can be found.  *None* is returned if no rows could be converted.
    """
    # Local imports keep this private helper self-contained.
    import csv
    import datetime

    import dateutil.parser

    if converterd is None:
        converterd = {}

    if missingd is None:
        missingd = {}

    delimiter = str(delimiter)

    class FH:
        """
        For space-delimited files, we want different behavior than
        comma or tab.  Generally, we want multiple spaces to be
        treated as a single separator, whereas with comma and tab we
        want multiple commas to return multiple (empty) fields.  The
        join/strip trick below effects this.
        """
        def __init__(self, fh):
            self.fh = fh

        def close(self):
            self.fh.close()

        def seek(self, arg):
            self.fh.seek(arg)

        def fix(self, s):
            # Collapse every run of whitespace to one space and strip ends.
            return ' '.join(s.split())

        def __next__(self):
            return self.fix(next(self.fh))

        def __iter__(self):
            for line in self.fh:
                yield self.fix(line)

    def process_skiprows(reader):
        """Consume the leading *skiprows* rows of *reader*."""
        if skiprows:
            for i, row in enumerate(reader):
                if i >= (skiprows - 1):
                    break

    def iscomment(row):
        """Return whether the parsed *row* is a comment line."""
        return (len(row) > 0 and comments is not None and
                row[0].startswith(comments))

    def ismissing(name, val):
        """Return whether the value val in column name should be masked."""
        return val == missing or val == missingd.get(name) or val == ''

    def with_default_value(func, default):
        """Wrap *func(val)* as *f(name, val)* yielding *default* if missing."""
        def newfunc(name, val):
            if ismissing(name, val):
                return default
            else:
                return func(val)
        return newfunc

    def mybool(x):
        if x == 'True':
            return True
        elif x == 'False':
            return False
        else:
            raise ValueError('invalid bool')

    dateparser = dateutil.parser.parse

    def mydateparser(x):
        # try and return a datetime object
        return dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)

    def mydate(x):
        # try and return a date object; anything carrying a time of day is
        # rejected so that the column gets promoted to datetime instead
        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)

        if d.hour > 0 or d.minute > 0 or d.second > 0:
            raise ValueError('not a date')
        return d.date()

    mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1))
    myfloat = with_default_value(float, np.nan)
    myint = with_default_value(int, -1)
    mystr = with_default_value(str, '')
    mybool = with_default_value(mybool, None)
    mydate = with_default_value(mydate, datetime.date(1, 1, 1))

    def get_func(name, item, func):
        """
        Return the first converter, starting at *func* in the promotion
        order, that accepts *item*.
        """
        # promote functions in this order
        funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr]
        for candidate in funcs[funcs.index(func):]:
            try:
                candidate(name, item)
            except Exception:
                # Any failure just means "not this type"; try the next one.
                continue
            return candidate
        raise ValueError('Could not find a working conversion function')

    # map column names that clash with builtins -- TODO - extend this list
    itemd = {
        'return': 'return_',
        'file': 'file_',
        'print': 'print_',
    }

    def get_converters(reader):
        """
        Inspect the data rows of *reader* and return one converter per
        column, or None if there is no data row at all.
        """
        converters = None
        nchecked = 0
        for row in reader:
            # Blank rows carry no type information (and the conversion pass
            # skips them too); sizing the converter list from one would
            # leave it empty and break indexing on the next row.
            if not row or iscomment(row):
                continue
            if converters is None:
                converters = [mybool] * len(row)
            # Stop once exactly *checkrows* data rows have been inspected.
            if checkrows and nchecked >= checkrows:
                break
            nchecked += 1

            for j, (name, item) in enumerate(zip(names, row)):
                func = converterd.get(j)
                if func is None:
                    func = converterd.get(name)
                if func is None:
                    # Auto-detected column: promote the current converter
                    # if this (non-blank) value requires it.
                    func = converters[j]
                    if len(item.strip()):
                        func = get_func(name, item, func)
                else:
                    # how should we handle custom converters and defaults?
                    func = with_default_value(func, None)
                converters[j] = func
        return converters

    needheader = names is None

    fh = cbook.to_filehandle(fname)
    try:
        if delimiter == ' ':
            fh = FH(fh)

        reader = csv.reader(fh, delimiter=delimiter)
        process_skiprows(reader)

        if needheader:
            # Get header and remove invalid characters
            headers = None
            for row in reader:
                if iscomment(row):
                    continue
                headers = row
                break
            if headers is None:
                # Empty (or all-comment) input: there is nothing to name.
                raise ValueError('Could not find any valid data in CSV file')

            # remove these chars
            delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""")
            delete.add('"')

            names = []
            seen = {}
            for i, item in enumerate(headers):
                item = item.strip().lower().replace(' ', '_')
                item = ''.join([c for c in item if c not in delete])
                if not item:
                    item = 'column%d' % i

                item = itemd.get(item, item)
                # Disambiguate repeated header names with a numeric suffix.
                cnt = seen.get(item, 0)
                if cnt > 0:
                    names.append(item + '_%d' % cnt)
                else:
                    names.append(item)
                seen[item] = cnt + 1

        elif isinstance(names, str):
            names = [n.strip() for n in names.split(',')]

        # get the converter functions by inspecting checkrows
        converters = get_converters(reader)
        if converters is None:
            raise ValueError('Could not find any valid data in CSV file')

        # reset the reader and start over
        fh.seek(0)
        reader = csv.reader(fh, delimiter=delimiter)
        process_skiprows(reader)

        if needheader:
            # skip past any comments and consume one line of column header
            for row in reader:
                if not iscomment(row):
                    break

        # iterate over the remaining rows and convert the data to date
        # objects, ints, or floats as appropriate
        rows = []
        rowmasks = []
        for row in reader:
            if not row or iscomment(row):
                continue
            # Ensure that the row returned always has the same nr of elements
            row.extend([''] * (len(converters) - len(row)))
            rows.append([func(name, val)
                         for func, name, val in zip(converters, names, row)])
            rowmasks.append([ismissing(name, val)
                             for name, val in zip(names, row)])
    finally:
        # Release the handle on every exit path, not only on success.
        fh.close()

    if not rows:
        return None

    if use_mrecords and np.any(rowmasks):
        return np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks)
    return np.rec.fromrecords(rows, names=names)
1266 |
| - |
1267 |
| - |
1268 | 987 | class GaussianKDE:
|
1269 | 988 | """
|
1270 | 989 | Representation of a kernel-density estimate using Gaussian kernels.
|
|
0 commit comments