@@ -19,7 +19,7 @@ other faster and simpler functions like :func:`~numpy.loadtxt` cannot.
19
19
When giving examples, we will use the following conventions::
20
20
21
21
>>> import numpy as np
22
- >>> from StringIO import StringIO
22
+ >>> from io import BytesIO
23
23
24
24
25
25
@@ -59,7 +59,7 @@ example, comma-separated files (CSV) use a comma (``,``) or a semicolon
59
59
(``;``) as delimiter::
60
60
61
61
>>> data = "1, 2, 3\n4, 5, 6"
62
- >>> np.genfromtxt(StringIO (data), delimiter=",")
62
+ >>> np.genfromtxt(BytesIO (data), delimiter=",")
63
63
array([[ 1., 2., 3.],
64
64
[ 4., 5., 6.]])
65
65
@@ -75,12 +75,12 @@ defined as a given number of characters. In that case, we need to set
75
75
size) or to a sequence of integers (if columns can have different sizes)::
76
76
77
77
>>> data = " 1 2 3\n 4 5 67\n890123 4"
78
- >>> np.genfromtxt(StringIO (data), delimiter=3)
78
+ >>> np.genfromtxt(BytesIO (data), delimiter=3)
79
79
array([[ 1., 2., 3.],
80
80
[ 4., 5., 67.],
81
81
[ 890., 123., 4.]])
82
82
>>> data = "123456789\n 4 7 9\n 4567 9"
83
- >>> np.genfromtxt(StringIO (data), delimiter=(4, 3, 2))
83
+ >>> np.genfromtxt(BytesIO (data), delimiter=(4, 3, 2))
84
84
array([[ 1234., 567., 89.],
85
85
[ 4., 7., 9.],
86
86
[ 4., 567., 9.]])
@@ -96,12 +96,12 @@ This behavior can be overwritten by setting the optional argument
96
96
97
97
>>> data = "1, abc , 2\n 3, xxx, 4"
98
98
>>> # Without autostrip
99
- >>> np.genfromtxt(StringIO (data), delimiter=",", dtype="|S5")
99
+ >>> np.genfromtxt(BytesIO (data), delimiter=",", dtype="|S5")
100
100
array([['1', ' abc ', ' 2'],
101
101
['3', ' xxx', ' 4']],
102
102
dtype='|S5')
103
103
>>> # With autostrip
104
- >>> np.genfromtxt(StringIO (data), delimiter=",", dtype="|S5", autostrip=True)
104
+ >>> np.genfromtxt(BytesIO (data), delimiter=",", dtype="|S5", autostrip=True)
105
105
array([['1', 'abc', '2'],
106
106
['3', 'xxx', '4']],
107
107
dtype='|S5')
@@ -126,7 +126,7 @@ marker(s) is simply ignored::
126
126
... # And here comes the last line
127
127
... 9, 0
128
128
... """
129
- >>> np.genfromtxt(StringIO (data), comments="#", delimiter=",")
129
+ >>> np.genfromtxt(BytesIO (data), comments="#", delimiter=",")
130
130
[[ 1. 2.]
131
131
[ 3. 4.]
132
132
[ 5. 6.]
@@ -154,9 +154,9 @@ performed. Similarly, we can skip the last ``n`` lines of the file by
154
154
using the :keyword:`skip_footer` attribute and giving it a value of ``n``::
155
155
156
156
>>> data = "\n".join(str(i) for i in range(10))
157
- >>> np.genfromtxt(StringIO (data),)
157
+ >>> np.genfromtxt(BytesIO (data),)
158
158
array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
159
- >>> np.genfromtxt(StringIO (data),
159
+ >>> np.genfromtxt(BytesIO (data),
160
160
... skip_header=3, skip_footer=5)
161
161
array([ 3., 4.])
162
162
@@ -178,7 +178,7 @@ For example, if we want to import only the first and the last columns, we
178
178
can use ``usecols=(0, -1)``::
179
179
180
180
>>> data = "1 2 3\n4 5 6"
181
- >>> np.genfromtxt(StringIO (data), usecols=(0, -1))
181
+ >>> np.genfromtxt(BytesIO (data), usecols=(0, -1))
182
182
array([[ 1., 3.],
183
183
[ 4., 6.]])
184
184
@@ -187,11 +187,11 @@ giving their name to the :keyword:`usecols` argument, either as a sequence
187
187
of strings or a comma-separated string::
188
188
189
189
>>> data = "1 2 3\n4 5 6"
190
- >>> np.genfromtxt(StringIO (data),
190
+ >>> np.genfromtxt(BytesIO (data),
191
191
... names="a, b, c", usecols=("a", "c"))
192
192
array([(1.0, 3.0), (4.0, 6.0)],
193
193
dtype=[('a', '<f8'), ('c', '<f8')])
194
- >>> np.genfromtxt(StringIO (data),
194
+ >>> np.genfromtxt(BytesIO (data),
195
195
... names="a, b, c", usecols=("a, c"))
196
196
array([(1.0, 3.0), (4.0, 6.0)],
197
197
dtype=[('a', '<f8'), ('c', '<f8')])
@@ -249,15 +249,15 @@ A natural approach when dealing with tabular data is to allocate a name to
249
249
each column. A first possibility is to use an explicit structured dtype,
250
250
as mentioned previously::
251
251
252
- >>> data = StringIO ("1 2 3\n 4 5 6")
252
+ >>> data = BytesIO ("1 2 3\n 4 5 6")
253
253
>>> np.genfromtxt(data, dtype=[(_, int) for _ in "abc"])
254
254
array([(1, 2, 3), (4, 5, 6)],
255
255
dtype=[('a', '<i8'), ('b', '<i8'), ('c', '<i8')])
256
256
257
257
Another simpler possibility is to use the :keyword:`names` keyword with a
258
258
sequence of strings or a comma-separated string::
259
259
260
- >>> data = StringIO ("1 2 3\n 4 5 6")
260
+ >>> data = BytesIO ("1 2 3\n 4 5 6")
261
261
>>> np.genfromtxt(data, names="A, B, C")
262
262
array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)],
263
263
dtype=[('A', '<f8'), ('B', '<f8'), ('C', '<f8')])
@@ -271,7 +271,7 @@ that case, we must use the :keyword:`names` keyword with a value of
271
271
``True``. The names will then be read from the first line (after the
272
272
``skip_header`` ones), even if the line is commented out::
273
273
274
- >>> data = StringIO ("So it goes\n#a b c\n1 2 3\n 4 5 6")
274
+ >>> data = BytesIO ("So it goes\n#a b c\n1 2 3\n 4 5 6")
275
275
>>> np.genfromtxt(data, skip_header=1, names=True)
276
276
array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)],
277
277
dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])
@@ -280,7 +280,7 @@ The default value of :keyword:`names` is ``None``. If we give any other
280
280
value to the keyword, the new names will overwrite the field names we may
281
281
have defined with the dtype::
282
282
283
- >>> data = StringIO ("1 2 3\n 4 5 6")
283
+ >>> data = BytesIO ("1 2 3\n 4 5 6")
284
284
>>> ndtype=[('a',int), ('b', float), ('c', int)]
285
285
>>> names = ["A", "B", "C"]
286
286
>>> np.genfromtxt(data, names=names, dtype=ndtype)
@@ -295,23 +295,23 @@ If ``names=None`` but a structured dtype is expected, names are defined
295
295
with the standard NumPy default of ``"f%i"``, yielding names like ``f0``,
296
296
``f1`` and so forth::
297
297
298
- >>> data = StringIO ("1 2 3\n 4 5 6")
298
+ >>> data = BytesIO ("1 2 3\n 4 5 6")
299
299
>>> np.genfromtxt(data, dtype=(int, float, int))
300
300
array([(1, 2.0, 3), (4, 5.0, 6)],
301
301
dtype=[('f0', '<i8'), ('f1', '<f8'), ('f2', '<i8')])
302
302
303
303
In the same way, if we don't give enough names to match the length of the
304
304
dtype, the missing names will be defined with this default template::
305
305
306
- >>> data = StringIO ("1 2 3\n 4 5 6")
306
+ >>> data = BytesIO ("1 2 3\n 4 5 6")
307
307
>>> np.genfromtxt(data, dtype=(int, float, int), names="a")
308
308
array([(1, 2.0, 3), (4, 5.0, 6)],
309
309
dtype=[('a', '<i8'), ('f0', '<f8'), ('f1', '<i8')])
310
310
311
311
We can overwrite this default with the :keyword:`defaultfmt` argument, that
312
312
takes any format string::
313
313
314
- >>> data = StringIO ("1 2 3\n 4 5 6")
314
+ >>> data = BytesIO ("1 2 3\n 4 5 6")
315
315
>>> np.genfromtxt(data, dtype=(int, float, int), defaultfmt="var_%02i")
316
316
array([(1, 2.0, 3), (4, 5.0, 6)],
317
317
dtype=[('var_00', '<i8'), ('var_01', '<f8'), ('var_02', '<i8')])
@@ -377,7 +377,7 @@ representing a percentage to a float between 0 and 1::
377
377
>>> data = "1, 2.3%, 45.\n6, 78.9%, 0"
378
378
>>> names = ("i", "p", "n")
379
379
>>> # General case .....
380
- >>> np.genfromtxt(StringIO (data), delimiter=",", names=names)
380
+ >>> np.genfromtxt(BytesIO (data), delimiter=",", names=names)
381
381
array([(1.0, nan, 45.0), (6.0, nan, 0.0)],
382
382
dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])
383
383
@@ -387,7 +387,7 @@ and ``' 78.9%'`` cannot be converted to float and we end up having
387
387
``np.nan`` instead. Let's now use a converter::
388
388
389
389
>>> # Converted case ...
390
- >>> np.genfromtxt(StringIO (data), delimiter=",", names=names,
390
+ >>> np.genfromtxt(BytesIO (data), delimiter=",", names=names,
391
391
... converters={1: convertfunc})
392
392
array([(1.0, 0.023, 45.0), (6.0, 0.78900000000000003, 0.0)],
393
393
dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])
@@ -396,7 +396,7 @@ The same results can be obtained by using the name of the second column
396
396
(``"p"``) as key instead of its index (1)::
397
397
398
398
>>> # Using a name for the converter ...
399
- >>> np.genfromtxt(StringIO (data), delimiter=",", names=names,
399
+ >>> np.genfromtxt(BytesIO (data), delimiter=",", names=names,
400
400
... converters={"p": convertfunc})
401
401
array([(1.0, 0.023, 45.0), (6.0, 0.78900000000000003, 0.0)],
402
402
dtype=[('i', '<f8'), ('p', '<f8'), ('n', '<f8')])
@@ -410,8 +410,8 @@ by default::
410
410
411
411
>>> data = "1, , 3\n 4, 5, 6"
412
412
>>> convert = lambda x: float(x.strip() or -999)
413
- >>> np.genfromtxt(StringIO (data), delimiter=",",
414
- ... converter ={1: convert})
413
+ >>> np.genfromtxt(BytesIO (data), delimiter=",",
414
+ ... converters ={1: convert})
415
415
array([[ 1., -999., 3.],
416
416
[ 4., 5., 6.]])
417
417
@@ -492,7 +492,7 @@ and second column, and to -999 if they occur in the last column::
492
492
... names="a,b,c",
493
493
... missing_values={0:"N/A", 'b':" ", 2:"???"},
494
494
... filling_values={0:0, 'b':0, 2:-999})
495
- >>> np.genfromtxt(StringIO.StringIO (data), **kwargs)
495
+ >>> np.genfromtxt(BytesIO (data), **kwargs)
496
496
array([(0, 2, 3), (4, 0, -999)],
497
497
dtype=[('a', '<i8'), ('b', '<i8'), ('c', '<i8')])
498
498
0 commit comments