8000 Merge pull request #6656 from I--P/loadtxt-int · numpy/numpy@d1dada1 · GitHub
[go: up one dir, main page]

Skip to content

Commit d1dada1

Browse files
committed
Merge pull request #6656 from I--P/loadtxt-int
ENH: usecols now accepts an int when only one column has to be read
2 parents ea97756 + 849b818 commit d1dada1

File tree

3 files changed

+73
-5
lines changed

3 files changed

+73
-5
lines changed

doc/release/1.12.0-notes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ New Features
6161
Improvements
6262
============
6363

64+
*np.loadtxt* now supports a single integer as ``usecols`` argument
65+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
66+
Instead of using ``usecols=(n,)`` to read the nth column of a file
67+
it is now allowed to use ``usecols=n``. Also the error message is
68+
more user friendly when a non-integer is passed as a column index.
6469

6570

6671
Changes

numpy/lib/npyio.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import itertools
77
import warnings
88
import weakref
9-
from operator import itemgetter
9+
from operator import itemgetter, index as opindex
1010

1111
import numpy as np
1212
from . import format
@@ -714,10 +714,18 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
714714
``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None.
715715
skiprows : int, optional
716716
Skip the first `skiprows` lines; default: 0.
717-
usecols : sequence, optional
718-
Which columns to read, with 0 being the first. For example,
719-
``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
717+
718+
usecols : int or sequence, optional
719+
Which columns to read, with 0 being the first. For example,
720+
``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
720721
The default, None, results in all columns being read.
722+
723+
.. versionadded:: 1.11.0
724+
725+
Also when a single column has to be read it is possible to use
726+
an integer instead of a tuple. E.g. ``usecols = 3`` reads the
727+
fourth column the same way as ``usecols = (3,)`` would.
728+
721729
unpack : bool, optional
722730
If True, the returned array is transposed, so that arguments may be
723731
unpacked using ``x, y, z = loadtxt(...)``. When used with a structured
@@ -786,8 +794,25 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
786794
user_converters = converters
787795
if delimiter is not None:
788796
delimiter = asbytes(delimiter)
797+
789798
if usecols is not None:
790-
usecols = list(usecols)
799+
# Allow usecols to be a single int or a sequence of ints
800+
try:
801+
usecols_as_list = list(usecols)
802+
except TypeError:
803+
usecols_as_list = [usecols]
804+
for col_idx in usecols_as_list:
805+
try:
806+
opindex(col_idx)
807+
except TypeError as e:
808+
e.args = (
809+
"usecols must be an int or a sequence of ints but "
810+
"it contains at least one element of type %s" %
811+
type(col_idx),
812+
)
813+
raise
814+
# Fall back to existing code
815+
usecols = usecols_as_list
791816

792817
fown = False
793818
try:

numpy/lib/tests/test_io.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,29 @@ def test_usecols(self):
608608
x = np.loadtxt(c, dtype=float, usecols=np.array([1, 2]))
609609
assert_array_equal(x, a[:, 1:])
610610

611+
# Testing with an integer instead of a sequence
612+
for int_type in [int, np.int8, np.int16,
613+
np.int32, np.int64, np.uint8, np.uint16,
614+
np.uint32, np.uint64]:
615+
to_read = int_type(1)
616+
c.seek(0)
617+
x = np.loadtxt(c, dtype=float, usecols=to_read)
618+
assert_array_equal(x, a[:, 1])
619+
620+
# Testing with some crazy custom integer type
621+
class CrazyInt(object):
622+
def __index__(self):
623+
return 1
624+
625+
crazy_int = CrazyInt()
626+
c.seek(0)
627+
x = np.loadtxt(c, dtype=float, usecols=crazy_int)
628+
assert_array_equal(x, a[:, 1])
629+
630+
c.seek(0)
631+
x = np.loadtxt(c, dtype=float, usecols=(crazy_int,))
632+
assert_array_equal(x, a[:, 1])
633+
611634
# Checking with dtypes defined converters.
612635
data = '''JOE 70.1 25.3
613636
BOB 60.5 27.9
@@ -619,6 +642,21 @@ def test_usecols(self):
619642
assert_equal(arr['stid'], [b"JOE", b"BOB"])
620643
assert_equal(arr['temp'], [25.3, 27.9])
621644

645+
# Testing non-ints in usecols
646+
c.seek(0)
647+
bogus_idx = 1.5
648+
assert_raises_regex(
649+
TypeError,
650+
'^usecols must be.*%s' % type(bogus_idx),
651+
np.loadtxt, c, usecols=bogus_idx
652+
)
653+
654+
assert_raises_regex(
655+
TypeError,
656+
'^usecols must be.*%s' % type(bogus_idx),
657+
np.loadtxt, c, usecols=[0, bogus_idx, 0]
658+
)
659+
622660
def test_fancy_dtype(self):
623661
c = TextIO()
624662
c.write('1,2,3.0\n4,5,6.0\n')

0 commit comments

Comments
 (0)
0