diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index aa39e25a17da..7e95973286d2 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -188,7 +188,8 @@ def autostrip(self, method): """ return lambda input: [_.strip() for _ in method(input)] # - def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True): + def __init__(self, delimiter=None, quoter=None, comments=asbytes('#'), + autostrip=True): self.comments = comments # Delimiter is a character if isinstance(delimiter, unicode): @@ -207,6 +208,9 @@ def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True): else: (_handyman, delimiter) = (self._delimited_splitter, None) self.delimiter = delimiter + if isinstance(quoter, unicode): + quoter = quoter.encode('ascii') + self.quoter = quoter if autostrip: self._handyman = self.autostrip(_handyman) else: @@ -218,7 +222,35 @@ def _delimited_splitter(self, line): line = line.strip(asbytes(" \r\n")) if not line: return [] - return line.split(self.delimiter) + if self.quoter is None: + return line.split(self.delimiter) + else: + out = [] + word = asbytes('') + in_quote = False + is_escaped = False + + # py3 bytes compat + chars = [line[i:i+1] for i in range(len(line))] + + for char in chars: + if is_escaped: + word += char + is_escaped = False + elif char == asbytes('\\'): + is_escaped = True + elif char == self.quoter: + in_quote = not in_quote + elif in_quote: + word += char + elif char == self.delimiter: + out.append(word) + word = asbytes('') + else: + word += char + if word: + out.append(word) + return out # def _fixedwidth_splitter(self, line): if self.comments is not None: diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 98b4b6e35433..f4181ada18ba 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1174,7 +1174,7 @@ def fromregex(file, regexp, dtype): #####-------------------------------------------------------------------------- -def genfromtxt(fname, dtype=float, comments='#', delimiter=None, 
+def genfromtxt(fname, dtype=float, comments='#', delimiter=None, quoter=None, skiprows=0, skip_header=0, skip_footer=0, converters=None, missing='', missing_values=None, filling_values=None, usecols=None, names=None, @@ -1207,6 +1207,9 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_rows : int, optional `skip_rows` was deprecated in numpy 1.5, and will be removed in numpy 2.0. Please use `skip_header` instead. + quoter : str, optional + The string used as a quoting character. By default it's assumed that + the values are not quoted. skip_header : int, optional The number of lines to skip at the beginning of the file. skip_footer : int, optional @@ -1334,6 +1337,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, comments = asbytes(comments) if isinstance(delimiter, unicode): delimiter = asbytes(delimiter) + if isinstance(quoter, unicode): + quoter = asbytes(quoter) if isinstance(missing, unicode): missing = asbytes(missing) if isinstance(missing_values, (unicode, list, tuple)): @@ -1365,7 +1370,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, "fname must be a string, filehandle, or generator. 
" "(got %s instead)" % type(fname)) - split_line = LineSplitter(delimiter=delimiter, comments=comments, + split_line = LineSplitter(delimiter=delimiter, quoter=quoter, + comments=comments, autostrip=autostrip)._handyman validate_names = NameValidator(excludelist=excludelist, deletechars=deletechars, diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index d0f81bde33a3..f09da8a152d8 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -986,6 +986,34 @@ def test_commented_header(self): test = np.genfromtxt(data, names=True, dtype=None) assert_equal(test, ctrl) + def check_quoter(self, quoter): + data = [["a, b c d", "e f", "g" + quoter + ' x'], + ["h, i jk", "lm no, p q", "r"]] + + ctrl = np.array([[asbytes(el) for el in row] + for row in data], + dtype='|S10') + + tio = TextIO() + for row in data: + quoted = [] + for el in row: + text = el.replace(quoter, '\\' + quoter) + quoted.append(quoter + text + quoter) + line = ','.join(quoted) + tio.write(line) + tio.write('\n') + tio.seek(0) + + test = np.genfromtxt(tio, quoter=quoter, delimiter=",", + dtype='|S10') + + assert_equal(test, ctrl) + + def test_quote(self): + self.check_quoter('"') + self.check_quoter("'") + def test_autonames_and_usecols(self): "Tests names and usecols" data = TextIO('A B C D\n aaaa 121 45 9.1')