numpy · ddasilva · Apr 6, 2014 · juliantaylor · Apr 6, 2014 · juliantaylor
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
@@ -188,7 +188,8 @@ def autostrip(self, method):
         """
         return lambda input: [_.strip() for _ in method(input)]
     #
-    def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True):
+    def __init__(self, delimiter=None, quoter=None, comments=asbytes('#'),
+                 autostrip=True):
         self.comments = comments
         # Delimiter is a character
         if isinstance(delimiter, unicode):
@@ -207,6 +208,9 @@ def __init__(self, delimiter=None, comments=asbytes('#'), autostrip=True):
         else:
             (_handyman, delimiter) = (self._delimited_splitter, None)
         self.delimiter = delimiter
+        if isinstance(quoter, unicode):
+            quoter = quoter.encode('ascii')
+        self.quoter = quoter
         if autostrip:
             self._handyman = self.autostrip(_handyman)
         else:
@@ -218,7 +222,35 @@ def _delimited_splitter(self, line):
         line = line.strip(asbytes(" \r\n"))
         if not line:
             return []
-        return line.split(self.delimiter)
+        if self.quoter is None:
+            return line.split(self.delimiter)
+        else:
+            out = []
+            word = asbytes('')
+            in_quote = False
+            is_escaped = False
+
+            # py3 bytes compat
+            chars = [line[i:i+1] for i in range(len(line))]
+
+            for char in chars:
+                if is_escaped:
+                    word += char
+                    is_escaped = False
+                elif char == asbytes('\\'):
+                    is_escaped = True
+                elif char == self.quoter:
+                    in_quote = not in_quote
+                elif in_quote:
+                    word += char
+                elif char == self.delimiter:
+                    out.append(word)
+                    word = asbytes('')
+                else:
+                    word += char
+            if word:
+                out.append(word)
+            return out
     #
     def _fixedwidth_splitter(self, line):
         if self.comments is not None:

diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
@@ -1174,7 +1174,7 @@ def fromregex(file, regexp, dtype):
 #####--------------------------------------------------------------------------
 
 
-def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
+def genfromtxt(fname, dtype=float, comments='#', delimiter=None, quoter=None,
                skiprows=0, skip_header=0, skip_footer=0, converters=None,
                missing='', missing_values=None, filling_values=None,
                usecols=None, names=None,
@@ -1207,6 +1207,9 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
     skip_rows : int, optional
         `skip_rows` was deprecated in numpy 1.5, and will be removed in
         numpy 2.0. Please use `skip_header` instead.
+    quoter : str, optional
+        The string used as a quoting character. By default it's assumed that
+        the values are not quoted.
     skip_header : int, optional
         The number of lines to skip at the beginning of the file.
     skip_footer : int, optional
@@ -1334,6 +1337,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
         comments = asbytes(comments)
     if isinstance(delimiter, unicode):
         delimiter = asbytes(delimiter)
+    if isinstance(quoter, unicode):
+        quoter = asbytes(quoter)
     if isinstance(missing, unicode):
         missing = asbytes(missing)
     if isinstance(missing_values, (unicode, list, tuple)):
@@ -1365,7 +1370,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
             "fname must be a string, filehandle, or generator. "
             "(got %s instead)" % type(fname))
 
-    split_line = LineSplitter(delimiter=delimiter, comments=comments,
+    split_line = LineSplitter(delimiter=delimiter, quoter=quoter,
+                              comments=comments,
                               autostrip=autostrip)._handyman
     validate_names = NameValidator(excludelist=excludelist,
                                    deletechars=deletechars,

diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
@@ -986,6 +986,34 @@ def test_commented_header(self):
         test = np.genfromtxt(data, names=True, dtype=None)
         assert_equal(test, ctrl)
 
+    def check_quoter(self, quoter):
+        data = [["a, b c d", "e f", "g" + quoter + ' x'],
+                ["h, i jk", "lm no, p q", "r"]]
+
+        ctrl = np.array([[asbytes(el) for el in row]
+                         for row in data],
+                        dtype='|S10')
+
+        tio = TextIO()
+        for row in data:
+            quoted = []
+            for el in row:
+                text = el.replace(quoter, '\\' + quoter)
+                quoted.append(quoter + text + quoter)
+            line = ','.join(quoted)
+            tio.write(line)
+            tio.write('\n')
+        tio.seek(0)
+
+        test = np.genfromtxt(tio, quoter=quoter, delimiter=",",
+                             dtype='|S10')
+
+        assert_equal(test, ctrl)
+
+    def test_quote(self):
+        self.check_quoter('"')
+        self.check_quoter("'")
+
     def test_autonames_and_usecols(self):
         "Tests names and usecols"
         data = TextIO('A B C D\n aaaa 121 45 9.1')