2
2
Module contains tools for processing files into DataFrames or other objects
3
3
"""
4
4
from StringIO import StringIO
5
+ import re
5
6
import zipfile
6
7
7
8
import numpy as np
@@ -25,22 +26,23 @@ def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
25
26
except Exception : # pragma: no cover
26
27
f = open (filepath_or_buffer , 'r' )
27
28
28
- sniff_sep = True
29
- # default dialect
30
- dia = csv .excel
31
- if sep is not None :
32
- sniff_sep = False
33
- dia .delimiter = sep
34
- # attempt to sniff the delimiter
35
- if sniff_sep :
36
- line = f .readline ()
37
- sniffed = csv .Sniffer ().sniff (line )
38
- dia .delimiter = sniffed .delimiter
39
- buf = list (csv .reader (StringIO (line ), dialect = dia ))
29
+ buf = []
30
+ if sep is None or len (sep ) == 1 :
31
+ sniff_sep = True
32
+ # default dialect
33
+ dia = csv .excel
34
+ if sep is not None :
35
+ sniff_sep = False
36
+ dia .delimiter = sep
37
+ # attempt to sniff the delimiter
38
+ if sniff_sep :
39
+ line = f .readline ()
40
+ sniffed = csv .Sniffer ().sniff (line )
41
+ dia .delimiter = sniffed .delimiter
42
+ buf .extend (list (csv .reader (StringIO (line ), dialect = dia )))
43
+ reader = csv .reader (f , dialect = dia )
40
44
else :
41
- buf = []
42
-
43
- reader = csv .reader (f , dialect = dia )
45
+ reader = (re .split (sep , line .strip ()) for line in f )
44
46
45
47
if date_parser is not None :
46
48
parse_dates = True
@@ -73,7 +75,7 @@ def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None,
73
75
nrows = nrows , iterator = iterator , chunksize = chunksize ,
74
76
skip_footer = skip_footer , converters = converters )
75
77
76
- def read_clipboard (** kwargs ): # pragma: no cover
78
+ def read_clipboard (sep = '\s+' , ** kwargs ): # pragma: no cover
77
79
"""
78
80
Read text from clipboard and pass to read_table. See read_table for the full
79
81
argument list
@@ -84,6 +86,7 @@ def read_clipboard(**kwargs): # pragma: no cover
84
86
"""
85
87
from pandas .util .clipboard import clipboard_get
86
88
text = clipboard_get ()
89
+ kwargs ['sep' ] = sep
87
90
return read_table (StringIO (text ), ** kwargs )
88
91
89
92
_parser_params = """Also supports optionally iterating or breaking of the file
0 commit comments