compression args passed as kwargs, update relevant docs

pandas-dev · WillAyd · Aug 26, 2019 · Apr 8, 2019 · Apr 8, 2019 · Apr 8, 2019
commit a1cb3f7917efedc52d5d98c4e162a66c9db8a06b
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -36,7 +36,7 @@ Other Enhancements
 - :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`)
 - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
 - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`)
-- :meth:`NDFrame.to_csv` now supports dicts as ``compression`` argument with key ``'method'`` being the compression method and optional key ``'arcname'`` specifying the archived CSV file name when the compression method is ``'zip'``. If key ``'arcname'`` unspecified or ``compression='zip'``, maintains previous behavior. (:issue:`26023`)
+- :meth:`NDFrame.to_csv` now supports dicts as ``compression`` argument with key ``'method'`` being the compression method and all other keys passed as keyword arguments to ``BytesZipFile`` when the compression method is ``'zip'``. (:issue:`26023`)
 
 .. _whatsnew_0250.api_breaking:
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2984,7 +2984,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
             .. versionchanged:: 0.25.0
 
                May now be a dict with key 'method' as compression mode
-               and 'arcname' as CSV file name if mode is 'zip'
+               and other entries as BytesZipFile kwargs if compression mode
+               is 'zip'
 
         quoting : optional constant from csv module
             Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`

diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -9,7 +9,7 @@
 import lzma
 import mmap
 import os
-from typing import Dict, Union
+from typing import Dict, Tuple, Union
 from urllib.error import URLError  # noqa
 from urllib.parse import (  # noqa
     urlencode, urljoin, urlparse as parse_url, uses_netloc, uses_params,
@@ -269,40 +269,27 @@ def _get_compression_method(compression: Union[str, Dict, None]):
 
 def _infer_compression(filepath_or_buffer, compression):
     """
-    Get the compression method for filepath_or_buffer. If compression mode is
-    'infer', the inferred compression method is returned. Otherwise, the input
+    Get the compression method for filepath_or_buffer. If compression='infer',
+    the inferred compression method is returned. Otherwise, the input
     compression method is returned unchanged, unless it's invalid, in which
     case an error is raised.
-
     Parameters
     ----------
     filepath_or_buffer :
         a path (str) or buffer
-    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} or dict
-        If string, specifies compression mode. If dict, value at key 'method'
-        specifies compression mode. If compression mode is 'infer' and
-        `filepath_or_buffer` is path-like, then detect compression from the
-        following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no
-        compression).
-
-        .. versionchanged 0.25.0
-
-        May now be a dict with required key 'method' specifying compression
-        mode
-
+    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
+        If 'infer' and `filepath_or_buffer` is path-like, then detect
+        compression from the following extensions: '.gz', '.bz2', '.zip',
+        or '.xz' (otherwise no compression).
     Returns
     -------
     string or None :
         compression method
-
     Raises
     ------
     ValueError on invalid compression specified
     """
 
-    # Handle compression as dict
-    compression, _ = _get_compression_method(compression)
-
     # No compression has been explicitly specified
     if compression is None:
         return None
@@ -357,7 +344,8 @@ def _get_handle(path_or_buf, mode, encoding=None,
         .. versionchanged:: 0.25.0
 
            May now be a dict with key 'method' as compression mode
-           and 'arcname' as CSV file name if mode is 'zip'
+           and other keys as kwargs for BytesZipFile if compression
+           mode is 'zip'.
 
     memory_map : boolean, default False
         See parsers._parser_params for more information.
@@ -374,7 +362,7 @@ def _get_handle(path_or_buf, mode, encoding=None,
     """
     try:
         from s3fs import S3File
-        need_text_wrapping = (BytesIO, S3File)
+        need_text_wrapping = (BytesIO, S3File)  # type: Tuple
     except ImportError:
         need_text_wrapping = (BytesIO,)
 
@@ -407,10 +395,7 @@ def _get_handle(path_or_buf, mode, encoding=None,
 
         # ZIP Compression
         elif compression == 'zip':
-            arcname = None
-            if 'arcname' in compression_args:
-                arcname = compression_args['arcname']
-            zf = BytesZipFile(path_or_buf, mode, arcname=arcname)
+            zf = BytesZipFile(path_or_buf, mode, **compression_args)
             # Ensure the container is closed as well.
             handles.append(zf)
             if zf.mode == 'w':

diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -122,7 +122,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
         self.data_index = obj.index
         if (isinstance(self.data_index, (ABCDatetimeIndex, ABCPeriodIndex)) and
                 date_format is not None):
-            from pandas import Index
+            from pandas import Index  # type: ignore
             self.data_index = Index([x.strftime(date_format) if notna(x) else
                                      '' for x in self.data_index])