API: concat will now concatenate mixed Series and DataFrames using the Series name or numbering columns as needed (GH2385)
pandas-dev · jreback · Feb 21, 2014 · Feb 21, 2014 · Feb 21, 2014 · Feb 21, 2014
commit c6b21b4742fe716ea6aa02b458682b5e0ee340f7
diff --git a/doc/source/merging.rst b/doc/source/merging.rst
@@ -213,6 +213,33 @@ This is also a valid argument to ``DataFrame.append``:
 
    df1.append(df2, ignore_index=True)
 
+.. _merging.mixed_ndims:
+
+Concatenating with mixed ndims
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can concatenate a mix of Series and DataFrames. The
+Series will be transformed to DataFrames with the column name as
+the name of the Series.
+
+.. ipython:: python
+
+   df1 = DataFrame(randn(6, 4), columns=['A', 'B', 'C', 'D'])
+   s1 = Series(randn(6), name='foo')
+   concat([df1, s1],axis=1)
+
+If unnamed Series are passed they will be numbered consecutively.
+
+.. ipython:: python
+
+   s2 = Series(randn(6))
+   concat([df1, s2, s2, s2],axis=1)
+
+Passing ``ignore_index=True`` will drop all name references.
+
+.. ipython:: python
+
+   concat([df1, s1],axis=1,ignore_index=True)
 
 More concatenating with group keys
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -98,6 +98,8 @@ API Changes
 - The top-level :func:`pandas.eval` function does not allow you use the
   ``'@'`` prefix and provides you with an error message telling you so.
 - ``NameResolutionError`` was removed because it isn't necessary anymore.
+- ``concat`` will now concatenate mixed Series and DataFrames using the Series name
+  or numbering columns as needed (:issue:`2385`)
 
 Experimental Features
 ~~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
@@ -66,6 +66,8 @@ API changes
 - The top-level :func:`pandas.eval` function does not allow you use the
   ``'@'`` prefix and provides you with an error message telling you so.
 - ``NameResolutionError`` was removed because it isn't necessary anymore.
+- ``concat`` will now concatenate mixed Series and DataFrames using the Series name
+  or numbering columns as needed (:issue:`2385`). See :ref:`the docs <merging.mixed_ndims>`
 
 MultiIndexing Using Slicers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
@@ -970,40 +970,86 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
             objs = clean_objs
             keys = clean_keys
 
-        # consolidate data
-        self.objs = []
+        if len(objs) == 0:
+            raise Exception('All objects passed were None')
+
+        # consolidate data & figure out what our result ndim is going to be
+        ndims = set()
         for obj in objs:
             if not isinstance(obj, NDFrame):
                 raise TypeError("cannot concatenate a non-NDFrame object")
 
-            # skip completely empty
-            if not np.sum(obj.shape):
-                continue
-
            # consolidate
             obj.consolidate(inplace=True)
-            self.objs.append(obj)
+            ndims.add(obj.ndim)
+
+        # get the sample
+        # want the highest ndim that we have, and must be non-empty
+        # unless all objs are empty
+        sample = None
+        if len(ndims) > 1:
+            max_ndim = max(ndims)
+            for obj in objs:
+                if obj.ndim == max_ndim and np.sum(obj.shape):
+                    sample = obj
+                    break
 
-        if len(self.objs) == 0:
-            raise Exception('All objects passed were None')
-
-        # need the first as a sample non-empty as a sample
-        sample = next(obj for obj in self.objs if np.prod(obj.shape))
+        else:
+            # filter out the empties
+            # if we have no multi-index possibilities
+            df = DataFrame([ obj.shape for obj in objs ]).sum(1)
+            non_empties = df[df!=0]
+            if len(non_empties) and (keys is None and names is None and levels is None and join_axes is None):
+                objs = [ objs[i] for i in non_empties.index ]
+                sample = objs[0]
+
+        if sample is None:
+            sample = objs[0]
+        self.objs = objs
 
         # Need to flip BlockManager axis in the DataFrame special case
-        if isinstance(sample, DataFrame):
+        self._is_frame = isinstance(sample, DataFrame)
+        if self._is_frame:
             axis = 1 if axis == 0 else 0
 
         self._is_series = isinstance(sample, ABCSeries)
         if not 0 <= axis <= sample.ndim:
             raise AssertionError("axis must be between 0 and {0}, "
                                  "input was {1}".format(sample.ndim, axis))
 
+        # if we have mixed ndims, then convert to highest ndim
+        # creating column numbers as needed
+        if len(ndims) > 1:
+            current_column = 0
+            max_ndim = sample.ndim
+            self.objs, objs = [], self.objs
+            for obj in objs:
+
+                ndim = obj.ndim
+                if ndim == max_ndim:
+                    pass
+    
+                elif ndim != max_ndim-1:
+                    raise ValueError("cannot concatenate unaligned mixed "
+                                     "dimensional NDFrame objects")
+
+                else:
+                    name = getattr(obj,'name',None)
+                    if ignore_index or name is None:
+                        name = current_column
+                        current_column += 1
+
+                    # doing a row-wise concatenation so need everything
+                    # to line up
+                    if self._is_frame and axis == 1:
+                        name = 0
+                    obj = sample._constructor({ name : obj })
+
+                self.objs.append(obj)
+
         # note: this is the BlockManager axis (since DataFrame is transposed)
         self.axis = axis
-
         self.join_axes = join_axes
-
         self.keys = keys
         self.names = names
         self.levels = levels

diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
@@ -1657,11 +1657,73 @@ def test_handle_empty_objects(self):
         # GH3259
         df = DataFrame(dict(A = range(10000)),index=date_range('20130101',periods=10000,freq='s'))
         empty = DataFrame()
+        result = concat([df,empty],axis=1)
+        assert_frame_equal(result, df)
+        result = concat([empty,df],axis=1)
+        assert_frame_equal(result, df)
+
         result = concat([df,empty])
         assert_frame_equal(result, df)
         result = concat([empty,df])
         assert_frame_equal(result, df)
 
+    def test_concat_mixed_objs(self):
+
+        # concat mixed series/frames
+        # GH2385
+
+        # axis 1
+        index=date_range('01-Jan-2013', periods=10, freq='H')
+        arr = np.arange(10, dtype='int64')
+        s1 = Series(arr, index=index)
+        s2 = Series(arr, index=index)
+        df = DataFrame(arr.reshape(-1,1), index=index)
+
+        expected = DataFrame(np.repeat(arr,2).reshape(-1,2), index=index, columns = [0, 0])
+        result = concat([df,df], axis=1)
+        assert_frame_equal(result, expected)
+
+        expected = DataFrame(np.repeat(arr,2).reshape(-1,2), index=index, columns = [0, 1])
+        result = concat([s1,s2], axis=1)
+        assert_frame_equal(result, expected)
+
+        expected = DataFrame(np.repeat(arr,3).reshape(-1,3), index=index, columns = [0, 1, 2])
+        result = concat([s1,s2,s1], axis=1)
+        assert_frame_equal(result, expected)
+
+        expected = DataFrame(np.repeat(arr,5).reshape(-1,5), index=index, columns = [0, 0, 1, 2, 3])
+        result = concat([s1,df,s2,s2,s1], axis=1)
+        assert_frame_equal(result, expected)
+
+        # with names
+        s1.name = 'foo'
+        expected = DataFrame(np.repeat(arr,3).reshape(-1,3), index=index, columns = ['foo', 0, 0])
+        result = concat([s1,df,s2], axis=1)
+        assert_frame_equal(result, expected)
+
+        s2.name = 'bar'
+        expected = DataFrame(np.repeat(arr,3).reshape(-1,3), index=index, columns = ['foo', 0, 'bar'])
+        result = concat([s1,df,s2], axis=1)
+        assert_frame_equal(result, expected)
+
+        # ignore index
+        expected = DataFrame(np.repeat(arr,3).reshape(-1,3), index=index, columns = [0, 1, 2])
+        result = concat([s1,df,s2], axis=1, ignore_index=True)
+        assert_frame_equal(result, expected)
+
+        # axis 0
+        expected = DataFrame(np.tile(arr,3).reshape(-1,1), index=index.tolist() * 3, columns = [0])
+        result = concat([s1,df,s2])
+        assert_frame_equal(result, expected)
+
+        expected = DataFrame(np.tile(arr,3).reshape(-1,1), columns = [0])
+        result = concat([s1,df,s2], ignore_index=True)
+        assert_frame_equal(result, expected)
+
+        # invalid concatenation of mixed dims
+        panel = tm.makePanel()
+        self.assertRaises(ValueError, lambda : concat([panel,s1],axis=1))
+
     def test_panel_join(self):
         panel = tm.makePanel()
         tm.add_nans(panel)
@@ -1991,15 +2053,6 @@ def test_concat_invalid_first_argument(self):
         # generator ok though
         concat(DataFrame(np.random.rand(5,5)) for _ in range(3))
 
-    def test_concat_mixed_types_fails(self):
-        df = DataFrame(randn(10, 1))
-
-        with tm.assertRaisesRegexp(TypeError, "Cannot concatenate.+"):
-            concat([df[0], df], axis=1)
-
-        with tm.assertRaisesRegexp(TypeError, "Cannot concatenate.+"):
-            concat([df, df[0]], axis=1)
-
 class TestOrderedMerge(tm.TestCase):
 
     def setUp(self):

diff --git a/vb_suite/join_merge.py b/vb_suite/join_merge.py
@@ -195,9 +195,9 @@ def sample(values, k):
 empty = DataFrame()
 """
 
-concat_empty_frames1 = Benchmark('concat([df,empty)', setup,
+concat_empty_frames1 = Benchmark('concat([df,empty])', setup,
                                 start_date=datetime(2012, 1, 1))
-concat_empty_frames2 = Benchmark('concat([empty,df)', setup,
+concat_empty_frames2 = Benchmark('concat([empty,df])', setup,
                                 start_date=datetime(2012, 1, 1))