pandas-dev · changhiskhan · Nov 13, 2011 · Nov 13, 2011 · Nov 13, 2011 · Nov 13, 2011
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1491,11 +1491,7 @@ def sort(self, column=None, axis=0, ascending=True):
         -------
         sorted : DataFrame
         """
-        by = None
-        if column:
-            assert(axis == 0)
-            by = self[column].values
-        return self.sort_index(by=by, axis=axis, ascending=ascending)
+        return self.sort_index(by=column, axis=axis, ascending=ascending)
 
     def sort_index(self, axis=0, by=None, ascending=True):
         """
@@ -1507,7 +1503,7 @@ def sort_index(self, axis=0, by=None, ascending=True):
         axis : {0, 1}
             Sort index/rows versus columns
         by : object
-            Column name in frame
+            Column name, or list of column names, in frame to sort by
         ascending : boolean, default True
             Sort ascending vs. descending
 
@@ -1516,17 +1512,20 @@ def sort_index(self, axis=0, by=None, ascending=True):
         sorted : DataFrame
         """
         labels = self._get_axis(axis)
-
+        order_list = None
         if by is not None:
-            try:
-                if by in self.columns:
-                    assert(axis == 0)
-                by = self[by].values
-            except Exception:
-                pass
-
-            assert(len(by) == len(labels))
-            sort_index = Series(by, index=labels).order().index
+            assert(axis == 0)
+            by = self[by]
+
+            if isinstance(by, Series):
+                assert(len(by) == len(labels))
+                by = by.values
+                sort_index = Series(by, index=labels).order().index
+            elif isinstance(by, DataFrame):
+                assert(len(by.index) == len(labels))
+                type_list = [(col_name, by[col_name].dtype) for col_name in by.columns]
+                sort_arr = np.array([tuple(r) for r in by.values], dtype=type_list)
+                sort_index = labels.take(sort_arr.argsort(order=by.columns.tolist()))
         else:
             sort_index = labels.take(labels.argsort())
 
@@ -2485,19 +2484,42 @@ def corr(self):
         sigma = np.sqrt(np.diag(baseCov))
         correl = baseCov / np.outer(sigma, sigma)
 
+        # Recompute correlations for columns with non-finite values
+        for i, j, ac, bc in self._cov_helper(mat):
+            c = np.corrcoef(ac, bc)[0, 1]
+            correl[i, j] = c
+            correl[j, i] = c
+
+        return self._constructor(correl, index=cols, columns=cols)
+
+    def cov(self):
+        """
+        Compute pairwise covariance of columns, excluding NA/null values
+
+        Returns
+        -------
+        y : DataFrame
+        """
+        cols = self.columns
+        mat = self.as_matrix(cols).T
+        baseCov = np.cov(mat)
+
+        for i, j, ac, bc in self._cov_helper(mat):
+            c = np.cov(ac, bc)[0, 1]
+            baseCov[i, j] = c
+            baseCov[j, i] = c
+
+        return self._constructor(baseCov, index=cols, columns=cols)
+
+    def _cov_helper(self, mat):
         # Get the covariance with items that have NaN values
         mask = np.isfinite(mat)
         for i, A in enumerate(mat):
             if not mask[i].all():
                 for j, B in enumerate(mat):
                     in_common = mask[i] & mask[j]
                     if in_common.any():
-                        ac, bc = A[in_common], B[in_common]
-                        c = np.corrcoef(ac, bc)[0, 1]
-                        correl[i, j] = c
-                        correl[j, i] = c
-
-        return self._constructor(correl, index=cols, columns=cols)
+                        yield i, j, A[in_common], B[in_common]        
 
     def corrwith(self, other, axis=0, drop=False):
         """

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -903,15 +903,40 @@ def corr(self, other):
         -------
         correlation : float
         """
+        this, that = self._get_nonna_aligned(other)
+        if this is None or that is None:
+            return nan
+        return np.corrcoef(this, that)[0, 1]
+
+    def cov(self, other):
+        """
+        Compute covariance with Series, excluding missing values
+
+        Parameters
+        ----------
+        other : Series
+
+        Returns
+        -------
+        covariance : float
+        """
+        this, that = self._get_nonna_aligned(other)
+        if this is None or that is None:
+            return nan
+        return np.cov(this, that)[0, 1]
+
+    def _get_nonna_aligned(self, other):
+        """
+        Align both Series on shared non-NA labels; (None, None) if no overlap
+        """
         commonIdx = self.dropna().index.intersection(other.dropna().index)
 
         if len(commonIdx) == 0:
-            return nan
+            return None, None
 
         this = self.reindex(commonIdx)
         that = other.reindex(commonIdx)
-
-        return np.corrcoef(this, that)[0, 1]
+        return this, that
 
     def diff(self, periods=1):
         """

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -1950,6 +1950,14 @@ def test_corr(self):
 
         assert_almost_equal(correls['A']['C'],
                             self.frame['A'].corr(self.frame['C']))
+
+    def test_cov(self):
+        self.frame['A'][:5] = nan
+        self.frame['B'][:10] = nan
+        cov = self.frame.cov()
+
+        assert_almost_equal(cov['A']['C'],
+                            self.frame['A'].cov(self.frame['C']))
 
     def test_corrwith(self):
         a = self.tsframe
@@ -2698,6 +2706,28 @@ def test_sort_index(self):
         expected = frame.ix[frame.index[indexer]]
         assert_frame_equal(sorted_df, expected)
 
+        # by multiple columns
+        frame.values[1, 0] = frame.values[0, 0]
+        smaller, larger = min(frame.values[:1, 1]), max(frame.values[:1, 1])
+        if smaller == larger:
+            larger = smaller + 1
+        frame.values[0, 1] = larger
+        frame.values[1, 1] = smaller
+
+        sorted_df = frame.sort_index(by=['A', 'B'])
+        indexer = frame['A'].argsort().values
+        zero_mask = indexer == 0
+        one_mask = indexer == 1
+        indexer[zero_mask] = 1
+        indexer[one_mask] = 0
+        expected = frame.ix[frame.index[indexer]]
+        assert_frame_equal(sorted_df, expected)
+
+        sorted_df = frame.sort_index(by=['A', 'B'], ascending=False)
+        indexer = indexer[::-1]
+        expected = frame.ix[frame.index[indexer]]
+        assert_frame_equal(sorted_df, expected)
+
         # check for now
         sorted_df = frame.sort(column='A')
         expected = frame.sort_index(by='A')

diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -833,6 +833,16 @@ def test_corr(self):
 
         # additional checks?
 
+    def test_cov(self):
+        # full overlap
+        self.assertAlmostEqual(self.ts.cov(self.ts), self.ts.std()**2)
+
+        # partial overlap
+        self.assertAlmostEqual(self.ts[:15].cov(self.ts[5:]), self.ts[5:15].std()**2)
+
+        # No overlap
+        self.assert_(np.isnan(self.ts[::2].cov(self.ts[1::2])))
+
     def test_copy(self):
         ts = self.ts.copy()