API: Make Series.searchsorted return a scalar, when supplied a scalar

pandas-dev · jreback · Dec 21, 2018 · Nov 20, 2018 · Nov 20, 2018 · Dec 9, 2018
commit bf8ec16c5128b3c4443cd010ca9af43924b3ead1
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1096,6 +1096,7 @@ Other API Changes
   has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
 - Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
 - :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
+- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`).
 - :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`).
 - :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather that a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`).
 - :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -1338,8 +1338,8 @@ def factorize(self, sort=False, na_sentinel=-1):
 
         Parameters
         ----------
-        value : array_like
-            Values to insert into `self`.
+        value : scalar or array_like
+            Value(s) to insert into `self`.
         side : {'left', 'right'}, optional
             If 'left', the index of the first suitable location found is given.
             If 'right', return the last such index.  If there is no suitable
@@ -1350,8 +1350,14 @@ def factorize(self, sort=False, na_sentinel=-1):
 
         Returns
         -------
-        indices : array of ints
-            Array of insertion points with the same shape as `value`.
+        int or array of ints
+            A scalar or array of insertion points with the
+            same shape as `value`.
+
+            .. versionchanged :: 0.24.0
+                If `value` is a scalar, an int is now always returned.
+                Previously, scalar inputs returned a 1-item array for
+                :class:`Series` and :class:`Categorical`.
 
         See Also
         --------
@@ -1372,7 +1378,7 @@ def factorize(self, sort=False, na_sentinel=-1):
         dtype: int64
 
         >>> x.searchsorted(4)
-        array([3])
+        3
 
         >>> x.searchsorted([0, 4])
         array([0, 3])
@@ -1389,7 +1395,7 @@ def factorize(self, sort=False, na_sentinel=-1):
         Categories (4, object): [apple < bread < cheese < milk]
 
         >>> x.searchsorted('bread')
-        array([1])     # Note: an array, not a scalar
+        1
 
         >>> x.searchsorted(['bread'], side='right')
         array([3])

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2215,8 +2215,10 @@ def __rmatmul__(self, other):
     def searchsorted(self, value, side='left', sorter=None):
         if sorter is not None:
             sorter = ensure_platform_int(sorter)
-        return self._values.searchsorted(Series(value)._values,
-                                         side=side, sorter=sorter)
+        result = self._values.searchsorted(Series(value)._values,
+                                           side=side, sorter=sorter)
+
+        return result[0] if is_scalar(value) else result
 
     # -------------------------------------------------------------------
     # Combination

diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py
@@ -86,9 +86,11 @@ def test_searchsorted(self):
         # Searching for single item argument, side='left' (default)
         res_cat = c1.searchsorted('apple')
         assert res_cat == 2
+        assert tm.is_scalar(res_cat)
 
         res_ser = s1.searchsorted('apple')
         assert res_ser == 2
+        assert tm.is_scalar(res_ser)
 
         # Searching for single item array, side='left' (default)
         res_cat = c1.searchsorted(['bread'])

diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
@@ -1364,17 +1364,19 @@ def test_numpy_repeat(self):
     def test_searchsorted(self):
         s = Series([1, 2, 3])
 
-        idx = s.searchsorted(1, side='left')
-        tm.assert_numpy_array_equal(idx, np.array([0], dtype=np.intp))
+        result = s.searchsorted(1, side='left')
+        assert tm.is_scalar(result)
+        assert result == 0
 
-        idx = s.searchsorted(1, side='right')
-        tm.assert_numpy_array_equal(idx, np.array([1], dtype=np.intp))
+        result = s.searchsorted(1, side='right')
+        assert tm.is_scalar(result)
+        assert result == 1
 
     def test_searchsorted_numeric_dtypes_scalar(self):
         s = Series([1, 2, 90, 1000, 3e9])
         r = s.searchsorted(30)
-        e = 2
-        assert r == e
+        assert tm.is_scalar(r)
+        assert r == 2
 
         r = s.searchsorted([30])
         e = np.array([2], dtype=np.intp)
@@ -1390,8 +1392,8 @@ def test_search_sorted_datetime64_scalar(self):
         s = Series(pd.date_range('20120101', periods=10, freq='2D'))
         v = pd.Timestamp('20120102')
         r = s.searchsorted(v)
-        e = 1
-        assert r == e
+        assert tm.is_scalar(r)
+        assert r == 1
 
     def test_search_sorted_datetime64_list(self):
         s = Series(pd.date_range('20120101', periods=10, freq='2D'))

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -27,7 +27,8 @@
     is_bool, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
     is_datetimelike_v_numeric, is_datetimelike_v_object,
     is_extension_array_dtype, is_interval_dtype, is_list_like, is_number,
-    is_period_dtype, is_sequence, is_timedelta64_dtype, needs_i8_conversion)
+    is_period_dtype, is_scalar, is_sequence, is_timedelta64_dtype,
+    needs_i8_conversion)  # noqa
 from pandas.core.dtypes.missing import array_equivalent
 
 import pandas as pd