move searchsorted to algorithms.py

pandas-dev · jreback · Feb 24, 2019 · Jul 23, 2018 · Jul 25, 2018 · Jul 28, 2018
commit a9905fd69d79788ca0859e3aa002588c0c663403
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -19,7 +19,7 @@
     ensure_float64, ensure_int64, ensure_object, ensure_platform_int,
     ensure_uint64, is_array_like, is_bool_dtype, is_categorical_dtype,
     is_complex_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype,
-    is_datetimelike, is_extension_array_dtype, is_float_dtype,
+    is_datetimelike, is_extension_array_dtype, is_float_dtype, is_integer,
     is_integer_dtype, is_interval_dtype, is_list_like, is_numeric_dtype,
     is_object_dtype, is_period_dtype, is_scalar, is_signed_integer_dtype,
     is_sparse, is_timedelta64_dtype, is_unsigned_integer_dtype,
@@ -1724,6 +1724,88 @@ def func(arr, indexer, out, fill_value=np.nan):
     return out
 
 
+# ---- #
+# searchsorted #
+# ---- #
+
+def searchsorted(arr, value, side="left", sorter=None):
+    """
+    Find indices where elements should be inserted to maintain order.
+
+    .. versionadded:: 0.25.0
+
+    Find the indices into a sorted array `arr` such that, if the
+    corresponding elements in `value` were inserted before the indices,
+    the order of `arr` would be preserved.
+
+    Assuming that `arr` is sorted:
+
+    ======  ================================
+    `side`  returned index `i` satisfies
+    ======  ================================
+    left    ``arr[i-1] < value <= arr[i]``
+    right   ``arr[i-1] <= value < arr[i]``
+    ======  ================================
+
+    Parameters
+    ----------
+    arr : numpy.ndarray or ExtensionArray
+        Array to search in. Cannot be Index, Series or PandasArray, as that
+        would cause a RecursionError.
+    value : scalar or array_like
+        Value(s) to insert into `arr`.
+    side : {'left', 'right'}, optional
+        If 'left', the index of the first suitable location found is given.
+        If 'right', return the last such index.  If there is no suitable
+        index, return either 0 or N (where N is the length of `arr`).
+    sorter : 1-D array_like, optional
+        Optional array of integer indices that sort `arr` into ascending
+        order. They are typically the result of argsort.
+
+    Returns
+    -------
+    array of ints
+        Array of insertion points with the same shape as `value`.
+
+    See Also
+    --------
+    numpy.searchsorted : Similar method from NumPy.
+    """
+    if sorter is not None:
+        sorter = ensure_platform_int(sorter)
+
+    if is_integer_dtype(arr) and (
+            is_integer(value) or is_integer_dtype(value)):
+        from .arrays.array_ import array
+        # if `arr` and `value` have different dtypes, `arr` would be
+        # recast by numpy, causing a slow search.
+        # Before searching below, we therefore try to give `value` the
+        # same dtype as `arr`, while guarding against integer overflows.
+        iinfo = np.iinfo(arr.dtype.type)
+        value_arr = np.array([value]) if is_scalar(value) else np.array(value)
+        if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all():
+            # value within bounds, so no overflow, so can convert value dtype
+            # to dtype of arr
+            dtype = arr.dtype
+        else:
+            dtype = value_arr.dtype
+
+        if is_scalar(value):
+            value = dtype.type(value)
+        else:
+            value = array(value, dtype=dtype)
+    elif not (is_object_dtype(arr) or is_numeric_dtype(arr) or
+              is_categorical_dtype(arr)):
+        from pandas.core.series import Series
+        # E.g. if `arr` is an array with dtype='datetime64[ns]'
+        # and `value` is a pd.Timestamp, we may need to convert value
+        value_ser = Series(value)._values
+        value = value_ser[0] if is_scalar(value) else value_ser
+
+    result = arr.searchsorted(value, side=side, sorter=sorter)
+    return result
+
+
 # ---- #
 # diff #
 # ---- #

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -12,7 +12,8 @@
 from pandas.core.dtypes.inference import is_array_like, is_list_like
 
 from pandas import compat
-from pandas.core import common as com, nanops
+from pandas.core import nanops
+from pandas.core.algorithms import searchsorted
 from pandas.core.missing import backfill_1d, pad_1d
 
 from .base import ExtensionArray, ExtensionOpsMixin
@@ -426,8 +427,8 @@ def to_numpy(self, dtype=None, copy=False):
 
     @Appender(ExtensionArray.searchsorted.__doc__)
     def searchsorted(self, value, side='left', sorter=None):
-        return com.searchsorted(self.to_numpy(), value,
-                                side=side, sorter=sorter)
+        return searchsorted(self.to_numpy(), value,
+                            side=side, sorter=sorter)
 
     # ------------------------------------------------------------------------
     # Ops

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -1525,8 +1525,8 @@ def factorize(self, sort=False, na_sentinel=-1):
     @Substitution(klass='Index')
     @Appender(_shared_docs['searchsorted'])
     def searchsorted(self, value, side='left', sorter=None):
-        return com.searchsorted(self._values, value,
-                                side=side, sorter=sorter)
+        return algorithms.searchsorted(self._values, value,
+                                       side=side, sorter=sorter)
 
     def drop_duplicates(self, keep='first', inplace=False):
         inplace = validate_bool_kwarg(inplace, 'inplace')

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -17,9 +17,7 @@
 
 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
 from pandas.core.dtypes.common import (
-    ensure_platform_int, is_array_like, is_bool_dtype, is_categorical_dtype,
-    is_extension_array_dtype, is_integer, is_integer_dtype, is_numeric_dtype,
-    is_object_dtype, is_scalar)
+    is_array_like, is_bool_dtype, is_extension_array_dtype, is_integer)
 from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
 from pandas.core.dtypes.inference import _iterable_not_string
 from pandas.core.dtypes.missing import isna, isnull, notnull  # noqa
@@ -485,81 +483,3 @@ def f(x):
         f = mapper
 
     return f
-
-
-def searchsorted(arr, value, side="left", sorter=None):
-    """
-    Find indices where elements should be inserted to maintain order.
-
-    .. versionadded:: 0.25.0
-
-    Find the indices into a sorted array `self` (a) such that, if the
-    corresponding elements in `value` were inserted before the indices,
-    the order of `self` would be preserved.
-
-    Assuming that `self` is sorted:
-
-    ======  ================================
-    `side`  returned index `i` satisfies
-    ======  ================================
-    left    ``self[i-1] < value <= self[i]``
-    right   ``self[i-1] <= value < self[i]``
-    ======  ================================
-
-    Parameters
-    ----------
-    arr: numpy.array or ExtensionArray
-        array to search in. Cannot be Index, Series or PandasArray, as that
-        would cause a RecursionError.
-    value : array_like
-        Values to insert into `arr`.
-    side : {'left', 'right'}, optional
-        If 'left', the index of the first suitable location found is given.
-        If 'right', return the last such index.  If there is no suitable
-        index, return either 0 or N (where N is the length of `self`).
-    sorter : 1-D array_like, optional
-        Optional array of integer indices that sort array a into ascending
-        order. They are typically the result of argsort.
-
-    Returns
-    -------
-    array of ints
-        Array of insertion points with the same shape as `value`.
-
-    See Also
-    --------
-    numpy.searchsorted : Similar method from NumPy.
-    """
-    if sorter is not None:
-        sorter = ensure_platform_int(sorter)
-
-    if is_integer_dtype(arr) and (
-            is_integer(value) or is_integer_dtype(value)):
-        from .arrays.array_ import array
-        # if `arr` and `value` have different dtypes, `arr` would be
-        # recast by numpy, causing a slow search.
-        # Before searching below, we therefore try to give `value` the
-        # same dtype as `arr`, while guarding against integer overflows.
-        iinfo = np.iinfo(arr.dtype.type)
-        value_arr = np.array([value]) if is_scalar(value) else np.array(value)
-        if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all():
-            # value within bounds, so no overflow, so can convert value dtype
-            # to dtype of arr
-            dtype = arr.dtype
-        else:
-            dtype = value_arr.dtype
-
-        if is_scalar(value):
-            value = dtype.type(value)
-        else:
-            value = array(value, dtype=dtype)
-    elif not (is_object_dtype(arr) or is_numeric_dtype(arr) or
-              is_categorical_dtype(arr)):
-        from pandas.core.series import Series
-        # E.g. if `arr` is an array with dtype='datetime64[ns]'
-        # and `value` is a pd.Timestamp, we may need to convert value
-        value_ser = Series(value)._values
-        value = value_ser[0] if is_scalar(value) else value_ser
-
-    result = arr.searchsorted(value, side=side, sorter=sorter)
-    return result
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2392,8 +2392,8 @@ def __rmatmul__(self, other):
     @Substitution(klass='Series')
     @Appender(base._shared_docs['searchsorted'])
     def searchsorted(self, value, side='left', sorter=None):
-        return com.searchsorted(self._values, value,
-                                side=side, sorter=sorter)
+        return algorithms.searchsorted(self._values, value,
+                                       side=side, sorter=sorter)
 
     # -------------------------------------------------------------------
     # Combination