cleanup

pandas-dev · rhshadrach · Feb 29, 2024 · Mar 2, 2024 · Mar 3, 2024 · Mar 5, 2024
commit 12ef132ba484dfeb408a8031b5c4473f52c325c4
diff --git a/doc/source/development/maintaining.rst b/doc/source/development/maintaining.rst
@@ -326,6 +326,34 @@ a milestone before tagging, you can request the bot to backport it with:
    @Meeseeksdev backport <branch>
 
 
+.. _maintaining.asv-machine:
+
+Benchmark machine
+-----------------
+
+The team currently owns dedicated hardware for hosting a website for pandas' ASV performance benchmark. The results
+are published to https://asv-runner.github.io/asv-collection/pandas/
+
+Configuration
+`````````````
+
+The machine can be configured with the `Ansible <https://docs.ansible.com/ansible/latest/index.html>`_ playbook in https://github.com/tomaugspurger/asv-runner.
+
+Publishing
+``````````
+
+The results are published to another GitHub repository, https://github.com/tomaugspurger/asv-collection.
+Finally, a cron job on our docs server pulls from https://github.com/tomaugspurger/asv-collection and serves the results from ``/speed``.
+Ask Tom or Joris for access to the webserver.
+
+Debugging
+`````````
+
+The benchmarks are scheduled by Airflow. It has a dashboard for viewing and debugging the results. You'll need to set up an SSH tunnel to view them::
+
+    ssh -L 8080:localhost:8080 pandas@panda.likescandy.com
+
+
 .. _maintaining.release:
 
 Release process

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -191,7 +191,6 @@ Removal of prior version deprecations/changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
 - :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`)
-- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
 - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
 - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
 - All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`)
@@ -239,12 +238,12 @@ Removal of prior version deprecations/changes
 - Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`)
 - Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`)
 
+
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.performance:
 
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
-- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`)
 - Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
 - Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
 - Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
@@ -253,11 +252,11 @@ Performance improvements
 - Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
 - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
 - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
-- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
 - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`)
-- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
+- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`)
+- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.bug_fixes:

diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -29,7 +29,6 @@
     doc,
 )
 
-from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.common import (
     ensure_platform_int,
     ensure_python_int,
@@ -43,7 +42,6 @@
 from pandas.core import ops
 import pandas.core.common as com
 from pandas.core.construction import extract_array
-from pandas.core.indexers import check_array_indexer
 import pandas.core.indexes.base as ibase
 from pandas.core.indexes.base import (
     Index,
@@ -1050,18 +1048,6 @@ def __getitem__(self, key):
                 "and integer or boolean "
                 "arrays are valid indices"
             )
-        elif com.is_bool_indexer(key):
-            if isinstance(getattr(key, "dtype", None), ExtensionDtype):
-                np_key = key.to_numpy(dtype=bool, na_value=False)
-            else:
-                np_key = np.asarray(key, dtype=bool)
-            check_array_indexer(self._range, np_key)  # type: ignore[arg-type]
-            # Short circuit potential _shallow_copy check
-            if np_key.all():
-                return self._simple_new(self._range, name=self.name)
-            elif not np_key.any():
-                return self._simple_new(_empty_range, name=self.name)
-            return self.take(np.flatnonzero(np_key))
         return super().__getitem__(key)
 
     def _getitem_slice(self, slobj: slice) -> Self:

diff --git a/pandas/io/excel/_calamine.py b/pandas/io/excel/_calamine.py
@@ -75,7 +75,7 @@ def load_workbook(
         from python_calamine import load_workbook
 
         return load_workbook(
-            filepath_or_buffer,
+            filepath_or_buffer,  # type: ignore[arg-type]
             **engine_kwargs,
         )
 

diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py
@@ -623,41 +623,3 @@ def test_append_one_nonempty_preserve_step():
     expected = RangeIndex(0, -1, -1)
     result = RangeIndex(0).append([expected])
     tm.assert_index_equal(result, expected, exact=True)
-
-
-def test_getitem_boolmask_all_true():
-    ri = RangeIndex(3, name="foo")
-    expected = ri.copy()
-    result = ri[[True] * 3]
-    tm.assert_index_equal(result, expected, exact=True)
-
-
-def test_getitem_boolmask_all_false():
-    ri = RangeIndex(3, name="foo")
-    result = ri[[False] * 3]
-    expected = RangeIndex(0, name="foo")
-    tm.assert_index_equal(result, expected, exact=True)
-
-
-def test_getitem_boolmask_returns_rangeindex():
-    ri = RangeIndex(3, name="foo")
-    result = ri[[False, True, True]]
-    expected = RangeIndex(1, 3, name="foo")
-    tm.assert_index_equal(result, expected, exact=True)
-
-    result = ri[[True, False, True]]
-    expected = RangeIndex(0, 3, 2, name="foo")
-    tm.assert_index_equal(result, expected, exact=True)
-
-
-def test_getitem_boolmask_returns_index():
-    ri = RangeIndex(4, name="foo")
-    result = ri[[True, True, False, True]]
-    expected = Index([0, 1, 3], name="foo")
-    tm.assert_index_equal(result, expected)
-
-
-def test_getitem_boolmask_wrong_length():
-    ri = RangeIndex(4, name="foo")
-    with pytest.raises(IndexError, match="Boolean index has wrong length"):
-        ri[[True]]
diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py
@@ -132,8 +132,6 @@ def test_append_series(setup_path):
 
         # select on the index and values
         expected = ns[(ns > 70) & (ns.index < 90)]
-        # Reading/writing RangeIndex info is not supported yet
-        expected.index = Index(expected.index._data)
         result = store.select("ns", "foo>70 and index<90")
         tm.assert_series_equal(result, expected, check_index_type=True)
 

diff --git a/web/pandas/community/benchmarks.md b/web/pandas/community/benchmarks.md
diff --git a/web/pandas/config.yml b/web/pandas/config.yml
@@ -54,8 +54,6 @@ navbar:
       target: community/coc.html
     - name: "Ecosystem"
       target: community/ecosystem.html
-    - name: "Benchmarks"
-      target: community/benchmarks.html
   - name: "Contribute"
     target: contribute.html
 blog: