From d11fa8d3db49d02e36e090d02b1be7eedc8eff56 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 27 Oct 2023 17:04:29 +0200 Subject: [PATCH 01/15] Backport PR #55722 on branch 2.1.x (DOC: Add whatsnew for 2.1.3) (#55723) Backport PR #55722: DOC: Add whatsnew for 2.1.3 Co-authored-by: Joris Van den Bossche --- doc/source/whatsnew/index.rst | 1 + doc/source/whatsnew/v2.1.1.rst | 2 +- doc/source/whatsnew/v2.1.2.rst | 2 ++ doc/source/whatsnew/v2.1.3.rst | 41 ++++++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 doc/source/whatsnew/v2.1.3.rst diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index baab20845a2a2..eef65366a2762 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -16,6 +16,7 @@ Version 2.1 .. toctree:: :maxdepth: 2 + v2.1.3 v2.1.2 v2.1.1 v2.1.0 diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst index c52f7db8ec3ec..6101333ae760c 100644 --- a/doc/source/whatsnew/v2.1.1.rst +++ b/doc/source/whatsnew/v2.1.1.rst @@ -52,4 +52,4 @@ Other Contributors ~~~~~~~~~~~~ -.. contributors:: v2.1.0..v2.1.1|HEAD +.. contributors:: v2.1.0..v2.1.1 diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst index d2e10b4a42e68..ade7f767815b1 100644 --- a/doc/source/whatsnew/v2.1.2.rst +++ b/doc/source/whatsnew/v2.1.2.rst @@ -66,3 +66,5 @@ Other Contributors ~~~~~~~~~~~~ + +.. contributors:: v2.1.1..v2.1.2 diff --git a/doc/source/whatsnew/v2.1.3.rst b/doc/source/whatsnew/v2.1.3.rst new file mode 100644 index 0000000000000..1359123ef153e --- /dev/null +++ b/doc/source/whatsnew/v2.1.3.rst @@ -0,0 +1,41 @@ +.. _whatsnew_213: + +What's new in 2.1.3 (November ??, 2023) +--------------------------------------- + +These are the changes in pandas 2.1.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- +.. _whatsnew_213.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- +- + +.. --------------------------------------------------------------------------- +.. _whatsnew_213.bug_fixes: + +Bug fixes +~~~~~~~~~ +- +- + +.. --------------------------------------------------------------------------- +.. _whatsnew_213.other: + +Other +~~~~~ +- +- + +.. --------------------------------------------------------------------------- +.. _whatsnew_213.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v2.1.2..v2.1.3|HEAD From c5da5ea15a17a2c0e95495619555b77488e2811b Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 27 Oct 2023 18:43:34 +0200 Subject: [PATCH 02/15] Backport PR #55707 on branch 2.1.x (REF: Avoid np.can_cast for scalar inference for NEP 50) (#55716) Backport PR #55707: REF: Avoid np.can_cast for scalar inference for NEP 50 Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- ci/run_tests.sh | 3 ++- pandas/core/dtypes/cast.py | 25 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 48ef21686a26f..6a70ea1df3e71 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -10,7 +10,8 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" -PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +# TODO: Support NEP 50 and remove NPY_PROMOTION_STATE +PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8ed57e9bf5532..9a9a8ed22f282 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -700,7 +700,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.integer): - if not np.can_cast(fill_value, dtype): + if not np_can_cast_scalar(fill_value, dtype): # type: ignore[arg-type] # upcast to prevent overflow mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) @@ -1892,4 +1892,25 @@ def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: """ if not len(rng): return True - return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) + return np_can_cast_scalar(rng.start, dtype) and np_can_cast_scalar(rng.stop, dtype) + + +def np_can_cast_scalar(element: Scalar, dtype: np.dtype) -> bool: + """ + np.can_cast pandas-equivalent for pre 2-0 behavior that allowed scalar + inference + + Parameters + ---------- + element : Scalar + dtype : np.dtype + + Returns + ------- + bool + """ + try: + np_can_hold_element(dtype, element) + return True + except (LossySetitemError, NotImplementedError): + return False From 08f2bd4c5da3adae7ea1a82a22fbe7b0494a3d59 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Sun, 29 Oct 2023 02:43:00 +0100 Subject: [PATCH 03/15] Backport PR #55747 on branch 2.1.x (DOC: Fix release date for 2.1.2) (#55749) Backport PR #55747: DOC: Fix release date for 2.1.2 Co-authored-by: Thomas Li <47963215+lithomas1@users.noreply.github.com> --- doc/source/whatsnew/v2.1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst index ade7f767815b1..a13a273b01257 100644 --- a/doc/source/whatsnew/v2.1.2.rst +++ b/doc/source/whatsnew/v2.1.2.rst @@ -1,6 +1,6 @@ .. _whatsnew_212: -What's new in 2.1.2 (October 25, 2023) +What's new in 2.1.2 (October 26, 2023) --------------------------------------- These are the changes in pandas 2.1.2. See :ref:`release` for a full changelog From bf9168410cb81cdc87928f637fba6638002cb33a Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 1 Nov 2023 23:52:02 +0100 Subject: [PATCH 04/15] Backport PR #55726 on branch 2.1.x (fix segfault with tz_localize_to_utc) (#55796) Backport PR #55726: fix segfault with tz_localize_to_utc Co-authored-by: William Ayd --- pandas/_libs/tslibs/tzconversion.pyx | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index e17d264333264..af27acf16b771 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -332,13 +332,16 @@ timedelta-like} # Shift the delta_idx by if the UTC offset of # the target tz is greater than 0 and we're moving forward # or vice versa - first_delta = info.deltas[0] - if (shift_forward or shift_delta > 0) and first_delta > 0: - delta_idx_offset = 1 - elif (shift_backward or shift_delta < 0) and first_delta < 0: - delta_idx_offset = 1 - else: - delta_idx_offset = 0 + # TODO: delta_idx_offset and info.deltas are needed for zoneinfo timezones, + # but are not applicable for all timezones. Setting the former to 0 and + # length checking the latter avoids UB, but this could use a larger refactor + delta_idx_offset = 0 + if len(info.deltas): + first_delta = info.deltas[0] + if (shift_forward or shift_delta > 0) and first_delta > 0: + delta_idx_offset = 1 + elif (shift_backward or shift_delta < 0) and first_delta < 0: + delta_idx_offset = 1 for i in range(n): val = vals[i] From 8a616b901f1f2a3f8fdcbc8a6e4754330abafba1 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Mon, 6 Nov 2023 23:05:51 +0100 Subject: [PATCH 05/15] Backport PR #55817 on branch 2.1.x (COMPAT: Numpy int64 Windows default for Numpy 2.0) (#55851) Backport PR #55817: COMPAT: Numpy int64 Windows default for Numpy 2.0 Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/compat/numpy/__init__.py | 3 ++- pandas/tests/extension/base/reduce.py | 4 +-- pandas/tests/extension/test_arrow.py | 2 +- pandas/tests/extension/test_masked.py | 29 +++++++++++++++++----- pandas/tests/frame/methods/test_reindex.py | 3 ++- pandas/tests/frame/test_reductions.py | 14 +++++++---- 6 files changed, 39 insertions(+), 16 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 51c9892b64a08..0552301b59400 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -11,6 +11,7 @@ np_version_gte1p24 = _nlv >= Version("1.24") np_version_gte1p24p3 = _nlv >= Version("1.24.3") np_version_gte1p25 = _nlv >= Version("1.25") +np_version_gt2 = _nlv >= Version("2.0.0.dev0") is_numpy_dev = _nlv.dev is not None _min_numpy_ver = "1.22.4" @@ -26,7 +27,7 @@ np_long: type np_ulong: type -if _nlv >= Version("2.0.0.dev0"): +if np_version_gt2: try: with warnings.catch_warnings(): warnings.filterwarnings( diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index 43b2df4290eed..91d4855dcefe5 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -39,7 +39,7 @@ def check_reduce(self, s, op_name, skipna): expected = exp_op(skipna=skipna) tm.assert_almost_equal(result, expected) - def _get_expected_reduction_dtype(self, arr, op_name: str): + def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): # Find the expected dtype when the given reduction is done on a DataFrame # column with this array. The default assumes float64-like behavior, # i.e. retains the dtype. @@ -58,7 +58,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): kwargs = {"ddof": 1} if op_name in ["var", "std"] else {} - cmp_dtype = self._get_expected_reduction_dtype(arr, op_name) + cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna) # The DataFrame method just calls arr._reduce with keepdims=True, # so this first check is perfunctory. diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index fef4fbea2e485..61474aa94d1c8 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -543,7 +543,7 @@ def test_reduce_series_boolean( return super().test_reduce_series_boolean(data, all_boolean_reductions, skipna) - def _get_expected_reduction_dtype(self, arr, op_name: str): + def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): if op_name in ["max", "min"]: cmp_dtype = arr.dtype elif arr.dtype.name == "decimal128(7, 3)[pyarrow]": diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index c4195be8ea121..588a2fb58d9be 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -20,6 +20,7 @@ IS64, is_platform_windows, ) +from pandas.compat.numpy import np_version_gt2 import pandas as pd import pandas._testing as tm @@ -40,7 +41,7 @@ ) from pandas.tests.extension import base -is_windows_or_32bit = is_platform_windows() or not IS64 +is_windows_or_32bit = (is_platform_windows() and not np_version_gt2) or not IS64 pytestmark = [ pytest.mark.filterwarnings( @@ -325,7 +326,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): expected = pd.NA tm.assert_almost_equal(result, expected) - def _get_expected_reduction_dtype(self, arr, op_name: str): + def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): if tm.is_float_dtype(arr.dtype): cmp_dtype = arr.dtype.name elif op_name in ["mean", "median", "var", "std", "skew"]: @@ -335,16 +336,32 @@ def _get_expected_reduction_dtype(self, arr, op_name: str): elif arr.dtype in ["Int64", "UInt64"]: cmp_dtype = arr.dtype.name elif tm.is_signed_integer_dtype(arr.dtype): - cmp_dtype = "Int32" if is_windows_or_32bit else "Int64" + # TODO: Why does Window Numpy 2.0 dtype depend on skipna? + cmp_dtype = ( + "Int32" + if (is_platform_windows() and (not np_version_gt2 or not skipna)) + or not IS64 + else "Int64" + ) elif tm.is_unsigned_integer_dtype(arr.dtype): - cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64" + cmp_dtype = ( + "UInt32" + if (is_platform_windows() and (not np_version_gt2 or not skipna)) + or not IS64 + else "UInt64" + ) elif arr.dtype.kind == "b": if op_name in ["mean", "median", "var", "std", "skew"]: cmp_dtype = "Float64" elif op_name in ["min", "max"]: cmp_dtype = "boolean" elif op_name in ["sum", "prod"]: - cmp_dtype = "Int32" if is_windows_or_32bit else "Int64" + cmp_dtype = ( + "Int32" + if (is_platform_windows() and (not np_version_gt2 or not skipna)) + or not IS64 + else "Int64" + ) else: raise TypeError("not supposed to reach this") else: @@ -360,7 +377,7 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool): # overwrite to ensure pd.NA is tested instead of np.nan # https://github.com/pandas-dev/pandas/issues/30958 length = 64 - if not IS64 or is_platform_windows(): + if is_windows_or_32bit: # Item "ExtensionDtype" of "Union[dtype[Any], ExtensionDtype]" has # no attribute "itemsize" if not ser.dtype.itemsize == 8: # type: ignore[union-attr] diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 0858e33a989b7..678fec835aa82 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -12,6 +12,7 @@ IS64, is_platform_windows, ) +from pandas.compat.numpy import np_version_gt2 import pandas.util._test_decorators as td import pandas as pd @@ -131,7 +132,7 @@ class TestDataFrameSelectReindex: # test_indexing @pytest.mark.xfail( - not IS64 or is_platform_windows(), + not IS64 or (is_platform_windows() and not np_version_gt2), reason="Passes int32 values to DatetimeArray in make_na_array on " "windows, 32bit linux builds", ) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 61d7c125ee5e7..bec1fcd1e7462 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -10,6 +10,7 @@ IS64, is_platform_windows, ) +from pandas.compat.numpy import np_version_gt2 import pandas.util._test_decorators as td import pandas as pd @@ -32,6 +33,7 @@ nanops, ) +is_windows_np2_or_is32 = (is_platform_windows() and not np_version_gt2) or not IS64 is_windows_or_is32 = is_platform_windows() or not IS64 @@ -1766,13 +1768,13 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype): @pytest.mark.parametrize( "opname, dtype, exp_value, exp_dtype", [ - ("sum", "Int8", 0, ("Int32" if is_windows_or_is32 else "Int64")), - ("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")), - ("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")), + ("sum", "Int8", 0, ("Int32" if is_windows_np2_or_is32 else "Int64")), + ("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")), + ("prod", "Int8", 1, ("Int32" if is_windows_np2_or_is32 else "Int64")), ("sum", "Int64", 0, "Int64"), ("prod", "Int64", 1, "Int64"), - ("sum", "UInt8", 0, ("UInt32" if is_windows_or_is32 else "UInt64")), - ("prod", "UInt8", 1, ("UInt32" if is_windows_or_is32 else "UInt64")), + ("sum", "UInt8", 0, ("UInt32" if is_windows_np2_or_is32 else "UInt64")), + ("prod", "UInt8", 1, ("UInt32" if is_windows_np2_or_is32 else "UInt64")), ("sum", "UInt64", 0, "UInt64"), ("prod", "UInt64", 1, "UInt64"), ("sum", "Float32", 0, "Float32"), @@ -1787,6 +1789,8 @@ def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype expected = Series([exp_value, exp_value], dtype=exp_dtype) tm.assert_series_equal(result, expected) + # TODO: why does min_count=1 impact the resulting Windows dtype + # differently than min_count=0? @pytest.mark.parametrize( "opname, dtype, exp_dtype", [ From 0bf70c7cf6ff0cc3fd18f44f450bef7e527f984c Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Mon, 6 Nov 2023 17:31:11 -0500 Subject: [PATCH 06/15] Backport PR #55761 on branch 2.1.x (BUG: DatetimeIndex.diff raising TypeError) (#55819) * Backport PR #55473: TST: sort index with sliced MultiIndex * BUG: DatetimeIndex.diff raising TypeError (#55761) * BUG: DatetimeIndex.diff raising TypeError * add test and whatsnew * typing * use Index (cherry picked from commit f04da3c1c9f5fd81ce7a04e55a965e4021de2ae9) * fix whatsnew --- doc/source/whatsnew/v2.1.3.rst | 2 +- pandas/core/indexes/base.py | 5 +++-- pandas/tests/indexes/test_datetimelike.py | 8 ++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.3.rst b/doc/source/whatsnew/v2.1.3.rst index 1359123ef153e..3b1cd1c152baa 100644 --- a/doc/source/whatsnew/v2.1.3.rst +++ b/doc/source/whatsnew/v2.1.3.rst @@ -21,7 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in :meth:`DatetimeIndex.diff` raising ``TypeError`` (:issue:`55080`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b4ef5380d2b30..85b68c1bc2ec7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7024,7 +7024,8 @@ def infer_objects(self, copy: bool = True) -> Index: result._references.add_index_reference(result) return result - def diff(self, periods: int = 1): + @final + def diff(self, periods: int = 1) -> Index: """ Computes the difference between consecutive values in the Index object. @@ -7050,7 +7051,7 @@ def diff(self, periods: int = 1): Index([nan, 10.0, 10.0, 10.0, 10.0], dtype='float64') """ - return self._constructor(self.to_series().diff(periods)) + return Index(self.to_series().diff(periods)) def round(self, decimals: int = 0): """ diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 71cc7f29c62bc..5ad2e9b2f717e 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -159,3 +159,11 @@ def test_where_cast_str(self, simple_index): result = index.where(mask, ["foo"]) tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) + def test_diff(self, unit): + # GH 55080 + dti = pd.to_datetime([10, 20, 30], unit=unit).as_unit(unit) + result = dti.diff(1) + expected = pd.TimedeltaIndex([pd.NaT, 10, 10], unit=unit).as_unit(unit) + tm.assert_index_equal(result, expected) From 7702f416948134d268274dd33f720151e19f3f76 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 7 Nov 2023 05:46:11 -1000 Subject: [PATCH 07/15] Backport PR #55853: DEPS: Use ipython run_cell instead of run_code; remove pytest-asyncio (#55858) --- .github/workflows/unit-tests.yml | 6 +++--- .github/workflows/wheels.yml | 2 +- ci/deps/actions-310.yaml | 1 - ci/deps/actions-311-downstream_compat.yaml | 1 - ci/deps/actions-311-numpydev.yaml | 1 - ci/deps/actions-311-pyarrownightly.yaml | 1 - ci/deps/actions-311.yaml | 1 - ci/deps/actions-39-minimum_versions.yaml | 1 - ci/deps/actions-39.yaml | 1 - ci/deps/actions-pypy-39.yaml | 1 - ci/deps/circle-310-arm64.yaml | 1 - ci/meta.yaml | 1 - environment.yml | 1 - pandas/tests/arrays/categorical/test_warnings.py | 7 ++----- pandas/tests/frame/test_api.py | 8 ++------ pandas/tests/indexes/test_base.py | 6 ++---- pandas/tests/resample/test_resampler_grouper.py | 6 ++---- pandas/util/_test_decorators.py | 12 +----------- pyproject.toml | 6 ++---- requirements-dev.txt | 1 - scripts/tests/data/deps_expected_random.yaml | 1 - scripts/tests/data/deps_minimum.toml | 6 ++---- scripts/tests/data/deps_unmodified_random.yaml | 1 - 23 files changed, 17 insertions(+), 56 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 8ff7d290ea9d1..14abcdbfdb310 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -236,7 +236,7 @@ jobs: . ~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1 python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true" - python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1 + python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 python -m pip install --no-cache-dir --no-build-isolation -e . python -m pip list --no-cache-dir export PANDAS_CI=1 @@ -274,7 +274,7 @@ jobs: /opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev . ~/virtualenvs/pandas-dev/bin/activate python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1 - python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1 + python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1 python -m pip install --no-cache-dir --no-build-isolation -e . python -m pip list --no-cache-dir @@ -347,7 +347,7 @@ jobs: python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1 python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy python -m pip install versioneer[toml] - python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17 + python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov python -m pip install -ve . --no-build-isolation --no-index --no-deps python -m pip list diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 6759f7b82eacf..95b48ee4b7426 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -168,7 +168,7 @@ jobs: shell: pwsh run: | $TST_CMD = @" - python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17; + python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0; python -m pip install `$(Get-Item pandas\wheelhouse\*.whl); python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\", \"`\"-m not clipboard and not single_cpu and not slow and not network and not db`\"\"])`'; "@ diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index f8fa04a60a378..82f41811bf56e 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -14,7 +14,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17.0 - pytest-localserver>=0.7.1 - boto3 diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index 8bf5ea51c4bf3..8422aba5239f9 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -15,7 +15,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17.0 - pytest-localserver>=0.7.1 - boto3 diff --git a/ci/deps/actions-311-numpydev.yaml b/ci/deps/actions-311-numpydev.yaml index 9a6c05a138f9b..695bd483fc572 100644 --- a/ci/deps/actions-311-numpydev.yaml +++ b/ci/deps/actions-311-numpydev.yaml @@ -18,7 +18,6 @@ dependencies: # causes an InternalError within pytest - pytest-xdist>=2.2.0, <3 - hypothesis>=6.46.1 - - pytest-asyncio>=0.17.0 # pandas dependencies - python-dateutil diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml index d7e5523dd2545..b5df68968ea46 100644 --- a/ci/deps/actions-311-pyarrownightly.yaml +++ b/ci/deps/actions-311-pyarrownightly.yaml @@ -15,7 +15,6 @@ dependencies: - pytest-cov - pytest-xdist>=2.2.0 - hypothesis>=6.46.1 - - pytest-asyncio>=0.17.0 # required dependencies - python-dateutil diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index 7f8cca9e65fe1..cb1771c485297 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -14,7 +14,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17.0 - pytest-localserver>=0.7.1 - boto3 diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml index 5d34940aa27e1..e4db8cb0a540b 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -16,7 +16,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17.0 - pytest-localserver>=0.7.1 - boto3 diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index f5ea116e51dd4..745781298b86a 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -14,7 +14,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17.0 - pytest-localserver>=0.7.1 - boto3 diff --git a/ci/deps/actions-pypy-39.yaml b/ci/deps/actions-pypy-39.yaml index a489690776be3..ce02ea2b9a090 100644 --- a/ci/deps/actions-pypy-39.yaml +++ b/ci/deps/actions-pypy-39.yaml @@ -16,7 +16,6 @@ dependencies: # test dependencies - pytest>=7.3.2 - pytest-cov - - pytest-asyncio>=0.17.0 - pytest-xdist>=2.2.0 - hypothesis>=6.46.1 diff --git a/ci/deps/circle-310-arm64.yaml b/ci/deps/circle-310-arm64.yaml index a71523e8c3579..08a422c6d538a 100644 --- a/ci/deps/circle-310-arm64.yaml +++ b/ci/deps/circle-310-arm64.yaml @@ -14,7 +14,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17.0 - pytest-localserver>=0.7.1 - boto3 diff --git a/ci/meta.yaml b/ci/meta.yaml index 77f536a18a204..5b343f60b3dce 100644 --- a/ci/meta.yaml +++ b/ci/meta.yaml @@ -64,7 +64,6 @@ test: requires: - pip - pytest >=7.3.2 - - pytest-asyncio >=0.17.0 - pytest-xdist >=2.2.0 - pytest-cov - hypothesis >=6.46.1 diff --git a/environment.yml b/environment.yml index 98c39378d1ae1..3f6f0de68748b 100644 --- a/environment.yml +++ b/environment.yml @@ -16,7 +16,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17.0 - coverage # required dependencies diff --git a/pandas/tests/arrays/categorical/test_warnings.py b/pandas/tests/arrays/categorical/test_warnings.py index c0623faccd325..68c59706a6c3b 100644 --- a/pandas/tests/arrays/categorical/test_warnings.py +++ b/pandas/tests/arrays/categorical/test_warnings.py @@ -1,19 +1,16 @@ import pytest -from pandas.util._test_decorators import async_mark - import pandas._testing as tm class TestCategoricalWarnings: - @async_mark() - async def test_tab_complete_warning(self, ip): + def test_tab_complete_warning(self, ip): # https://github.com/pandas-dev/pandas/issues/16409 pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter code = "import pandas as pd; c = pd.Categorical([])" - await ip.run_code(code) + ip.run_cell(code) # GH 31324 newer jedi version raises Deprecation warning; # appears resolved 2021-02-02 diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 8fc78629beb0a..aa7aa8964a059 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -7,8 +7,6 @@ from pandas._config.config import option_context -from pandas.util._test_decorators import async_mark - import pandas as pd from pandas import ( DataFrame, @@ -288,8 +286,7 @@ def _check_f(base, f): f = lambda x: x.rename({1: "foo"}, inplace=True) _check_f(d.copy(), f) - @async_mark() - async def test_tab_complete_warning(self, ip, frame_or_series): + def test_tab_complete_warning(self, ip, frame_or_series): # GH 16409 pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter @@ -299,8 +296,7 @@ async def test_tab_complete_warning(self, ip, frame_or_series): else: code = "from pandas import Series; obj = Series(dtype=object)" - await ip.run_code(code) - + ip.run_cell(code) # GH 31324 newer jedi version raises Deprecation warning; # appears resolved 2021-02-02 with tm.assert_produces_warning(None, raise_on_extra_warnings=False): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 8fd1e296fb79a..da4b44227bef3 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -10,7 +10,6 @@ from pandas.compat import IS64 from pandas.errors import InvalidIndexError -from pandas.util._test_decorators import async_mark from pandas.core.dtypes.common import ( is_any_real_numeric_dtype, @@ -1218,14 +1217,13 @@ def test_cached_properties_not_settable(self): with pytest.raises(AttributeError, match="Can't set attribute"): index.is_unique = False - @async_mark() - async def test_tab_complete_warning(self, ip): + def test_tab_complete_warning(self, ip): # https://github.com/pandas-dev/pandas/issues/16409 pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter code = "import pandas as pd; idx = pd.Index([1, 2])" - await ip.run_code(code) + ip.run_cell(code) # GH 31324 newer jedi version raises Deprecation warning; # appears resolved 2021-02-02 diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 6abdde0f5ccff..b5288c793dafa 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -4,7 +4,6 @@ import pytest from pandas.compat import is_platform_windows -from pandas.util._test_decorators import async_mark import pandas as pd from pandas import ( @@ -26,8 +25,7 @@ def test_frame(): ) -@async_mark() -async def test_tab_complete_ipython6_warning(ip): +def test_tab_complete_ipython6_warning(ip): from IPython.core.completer import provisionalcompleter code = dedent( @@ -37,7 +35,7 @@ async def test_tab_complete_ipython6_warning(ip): rs = s.resample("D") """ ) - await ip.run_code(code) + ip.run_cell(code) # GH 31324 newer jedi version raises Deprecation warning; # appears resolved 2021-02-02 diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 03011a1ffe622..93c46274fdc1b 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -38,11 +38,11 @@ def test_foo(): if TYPE_CHECKING: from pandas._typing import F + from pandas.compat import ( IS64, is_platform_windows, ) -from pandas.compat._optional import import_optional_dependency from pandas.core.computation.expressions import ( NUMEXPR_INSTALLED, @@ -214,16 +214,6 @@ def documented_fixture(fixture): return documented_fixture -def async_mark(): - try: - import_optional_dependency("pytest_asyncio") - async_mark = pytest.mark.asyncio - except ImportError: - async_mark = pytest.mark.skip(reason="Missing dependency pytest-asyncio") - - return async_mark - - def mark_array_manager_not_yet_implemented(request) -> None: mark = pytest.mark.xfail(reason="Not yet implemented for ArrayManager") request.node.add_marker(mark) diff --git a/pyproject.toml b/pyproject.toml index e9e3e2f67e2f1..d7634894b4835 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ repository = 'https://github.com/pandas-dev/pandas' matplotlib = "pandas:plotting._matplotlib" [project.optional-dependencies] -test = ['hypothesis>=6.46.1', 'pytest>=7.3.2', 'pytest-xdist>=2.2.0', 'pytest-asyncio>=0.17.0'] +test = ['hypothesis>=6.46.1', 'pytest>=7.3.2', 'pytest-xdist>=2.2.0'] performance = ['bottleneck>=1.3.4', 'numba>=0.55.2', 'numexpr>=2.8.0'] computation = ['scipy>=1.8.1', 'xarray>=2022.03.0'] fss = ['fsspec>=2022.05.0'] @@ -109,7 +109,6 @@ all = ['beautifulsoup4>=4.11.1', 'pyreadstat>=1.1.5', 'pytest>=7.3.2', 'pytest-xdist>=2.2.0', - 'pytest-asyncio>=0.17.0', 'pyxlsb>=1.0.9', 'qtpy>=2.2.0', 'scipy>=1.8.1', @@ -151,7 +150,7 @@ setup = ['--vsenv'] # For Windows skip = "cp36-* cp37-* cp38-* pp* *_i686 *_ppc64le *_s390x *-musllinux_aarch64" build-verbosity = "3" environment = {LDFLAGS="-Wl,--strip-all"} -test-requires = "hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17" +test-requires = "hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0" test-command = """ PANDAS_CI='1' python -c 'import pandas as pd; \ pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db", "-n 2", "--no-strict-data-files"]); \ @@ -514,7 +513,6 @@ markers = [ "arm_slow: mark a test as slow for arm64 architecture", "arraymanager: mark a test to run with ArrayManager enabled", ] -asyncio_mode = "strict" [tool.mypy] # Import discovery diff --git a/requirements-dev.txt b/requirements-dev.txt index 855eaccfd4f5f..af4f82b068d31 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,7 +9,6 @@ meson-python==0.13.1 pytest>=7.3.2 pytest-cov pytest-xdist>=2.2.0 -pytest-asyncio>=0.17.0 coverage python-dateutil numpy<2 diff --git a/scripts/tests/data/deps_expected_random.yaml b/scripts/tests/data/deps_expected_random.yaml index c70025f8f019d..bebb25b9e1bb8 100644 --- a/scripts/tests/data/deps_expected_random.yaml +++ b/scripts/tests/data/deps_expected_random.yaml @@ -14,7 +14,6 @@ dependencies: - pytest-cov - pytest-xdist>=2.2.0 - psutil - - pytest-asyncio>=0.17.0 - boto3 # required dependencies diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml index b43815a982139..5fb51856e841e 100644 --- a/scripts/tests/data/deps_minimum.toml +++ b/scripts/tests/data/deps_minimum.toml @@ -55,7 +55,7 @@ repository = 'https://github.com/pandas-dev/pandas' matplotlib = "pandas:plotting._matplotlib" [project.optional-dependencies] -test = ['hypothesis>=6.34.2', 'pytest>=7.3.2', 'pytest-xdist>=2.2.0', 'pytest-asyncio>=0.17.0'] +test = ['hypothesis>=6.34.2', 'pytest>=7.3.2', 'pytest-xdist>=2.2.0'] performance = ['bottleneck>=1.3.2', 'numba>=0.53.1', 'numexpr>=2.7.1'] timezone = ['tzdata>=2022.1'] computation = ['scipy>=1.7.1', 'xarray>=0.21.0'] @@ -102,7 +102,6 @@ all = ['beautifulsoup4>=5.9.3', 'pyreadstat>=1.1.2', 'pytest>=7.3.2', 'pytest-xdist>=2.2.0', - 'pytest-asyncio>=0.17.0', 'pyxlsb>=1.0.8', 'qtpy>=2.2.0', 'scipy>=1.7.1', @@ -141,7 +140,7 @@ parentdir_prefix = "pandas-" [tool.cibuildwheel] skip = "cp36-* cp37-* pp37-* *-manylinux_i686 *_ppc64le *_s390x *-musllinux*" build-verbosity = "3" -test-requires = "hypothesis>=6.34.2 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17" +test-requires = "hypothesis>=6.34.2 pytest>=7.3.2 pytest-xdist>=2.2.0" test-command = "python {project}/ci/test_wheels.py" [tool.cibuildwheel.macos] @@ -385,7 +384,6 @@ markers = [ "arm_slow: mark a test as slow for arm64 architecture", "arraymanager: mark a test to run with ArrayManager enabled", ] -asyncio_mode = "strict" [tool.mypy] # Import discovery diff --git a/scripts/tests/data/deps_unmodified_random.yaml b/scripts/tests/data/deps_unmodified_random.yaml index 503eb3c7c7734..e54482282fcc8 100644 --- a/scripts/tests/data/deps_unmodified_random.yaml +++ b/scripts/tests/data/deps_unmodified_random.yaml @@ -14,7 +14,6 @@ dependencies: - pytest-cov - pytest-xdist>=2.2.0 - psutil - - pytest-asyncio>=0.17 - boto3 # required dependencies From c9854d9821b2b7e73f88a3c2d6055c8afcf5dff7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 7 Nov 2023 10:07:46 -1000 Subject: [PATCH 08/15] TST: Make read_csv tests pyarrow 13 compatable on 2.1.x (#55855) * TST: Make read_csv tests pyarrow 13 compatable on 2.1.x * Skip on windows for ARROW_TIMEZONE_DATABASE --- pandas/tests/interchange/test_impl.py | 16 +++++++++++++++- .../tests/io/parser/common/test_common_basic.py | 8 ++++++-- pandas/tests/io/parser/test_parse_dates.py | 8 +++++++- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 8a25a2c1889f3..61d3edcbb2964 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -4,6 +4,10 @@ import pytest from pandas._libs.tslibs import iNaT +from pandas.compat import ( + is_ci_environment, + is_platform_windows, +) import pandas.util._test_decorators as td import pandas as pd @@ -309,11 +313,21 @@ def test_datetimetzdtype(tz, unit): tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) -def test_interchange_from_non_pandas_tz_aware(): +def test_interchange_from_non_pandas_tz_aware(request): # GH 54239, 54287 pa = pytest.importorskip("pyarrow", "11.0.0") import pyarrow.compute as pc + if is_platform_windows() and is_ci_environment(): + mark = pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason=( + "TODO: Set ARROW_TIMEZONE_DATABASE environment variable " + "on CI to path to the tzdata for pyarrow." + ), + ) + request.node.add_marker(mark) + arr = pa.array([datetime(2020, 1, 1), None, datetime(2020, 1, 2)]) arr = pc.assume_timezone(arr, "Asia/Kathmandu") table = pa.table({"arr": arr}) diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 00a26a755756f..442aa5ef87b10 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -86,7 +86,9 @@ def test_read_csv_local(all_parsers, csv1): fname = prefix + str(os.path.abspath(csv1)) result = parser.read_csv(fname, index_col=0, parse_dates=True) - + # TODO: make unit check more specific + if parser.engine == "pyarrow": + result.index = result.index.as_unit("ns") expected = DataFrame( [ [0.980269, 3.685731, -0.364216805298, -1.159738], @@ -177,7 +179,9 @@ def test_read_csv_low_memory_no_rows_with_index(all_parsers): def test_read_csv_dataframe(all_parsers, csv1): parser = all_parsers result = parser.read_csv(csv1, index_col=0, parse_dates=True) - + # TODO: make unit check more specific + if parser.engine == "pyarrow": + result.index = result.index.as_unit("ns") expected = DataFrame( [ [0.980269, 3.685731, -0.364216805298, -1.159738], diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 9f7840588f89e..c6afff56de16b 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -979,12 +979,15 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs): ) -def test_parse_tz_aware(all_parsers, request): +def test_parse_tz_aware(all_parsers): # See gh-1693 parser = all_parsers data = "Date,x\n2012-06-13T01:39:00Z,0.5" result = parser.read_csv(StringIO(data), index_col=0, parse_dates=True) + # TODO: make unit check more specific + if parser.engine == "pyarrow": + result.index = result.index.as_unit("ns") expected = DataFrame( {"x": [0.5]}, index=Index([Timestamp("2012-06-13 01:39:00+00:00")], name="Date") ) @@ -2231,6 +2234,9 @@ def test_parse_dates_arrow_engine(all_parsers): 2000-01-01 00:00:01,1""" result = parser.read_csv(StringIO(data), parse_dates=["a"]) + # TODO: make unit check more specific + if parser.engine == "pyarrow": + result["a"] = result["a"].dt.as_unit("ns") expected = DataFrame( { "a": [ From 569f904d45aeb5d0abb925e9875140afd61e3da4 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 8 Nov 2023 01:08:46 +0100 Subject: [PATCH 09/15] Backport PR #55227 on branch 2.1.x (BUG: Interchange object data buffer has the wrong dtype / from_dataframe incorrect ) (#55863) Backport PR #55227: BUG: Interchange object data buffer has the wrong dtype / from_dataframe incorrect Co-authored-by: Marco Edward Gorelli Co-authored-by: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> --- pandas/core/interchange/from_dataframe.py | 16 ++++++++-------- pandas/tests/interchange/test_impl.py | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index 214fbf9f36435..d45ae37890ba7 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -266,10 +266,9 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]: assert buffers["offsets"], "String buffers must contain offsets" # Retrieve the data buffer containing the UTF-8 code units - data_buff, protocol_data_dtype = buffers["data"] + data_buff, _ = buffers["data"] # We're going to reinterpret the buffer as uint8, so make sure we can do it safely - assert protocol_data_dtype[1] == 8 - assert protocol_data_dtype[2] in ( + assert col.dtype[2] in ( ArrowCTypes.STRING, ArrowCTypes.LARGE_STRING, ) # format_str == utf-8 @@ -377,15 +376,16 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any """ buffers = col.get_buffers() - _, _, format_str, _ = col.dtype - dbuf, dtype = buffers["data"] + _, col_bit_width, format_str, _ = col.dtype + dbuf, _ = buffers["data"] # Consider dtype being `uint` to get number of units passed since the 01.01.1970 + data = buffer_to_ndarray( dbuf, ( - DtypeKind.UINT, - dtype[1], - getattr(ArrowCTypes, f"UINT{dtype[1]}"), + DtypeKind.INT, + col_bit_width, + getattr(ArrowCTypes, f"INT{col_bit_width}"), Endianness.NATIVE, ), offset=col.offset, diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 61d3edcbb2964..97a388569e261 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -18,6 +18,7 @@ DtypeKind, ) from pandas.core.interchange.from_dataframe import from_dataframe +from pandas.core.interchange.utils import ArrowCTypes @pytest.fixture @@ -340,3 +341,24 @@ def test_interchange_from_non_pandas_tz_aware(request): dtype="datetime64[us, Asia/Kathmandu]", ) tm.assert_frame_equal(expected, result) + + +def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None: + # https://github.com/pandas-dev/pandas/issues/54781 + df = pd.DataFrame({"a": ["foo", "bar"]}).__dataframe__() + interchange = df.__dataframe__() + column = interchange.get_column_by_name("a") + buffers = column.get_buffers() + buffers_data = buffers["data"] + buffer_dtype = buffers_data[1] + buffer_dtype = ( + DtypeKind.UINT, + 8, + ArrowCTypes.UINT8, + buffer_dtype[3], + ) + buffers["data"] = (buffers_data[0], buffer_dtype) + column.get_buffers = lambda: buffers + interchange.get_column_by_name = lambda _: column + monkeypatch.setattr(df, "__dataframe__", lambda allow_copy: interchange) + pd.api.interchange.from_dataframe(df) From de8af3c3f4fe9552769b10e458678ff12937bcd0 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 9 Nov 2023 18:02:19 +0100 Subject: [PATCH 10/15] Backport PR #55427 on branch 2.1.x (DOC: Remove outdated docs about NumPy's broadcasting) (#55896) Backport PR #55427: DOC: Remove outdated docs about NumPy's broadcasting Co-authored-by: cobalt <61329810+RedGuy12@users.noreply.github.com> --- doc/source/user_guide/basics.rst | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 2e299da5e5794..d3c9d83b943ce 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -408,20 +408,6 @@ raise a ValueError: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo']) -Note that this is different from the NumPy behavior where a comparison can -be broadcast: - -.. ipython:: python - - np.array([1, 2, 3]) == np.array([2]) - -or it can return False if broadcasting can not be done: - -.. ipython:: python - :okwarning: - - np.array([1, 2, 3]) == np.array([1, 2]) - Combining overlapping data sets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 41586668d290d83910c9adcbade49a911025016b Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 9 Nov 2023 21:23:57 +0100 Subject: [PATCH 11/15] Backport PR #55764 on branch 2.1.x (REGR: fix return class in _constructor_from_mgr for simple subclasses) (#55892) Backport PR #55764: REGR: fix return class in _constructor_from_mgr for simple subclasses Co-authored-by: Isaac Virshup --- doc/source/whatsnew/v2.1.3.rst | 2 +- pandas/core/frame.py | 2 +- pandas/core/series.py | 2 +- pandas/tests/frame/test_subclass.py | 41 +++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.3.rst b/doc/source/whatsnew/v2.1.3.rst index 3b1cd1c152baa..264cd440907fd 100644 --- a/doc/source/whatsnew/v2.1.3.rst +++ b/doc/source/whatsnew/v2.1.3.rst @@ -13,7 +13,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed infinite recursion from operations that return a new object on some DataFrame subclasses (:issue:`55763`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c109070ce461d..605cf49856e16 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -641,7 +641,7 @@ def _constructor(self) -> Callable[..., DataFrame]: def _constructor_from_mgr(self, mgr, axes): if self._constructor is DataFrame: # we are pandas.DataFrame (or a subclass that doesn't override _constructor) - return self._from_mgr(mgr, axes=axes) + return DataFrame._from_mgr(mgr, axes=axes) else: assert axes is mgr.axes return self._constructor(mgr) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8ad049e173507..7b22d89bfe22d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -581,7 +581,7 @@ def _constructor(self) -> Callable[..., Series]: def _constructor_from_mgr(self, mgr, axes): if self._constructor is Series: # we are pandas.Series (or a subclass that doesn't override _constructor) - ser = self._from_mgr(mgr, axes=axes) + ser = Series._from_mgr(mgr, axes=axes) ser._name = None # caller is responsible for setting real name return ser else: diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 8e657f197ca1e..ef78ae62cb4d6 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -771,3 +771,44 @@ def test_constructor_with_metadata(): ) subset = df[["A", "B"]] assert isinstance(subset, MySubclassWithMetadata) + + +class SimpleDataFrameSubClass(DataFrame): + """A subclass of DataFrame that does not define a constructor.""" + + +class SimpleSeriesSubClass(Series): + """A subclass of Series that does not define a constructor.""" + + +class TestSubclassWithoutConstructor: + def test_copy_df(self): + expected = DataFrame({"a": [1, 2, 3]}) + result = SimpleDataFrameSubClass(expected).copy() + + assert ( + type(result) is DataFrame + ) # assert_frame_equal only checks isinstance(lhs, type(rhs)) + tm.assert_frame_equal(result, expected) + + def test_copy_series(self): + expected = Series([1, 2, 3]) + result = SimpleSeriesSubClass(expected).copy() + + tm.assert_series_equal(result, expected) + + def test_series_to_frame(self): + orig = Series([1, 2, 3]) + expected = orig.to_frame() + result = SimpleSeriesSubClass(orig).to_frame() + + assert ( + type(result) is DataFrame + ) # assert_frame_equal only checks isinstance(lhs, type(rhs)) + tm.assert_frame_equal(result, expected) + + def test_groupby(self): + df = SimpleDataFrameSubClass(DataFrame({"a": [1, 2, 3]})) + + for _, v in df.groupby("a"): + assert type(v) is DataFrame From 1691a51ce7d6028ec48ef4f41709f8edce346ab9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Nov 2023 00:24:32 +0100 Subject: [PATCH 12/15] Backport PR #55894 on 2.1.x: Parquet/Feather IO: disable PyExtensionType autoload (#55900) Parquet/Feather IO: disable PyExtensionType autoload (#55894) * Parquet/Feather IO: disable PyExtensionType autoload * don't install hotfix for pyarrow >= 14.0.1 * move patching to extension type definitions * expand error message * fix compat for pyarrow not installed * add whatsnew (cherry picked from commit 851fea0ea38985cd7d5e0a3b7a7a7539b5883307) --- doc/source/whatsnew/v2.1.3.rst | 3 +- pandas/compat/__init__.py | 2 + pandas/compat/pyarrow.py | 2 + pandas/core/arrays/arrow/extension_types.py | 60 +++++++++++++++++++++ pandas/io/feather_format.py | 3 ++ 5 files changed, 69 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.3.rst b/doc/source/whatsnew/v2.1.3.rst index 264cd440907fd..531559c259b44 100644 --- a/doc/source/whatsnew/v2.1.3.rst +++ b/doc/source/whatsnew/v2.1.3.rst @@ -22,7 +22,8 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Bug in :meth:`DatetimeIndex.diff` raising ``TypeError`` (:issue:`55080`) -- +- Bug in :meth:`Index.isin` raising for Arrow backed string and ``None`` value (:issue:`55821`) +- Fix :func:`read_parquet` and :func:`read_feather` for `CVE-2023-47248 `__ (:issue:`55894`) .. --------------------------------------------------------------------------- .. _whatsnew_213.other: diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 684e9dccdc0f9..6896945344b24 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -31,6 +31,7 @@ pa_version_under11p0, pa_version_under13p0, pa_version_under14p0, + pa_version_under14p1, ) if TYPE_CHECKING: @@ -188,6 +189,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]: "pa_version_under11p0", "pa_version_under13p0", "pa_version_under14p0", + "pa_version_under14p1", "IS64", "ISMUSL", "PY310", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 12f58be109d98..96501b47e07f6 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -16,6 +16,7 @@ pa_version_under12p0 = _palv < Version("12.0.0") pa_version_under13p0 = _palv < Version("13.0.0") pa_version_under14p0 = _palv < Version("14.0.0") + pa_version_under14p1 = _palv < Version("14.0.1") except ImportError: pa_version_under7p0 = True pa_version_under8p0 = True @@ -25,3 +26,4 @@ pa_version_under12p0 = True pa_version_under13p0 = True pa_version_under14p0 = True + pa_version_under14p1 = True diff --git a/pandas/core/arrays/arrow/extension_types.py b/pandas/core/arrays/arrow/extension_types.py index 3249c1c829546..36d536bf86847 100644 --- a/pandas/core/arrays/arrow/extension_types.py +++ b/pandas/core/arrays/arrow/extension_types.py @@ -5,6 +5,8 @@ import pyarrow +from pandas.compat import pa_version_under14p1 + from pandas.core.dtypes.dtypes import ( IntervalDtype, PeriodDtype, @@ -112,3 +114,61 @@ def to_pandas_dtype(self): # register the type with a dummy instance _interval_type = ArrowIntervalType(pyarrow.int64(), "left") pyarrow.register_extension_type(_interval_type) + + +_ERROR_MSG = """\ +Disallowed deserialization of 'arrow.py_extension_type': +storage_type = {storage_type} +serialized = {serialized} +pickle disassembly:\n{pickle_disassembly} + +Reading of untrusted Parquet or Feather files with a PyExtensionType column +allows arbitrary code execution. +If you trust this file, you can enable reading the extension type by one of: + +- upgrading to pyarrow >= 14.0.1, and call `pa.PyExtensionType.set_auto_load(True)` +- install pyarrow-hotfix (`pip install pyarrow-hotfix`) and disable it by running + `import pyarrow_hotfix; pyarrow_hotfix.uninstall()` + +We strongly recommend updating your Parquet/Feather files to use extension types +derived from `pyarrow.ExtensionType` instead, and register this type explicitly. +""" + + +def patch_pyarrow(): + # starting from pyarrow 14.0.1, it has its own mechanism + if not pa_version_under14p1: + return + + # if https://github.com/pitrou/pyarrow-hotfix was installed and enabled + if getattr(pyarrow, "_hotfix_installed", False): + return + + class ForbiddenExtensionType(pyarrow.ExtensionType): + def __arrow_ext_serialize__(self): + return b"" + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized): + import io + import pickletools + + out = io.StringIO() + pickletools.dis(serialized, out) + raise RuntimeError( + _ERROR_MSG.format( + storage_type=storage_type, + serialized=serialized, + pickle_disassembly=out.getvalue(), + ) + ) + + pyarrow.unregister_extension_type("arrow.py_extension_type") + pyarrow.register_extension_type( + ForbiddenExtensionType(pyarrow.null(), "arrow.py_extension_type") + ) + + pyarrow._hotfix_installed = True + + +patch_pyarrow() diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index c463f6e4d2759..b018b5720d126 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -117,6 +117,9 @@ def read_feather( import_optional_dependency("pyarrow") from pyarrow import feather + # import utils to register the pyarrow extension types + import pandas.core.arrays.arrow.extension_types # pyright: ignore[reportUnusedImport] # noqa: F401,E501 + check_dtype_backend(dtype_backend) with get_handle( From 92ce245e5d84e05a2943966e328b4e6b632bc8e2 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 10 Nov 2023 14:54:05 +0100 Subject: [PATCH 13/15] Backport PR #55907 on branch 2.1.x (DOC: Add release date for 2.1.3) (#55913) Backport PR #55907: DOC: Add release date for 2.1.3 Co-authored-by: Thomas Li <47963215+lithomas1@users.noreply.github.com> --- doc/source/whatsnew/v2.1.3.rst | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v2.1.3.rst b/doc/source/whatsnew/v2.1.3.rst index 531559c259b44..af626895a9e0e 100644 --- a/doc/source/whatsnew/v2.1.3.rst +++ b/doc/source/whatsnew/v2.1.3.rst @@ -1,6 +1,6 @@ .. _whatsnew_213: -What's new in 2.1.3 (November ??, 2023) +What's new in 2.1.3 (November 10, 2023) --------------------------------------- These are the changes in pandas 2.1.3. See :ref:`release` for a full changelog @@ -14,7 +14,6 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed infinite recursion from operations that return a new object on some DataFrame subclasses (:issue:`55763`) -- .. --------------------------------------------------------------------------- .. _whatsnew_213.bug_fixes: @@ -25,14 +24,6 @@ Bug fixes - Bug in :meth:`Index.isin` raising for Arrow backed string and ``None`` value (:issue:`55821`) - Fix :func:`read_parquet` and :func:`read_feather` for `CVE-2023-47248 `__ (:issue:`55894`) -.. --------------------------------------------------------------------------- -.. _whatsnew_213.other: - -Other -~~~~~ -- -- - .. --------------------------------------------------------------------------- .. _whatsnew_213.contributors: From d9665ca37076d4dd6c18a22e0b3a767fdc097691 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 10 Nov 2023 16:41:43 +0100 Subject: [PATCH 14/15] Backport PR #55911 on branch 2.1.x (DOC: convert outdated example of NumPy's broadcasting to literal code block) (#55912) Backport PR #55911: DOC: convert outdated example of NumPy's broadcasting to literal code block Co-authored-by: Joris Van den Bossche --- doc/source/whatsnew/v0.17.0.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.rst b/doc/source/whatsnew/v0.17.0.rst index abbda2ffc9be2..5a668e5fabd9c 100644 --- a/doc/source/whatsnew/v0.17.0.rst +++ b/doc/source/whatsnew/v0.17.0.rst @@ -726,10 +726,10 @@ be broadcast: or it can return False if broadcasting can not be done: -.. ipython:: python - :okwarning: +.. code-block:: ipython - np.array([1, 2, 3]) == np.array([1, 2]) + In [11]: np.array([1, 2, 3]) == np.array([1, 2]) + Out[11]: False Changes to boolean comparisons vs. None ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 2a953cf80b77e4348bf50ed724f8abc0d814d9dd Mon Sep 17 00:00:00 2001 From: Pandas Development Team Date: Fri, 10 Nov 2023 18:14:03 +0100 Subject: [PATCH 15/15] RLS: 2.1.3