From bd1b261452689dee607005b35dfb336606a32cff Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Dec 2019 21:44:56 -0800 Subject: [PATCH 1/6] COMPAT: np 1.18 wants explicit dtype=object) --- pandas/core/indexes/base.py | 4 +++- pandas/core/internals/blocks.py | 3 +++ pandas/core/ops/dispatch.py | 32 +++++++++++++++++----------- pandas/core/strings.py | 14 ++++++++++-- pandas/io/formats/format.py | 5 +++++ pandas/tests/extension/test_numpy.py | 10 +++++---- pandas/tests/io/json/test_ujson.py | 5 +++-- pandas/tests/test_multilevel.py | 3 ++- pandas/tests/test_strings.py | 6 ++++-- 9 files changed, 57 insertions(+), 25 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b99e60f8c6278..5e99ed3efd9e6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -331,12 +331,14 @@ def __new__( # extension dtype elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): - data = np.asarray(data) if not (dtype is None or is_object_dtype(dtype)): # coerce to the provided dtype ea_cls = dtype.construct_array_type() data = ea_cls._from_sequence(data, dtype=dtype, copy=False) + else: + data = np.asarray(data, dtype=object) + # coerce to the object dtype data = data.astype(object) return Index(data, dtype=object, copy=copy, name=name, **kwargs) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8a543832b50fe..116effa892e81 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1764,6 +1764,9 @@ def get_values(self, dtype=None): return values def to_dense(self): + if self.dtype.kind == "O": + # See https://github.com/numpy/numpy/issues/15041 + return np.asarray(self.values, dtype=object) return np.asarray(self.values) def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): diff --git a/pandas/core/ops/dispatch.py b/pandas/core/ops/dispatch.py index c39f4d6d9698d..09662fd23d619 100644 --- a/pandas/core/ops/dispatch.py +++ b/pandas/core/ops/dispatch.py @@ -2,6 +2,7 @@ Functions for defining unary operations. """ from typing import Any, Callable, Union +import warnings import numpy as np @@ -132,19 +133,24 @@ def dispatch_to_extension_op( # The op calls will raise TypeError if the op is not defined # on the ExtensionArray - try: - res_values = op(left, right) - except NullFrequencyError: - # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError - # on add/sub of integers (or int-like). We re-raise as a TypeError. - if keep_null_freq: - # TODO: remove keep_null_freq after Timestamp+int deprecation - # GH#22535 is enforced - raise - raise TypeError( - "incompatible type for a datetime/timedelta " - "operation [{name}]".format(name=op.__name__) - ) + with warnings.catch_warnings(): + # See https://github.com/numpy/numpy/issues/15041 + warnings.filterwarnings("ignore", ".*with automatic object dtype.*") + + try: + res_values = op(left, right) + except NullFrequencyError: + # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError + # on add/sub of integers (or int-like). We re-raise as a TypeError. + if keep_null_freq: + # TODO: remove keep_null_freq after Timestamp+int deprecation + # GH#22535 is enforced + raise + raise TypeError( + "incompatible type for a datetime/timedelta " + "operation [{name}]".format(name=op.__name__) + ) + return res_values diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 137c37f938dfa..51e59835091a8 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -74,10 +74,20 @@ def cat_core(list_of_columns: List, sep: str): """ if sep == "": # no need to interleave sep if it is empty - return np.sum(list_of_columns, axis=0) + with warnings.catch_warnings(): + # See https://github.com/numpy/numpy/issues/15041 + warnings.filterwarnings("ignore", ".*with automatic object dtype.*") + out = np.sum(list_of_columns, axis=0) + return out + list_with_sep = [sep] * (2 * len(list_of_columns) - 1) list_with_sep[::2] = list_of_columns - return np.sum(list_with_sep, axis=0) + + with warnings.catch_warnings(): + # See https://github.com/numpy/numpy/issues/15041 + warnings.filterwarnings("ignore", ".*with automatic object dtype.*") + out = np.sum(list_with_sep, axis=0) + return out def cat_safe(list_of_columns: List, sep: str): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3adf8d7bbdd11..4dbf2f03fbdf5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1482,6 +1482,11 @@ def _format_strings(self) -> List[str]: if is_categorical_dtype(values.dtype): # Categorical is special for now, so that we can preserve tzinfo array = values._internal_get_values() + elif values.dtype.kind == "O": + # numpy>=1.18 wants object dtype passed explicitly + # Note: dtype.kind check works for json extension tests, while + # dtype == object check does not. + array = np.asarray(values, dtype=object) else: array = np.asarray(values) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 221cf0787d839..beb3fc80eccd6 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -51,7 +51,7 @@ def data_missing(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": if _np_version_under1p16: raise pytest.skip("Skipping for NumPy <1.16") - return PandasArray(np.array([np.nan, (1,)])) + return PandasArray(np.array([np.nan, (1,)], dtype=object)) return PandasArray(np.array([np.nan, 1.0])) @@ -78,7 +78,7 @@ def data_for_sorting(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": # Use an empty tuple for first element, then remove, # to disable np.array's shape inference. - return PandasArray(np.array([(), (2,), (3,), (1,)])[1:]) + return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:]) return PandasArray(np.array([1, 2, 0])) @@ -90,7 +90,7 @@ def data_missing_for_sorting(allow_in_pandas, dtype): A < B and NA missing. """ if dtype.numpy_dtype == "object": - return PandasArray(np.array([(1,), np.nan, (0,)])) + return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object)) return PandasArray(np.array([1, np.nan, 0])) @@ -106,7 +106,9 @@ def data_for_grouping(allow_in_pandas, dtype): a, b, c = (1,), (2,), (3,) else: a, b, c = np.arange(3) - return PandasArray(np.array([b, b, np.nan, np.nan, a, a, b, c])) + return PandasArray( + np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype) + ) @pytest.fixture diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 8dcc77fc2fbc1..bb150c5825650 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -761,8 +761,9 @@ def test_array_list(self): ["a", "b"], {"key": "val"}, ] - arr = np.array(arr_list) - tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr) + arr = np.array(arr_list, dtype=object) + result = np.array(ujson.decode(ujson.encode(arr)), dtype=object) + tm.assert_numpy_array_equal(result, arr) def test_array_float(self): dtype = np.float32 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 44829423be1bb..7f6a1c7b89e20 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -121,7 +121,8 @@ def test_append_index(self): (1.2, tz.localize(datetime.datetime(2011, 1, 2)), "B"), (1.3, tz.localize(datetime.datetime(2011, 1, 3)), "C"), ] - + expected_tuples + + expected_tuples, + dtype=object, ), None, ) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 0e2f8ee6543e1..556e05de96053 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2853,7 +2853,8 @@ def test_partition_index(self): result = values.str.partition("_", expand=False) exp = Index( np.array( - [("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None] + [("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None], + dtype=object, ) ) tm.assert_index_equal(result, exp) @@ -2862,7 +2863,8 @@ def test_partition_index(self): result = values.str.rpartition("_", expand=False) exp = Index( np.array( - [("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None] + [("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None], + dtype=object, ) ) tm.assert_index_equal(result, exp) From af573eb44fde1b682d49a9da51a5592652c16f9c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Dec 2019 11:28:33 -0800 Subject: [PATCH 2/6] suppress more --- pandas/core/arrays/numpy_.py | 7 ++++++- pandas/core/common.py | 9 ++++++++- pandas/core/dtypes/cast.py | 7 ++++++- pandas/core/dtypes/concat.py | 13 ++++++++++++- pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/extension/base/interface.py | 6 +++++- pandas/tests/io/test_html.py | 1 + 7 files changed, 39 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index e116d180e3a14..373180da128a3 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -1,5 +1,6 @@ import numbers from typing import Union +import warnings import numpy as np from numpy.lib.mixins import NDArrayOperatorsMixin @@ -147,7 +148,11 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): if isinstance(dtype, PandasDtype): dtype = dtype._dtype - result = np.asarray(scalars, dtype=dtype) + with warnings.catch_warnings(): + # See https://github.com/numpy/numpy/issues/15041 + warnings.filterwarnings("ignore", ".*with automatic object dtype.*") + result = np.asarray(scalars, dtype=dtype) + if copy and result is scalars: result = result.copy() return cls(result) diff --git a/pandas/core/common.py b/pandas/core/common.py index d62f1557952a8..0c680d316f521 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -10,6 +10,7 @@ from functools import partial import inspect from typing import Any, Iterable, Union +import warnings import numpy as np @@ -224,7 +225,13 @@ def asarray_tuplesafe(values, dtype=None): if isinstance(values, list) and dtype in [np.object_, object]: return construct_1d_object_array_from_listlike(values) - result = np.asarray(values, dtype=dtype) + if dtype is None: + with warnings.catch_warnings(): + # See https://github.com/numpy/numpy/issues/15041 + warnings.filterwarnings("ignore", ".*with automatic object dtype.*") + result = np.asarray(values) + else: + result = np.asarray(values, dtype=dtype) if issubclass(result.dtype.type, str): result = np.asarray(values, dtype=object) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index acf8b6ca4e312..b0ebe43db84e0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1,6 +1,7 @@ """ routings for casting """ from datetime import datetime, timedelta +import warnings import numpy as np @@ -1029,7 +1030,11 @@ def maybe_infer_to_datetimelike(value, convert_dates: bool = False): if not is_list_like(v): v = [v] - v = np.array(v, copy=False) + + with warnings.catch_warnings(): + # See https://github.com/numpy/numpy/issues/15041 + warnings.filterwarnings("ignore", ".*with automatic object dtype.*") + v = np.array(v, copy=False) # we only care about object dtypes if not is_object_dtype(v): diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 7b3e7d4f42121..ecd4d2aa316a5 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -1,6 +1,7 @@ """ Utility functions related to concat """ +import warnings import numpy as np @@ -134,6 +135,7 @@ def is_nonempty(x) -> bool: # coerce to object to_concat = [x.astype("object") for x in to_concat] + to_concat = [_safe_array(x) for x in to_concat] return np.concatenate(to_concat, axis=axis) @@ -172,7 +174,7 @@ def concat_categorical(to_concat, axis: int = 0): to_concat = [ x._internal_get_values() if is_categorical_dtype(x.dtype) - else np.asarray(x).ravel() + else _safe_array(x).ravel() if not is_datetime64tz_dtype(x) else np.asarray(x.astype(object)) for x in to_concat @@ -183,6 +185,15 @@ def concat_categorical(to_concat, axis: int = 0): return result +def _safe_array(x): + # FIXME: kludge + with warnings.catch_warnings(): + # See https://github.com/numpy/numpy/issues/15041 + warnings.filterwarnings("ignore", ".*with automatic object dtype.*") + arr = np.asarray(x) + return arr + + def union_categoricals( to_union, sort_categories: bool = False, ignore_order: bool = False ): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 75e86a2ee7ecc..28ed7dc282da2 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -449,7 +449,7 @@ def test_scientific_no_exponent(self): def test_convert_non_hashable(self): # GH13324 # make sure that we are handing non-hashables - arr = np.array([[10.0, 2], 1.0, "apple"]) + arr = np.array([[10.0, 2], 1.0, "apple"], dtype=object) result = lib.maybe_convert_numeric(arr, set(), False, True) tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index a29f6deeffae6..b20ef96fa42f5 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -32,7 +32,11 @@ def test_memory_usage(self, data): assert result == s.nbytes def test_array_interface(self, data): - result = np.array(data) + if hasattr(data, "dtype") and data.dtype.kind == "O": + # e.g. JSONArray + result = np.array(data, dtype=object) + else: + result = np.array(data) assert result[0] == data[0] result = np.array(data, dtype=object) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 353946a311c1a..ca4e230912ffb 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -84,6 +84,7 @@ def test_same_ordering(datapath): assert_framelist_equal(dfs_lxml, dfs_bs4) +@td.skip_if_no("bs4") @pytest.mark.parametrize( "flavor", [ From 1c6c7ffef4967d49554759182f89006ca9972a25 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Dec 2019 11:31:03 -0800 Subject: [PATCH 3/6] reenable npdev build --- ci/azure/posix.yml | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 081145f846571..a10fd402b6733 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -44,15 +44,13 @@ jobs: PATTERN: "not slow and not network" LOCALE_OVERRIDE: "zh_CN.UTF-8" - # Disabled for NumPy object-dtype warning. - # https://github.com/pandas-dev/pandas/issues/30043 - # py37_np_dev: - # ENV_FILE: ci/deps/azure-37-numpydev.yaml - # CONDA_PY: "37" - # PATTERN: "not slow and not network" - # TEST_ARGS: "-W error" - # PANDAS_TESTING_MODE: "deprecate" - # EXTRA_APT: "xsel" + py37_np_dev: + ENV_FILE: ci/deps/azure-37-numpydev.yaml + CONDA_PY: "37" + PATTERN: "not slow and not network" + TEST_ARGS: "-W error" + PANDAS_TESTING_MODE: "deprecate" + EXTRA_APT: "xsel" steps: - script: | From 1ba69b1b719a6a1f39d8221e4e4c7b7d3c97914d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 16 Dec 2019 10:46:48 -0800 Subject: [PATCH 4/6] fix merge mixup --- pandas/core/indexes/base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e0c71e335c0a7..5abd049b9564c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -338,9 +338,6 @@ def __new__( else: data = np.asarray(data, dtype=object) - else: - data = np.asarray(data, dtype=object) - # coerce to the object dtype data = data.astype(object) return Index(data, dtype=object, copy=copy, name=name, **kwargs) From 5ebff2e398d68d779810f0ea8d70e879797122ec Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 16 Dec 2019 10:49:34 -0800 Subject: [PATCH 5/6] post-merge fixups --- pandas/core/ops/dispatch.py | 1 - pandas/tests/io/test_html.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/core/ops/dispatch.py b/pandas/core/ops/dispatch.py index 18df5cb4c47e6..6a2aba4264874 100644 --- a/pandas/core/ops/dispatch.py +++ b/pandas/core/ops/dispatch.py @@ -2,7 +2,6 @@ Functions for defining unary operations. """ from typing import Any, Callable, Union -import warnings import numpy as np diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 97e8a191cafab..39cbe843d1f2b 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -84,7 +84,6 @@ def test_same_ordering(datapath): assert_framelist_equal(dfs_lxml, dfs_bs4) -@td.skip_if_no("bs4") @pytest.mark.parametrize( "flavor", [ From 3584dab49a97c210d791df7a36878286baec8f80 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 17 Dec 2019 12:03:17 -0800 Subject: [PATCH 6/6] revert kludges --- pandas/core/arrays/numpy_.py | 7 +------ pandas/core/common.py | 9 +-------- pandas/core/dtypes/cast.py | 7 +------ pandas/core/dtypes/concat.py | 13 +------------ 4 files changed, 4 insertions(+), 32 deletions(-) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 6a82ab376212a..deec30dfe34ff 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -1,6 +1,5 @@ import numbers from typing import Union -import warnings import numpy as np from numpy.lib.mixins import NDArrayOperatorsMixin @@ -159,11 +158,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): if isinstance(dtype, PandasDtype): dtype = dtype._dtype - with warnings.catch_warnings(): - # See https://github.com/numpy/numpy/issues/15041 - warnings.filterwarnings("ignore", ".*with automatic object dtype.*") - result = np.asarray(scalars, dtype=dtype) - + result = np.asarray(scalars, dtype=dtype) if copy and result is scalars: result = result.copy() return cls(result) diff --git a/pandas/core/common.py b/pandas/core/common.py index 0ead03ee0e98f..9017584171850 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -10,7 +10,6 @@ from functools import partial import inspect from typing import Any, Iterable, Union -import warnings import numpy as np @@ -225,13 +224,7 @@ def asarray_tuplesafe(values, dtype=None): if isinstance(values, list) and dtype in [np.object_, object]: return construct_1d_object_array_from_listlike(values) - if dtype is None: - with warnings.catch_warnings(): - # See https://github.com/numpy/numpy/issues/15041 - warnings.filterwarnings("ignore", ".*with automatic object dtype.*") - result = np.asarray(values) - else: - result = np.asarray(values, dtype=dtype) + result = np.asarray(values, dtype=dtype) if issubclass(result.dtype.type, str): result = np.asarray(values, dtype=object) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6313e1bc3c2df..b398a197a4bc0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1,7 +1,6 @@ """ routings for casting """ from datetime import datetime, timedelta -import warnings import numpy as np @@ -1047,11 +1046,7 @@ def maybe_infer_to_datetimelike(value, convert_dates: bool = False): if not is_list_like(v): v = [v] - - with warnings.catch_warnings(): - # See https://github.com/numpy/numpy/issues/15041 - warnings.filterwarnings("ignore", ".*with automatic object dtype.*") - v = np.array(v, copy=False) + v = np.array(v, copy=False) # we only care about object dtypes if not is_object_dtype(v): diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ecd4d2aa316a5..7b3e7d4f42121 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -1,7 +1,6 @@ """ Utility functions related to concat """ -import warnings import numpy as np @@ -135,7 +134,6 @@ def is_nonempty(x) -> bool: # coerce to object to_concat = [x.astype("object") for x in to_concat] - to_concat = [_safe_array(x) for x in to_concat] return np.concatenate(to_concat, axis=axis) @@ -174,7 +172,7 @@ def concat_categorical(to_concat, axis: int = 0): to_concat = [ x._internal_get_values() if is_categorical_dtype(x.dtype) - else _safe_array(x).ravel() + else np.asarray(x).ravel() if not is_datetime64tz_dtype(x) else np.asarray(x.astype(object)) for x in to_concat @@ -185,15 +183,6 @@ def concat_categorical(to_concat, axis: int = 0): return result -def _safe_array(x): - # FIXME: kludge - with warnings.catch_warnings(): - # See https://github.com/numpy/numpy/issues/15041 - warnings.filterwarnings("ignore", ".*with automatic object dtype.*") - arr = np.asarray(x) - return arr - - def union_categoricals( to_union, sort_categories: bool = False, ignore_order: bool = False ):