ENH: add BooleanArray extension array by jorisvandenbossche · Pull Request #29555 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

ENH: add BooleanArray extension array #29555

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Nov 25, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
640dac9
ENH: add BooleanArray extension array
jorisvandenbossche Nov 11, 2019
b9597bb
enable arithmetic ops + ufuncs
jorisvandenbossche Nov 12, 2019
fa77b7a
switch back to object dtype for __array__ + astype tests
jorisvandenbossche Nov 12, 2019
29415a9
temp
jorisvandenbossche Nov 12, 2019
c4a53f2
Merge remote-tracking branch 'upstream/master' into boolean-EA
jorisvandenbossche Nov 14, 2019
b1182bc
updates for feedback + add BooleanArray docstring
jorisvandenbossche Nov 15, 2019
94c5a90
Merge remote-tracking branch 'upstream/master' into boolean-EA
jorisvandenbossche Nov 18, 2019
1861602
try fix test for old numpy
jorisvandenbossche Nov 18, 2019
ad6c477
fix in place modification of mask / follow numpy for division
jorisvandenbossche Nov 18, 2019
67bf21a
string -> boolean copy paste errors
jorisvandenbossche Nov 18, 2019
f153fb2
add basic docs
jorisvandenbossche Nov 18, 2019
e24c097
empty test
jorisvandenbossche Nov 18, 2019
f0d0c6e
fix BooleanDtype construction + doc lint
jorisvandenbossche Nov 19, 2019
a3e1e93
Merge remote-tracking branch 'upstream/master' into boolean-EA
jorisvandenbossche Nov 20, 2019
1717583
add extra tests for constructors + check dimensionality
jorisvandenbossche Nov 20, 2019
5ce67e2
validate values when converting to boolean array
jorisvandenbossche Nov 20, 2019
8c0abe6
various updates
jorisvandenbossche Nov 20, 2019
031a113
fix + test return types of reducers
jorisvandenbossche Nov 20, 2019
90558d6
fix base reduction tests
jorisvandenbossche Nov 20, 2019
af82754
Merge remote-tracking branch 'upstream/master' into boolean-EA
jorisvandenbossche Nov 25, 2019
0eb3ca2
small edits
jorisvandenbossche Nov 25, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
validate values when converting to boolean array
  • Loading branch information
jorisvandenbossche committed Nov 20, 2019
commit 5ce67e2626b238d2c96becf4663618aec51891d1
16 changes: 16 additions & 0 deletions pandas/core/arrays/boolean.py
8000
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,27 @@ def coerce_to_array(values, mask=None, copy=False):
if copy:
values = values.copy()
else:
# TODO conversion from integer/float ndarray can be done more efficiently
# (avoid roundtrip through object)
values_object = np.asarray(values, dtype=object)

inferred_dtype = lib.infer_dtype(values_object, skipna=True)
integer_like = ("floating", "integer", "mixed-integer-float")
if inferred_dtype not in ("boolean", "empty") + integer_like:
raise TypeError("Need to pass bool-like values")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tests hit here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tests hit here?

Yes, there is a test that passes all kinds of non-boolean-like values

In general, I ran pytest locally with coverage, and this file has 97% coverage. The main uncovered parts are some of the ufunc-related code and some of the length-mismatch errors in the ops code.


mask_values = isna(values_object)
values = np.zeros(len(values), dtype=bool)
values[~mask_values] = values_object[~mask_values].astype(bool)

# if the values were integer-like, validate that they were actually 0/1's
if inferred_dtype in integer_like:
if not np.all(
values[~mask_values].astype(float)
== values_object[~mask_values].astype(float)
):
raise TypeError("Need to pass bool-like values")

if mask is None and mask_values is None:
mask = np.zeros(len(values), dtype=bool)
elif mask is None:
Expand Down
89 changes: 66 additions & 23 deletions pandas/tests/arrays/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,38 @@ def test_boolean_array_constructor_copy():
assert result._mask is not mask


def test_to_boolean_array():
    """Check ``pd.array(..., dtype="boolean")`` for bool-like inputs.

    Covers a plain list, a bool ndarray and an object ndarray, first without
    missing values and then with a trailing ``None`` that must end up masked.
    """
    expected = BooleanArray(
        np.array([True, False, True]), np.array([False, False, False])
    )

    result = pd.array([True, False, True], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
    result = pd.array(np.array([True, False, True]), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
    result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)

    # with missing values
    expected = BooleanArray(
        np.array([True, False, True]), np.array([False, False, True])
    )

    result = pd.array([True, False, None], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
    result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)


def test_to_boolean_array_all_none():
    """Check that an all-``None`` input converts to a fully masked array.

    Both a plain list and an object ndarray of ``None`` values are converted
    and compared against a BooleanArray whose mask is ``True`` everywhere.
    """
    # Every position is masked in the expected array; its data values are
    # placeholders.
    expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True]))

    result = pd.array([None, None, None], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
    result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)


@pytest.mark.parametrize(
"a, b",
[
Expand All @@ -79,33 +111,44 @@ def test_to_boolean_array_none_is_nan(a, b):
tm.assert_extension_array_equal(result, expected)


# @pytest.mark.parametrize(
# "values",
# [
# ["foo", "bar"],
# ["1", "2"],
# "foo",
# [1],
# [1.0],
# pd.date_range("20130101", periods=2),
# np.array(["foo"]),
# [[1, 2], [3, 4]],
# [np.nan, {"a": 1}],
# ],
# )
# def test_to_boolean_array_error(values):
# # error in converting existing arrays to BooleanArray
# with pytest.raises(TypeError):
# pd.array(values, dtype="boolean")


def test_to_boolean_array_integer():
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        ["1", "2"],
        # "foo",
        [1, 2],
        [1.0, 2.0],
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [np.nan, {"a": 1}],
    ],
)
def test_to_boolean_array_error(values):
    """Check that non-bool-like input raises ``TypeError`` on conversion.

    Each parametrized ``values`` (strings, numbers other than 0/1, datetimes,
    mixed objects) is passed to ``pd.array(..., dtype="boolean")``.
    """
    # error in converting existing arrays to BooleanArray
    with pytest.raises(TypeError):
        pd.array(values, dtype="boolean")


def test_to_boolean_array_integer_like():
    """Check that integer-like input made of 0s and 1s converts to boolean.

    Covers a list of ints, an integer ndarray and a float ndarray, then the
    same kind of input with a missing value (``None`` / ``np.nan``), which
    must come out as a missing entry in the result.
    """
    # integers of 0's and 1's
    result = pd.array([1, 0, 1, 0], dtype="boolean")
    expected = pd.array([True, False, True, False], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)

    result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)

    result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)

    # with missing values
    result = pd.array([1, 0, 1, None], dtype="boolean")
    expected = pd.array([True, False, True, None], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)

    result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)


def test_coerce_to_array():
Expand Down
0