PERF: construct DataFrame with string array and dtype=str by topper-123 · Pull Request #36432 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

PERF: construct DataFrame with string array and dtype=str #36432

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 19, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
PERF: construct DataFrame with string array and dtype=str
  • Loading branch information
topper-123 committed Sep 19, 2020
commit 439d17ae739f3c465867f8276067b428af730473
2 changes: 1 addition & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1618,7 +1618,7 @@ def construct_1d_ndarray_preserving_na(
array(['1.0', '2.0', None], dtype=object)
"""

- if dtype is not None and dtype.kind == "U":
+ if is_string_dtype(dtype):
subarr = lib.ensure_string_array(values, convert_na_value=False, copy=copy)
else:
subarr = np.array(values, dtype=dtype, copy=copy)
Expand Down
20 changes: 11 additions & 9 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from pandas.core.dtypes.cast import (
construct_1d_arraylike_from_scalar,
construct_1d_ndarray_preserving_na,
maybe_cast_to_datetime,
maybe_convert_platform,
maybe_infer_to_datetimelike,
Expand Down Expand Up @@ -189,15 +190,16 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
# the dtypes will be coerced to a single dtype
values = _prep_ndarray(values, copy=copy)

if dtype is not None:
if not is_dtype_equal(values.dtype, dtype):
try:
values = values.astype(dtype)
except Exception as orig:
# e.g. ValueError when trying to cast object dtype to float64
raise ValueError(
f"failed to cast to '{dtype}' (Exception was: {orig})"
) from orig
if not is_dtype_equal(values.dtype, dtype):
try:
values = construct_1d_ndarray_preserving_na(
values.ravel(), dtype=dtype, copy=False
).reshape(values.shape)
except Exception as orig:
# e.g. ValueError when trying to cast object dtype to float64
raise ValueError(
f"failed to cast to '{dtype}' (Exception was: {orig})"
) from orig
# _prep_ndarray ensures that values.ndim == 2 at this point
index, columns = _get_axes(
Expand Down