ENH: Quoting column names containing spaces with backticks to use them in query and eval. by hwalinga · Pull Request #24955 · pandas-dev/pandas · GitHub
[go: up one dir, main page]

Skip to content

ENH: Quoting column names containing spaces with backticks to use them in query and eval. #24955

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Mar 20, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add 'versionadded' and move column resolvers logic to common.py.
  • Loading branch information
hwalinga committed Feb 24, 2019
commit 212506887f955b9a9ddc8a97ba89bcd56f74187b
14 changes: 13 additions & 1 deletion pandas/core/computation/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,25 @@ def _result_type_many(*arrays_and_dtypes):
return reduce(np.result_type, arrays_and_dtypes)


def clean_column_name_with_spaces(name):
def _clean_column_name_with_spaces(name):
    """Rewrite a column name that contains spaces.

    A string holding at least one space is returned with the
    ``_BACKTICK_QUOTED_STRING_`` prefix added and every space replaced by
    an underscore. Anything else (a non-string value, or a string without
    spaces) is returned unchanged.
    """
    needs_cleaning = isinstance(name, string_types) and " " in name
    if needs_cleaning:
        return "_BACKTICK_QUOTED_STRING_" + name.replace(" ", "_")
    return name


def _get_column_resolvers(dataFrame):
    """Build the column resolvers of a dataframe.

    Every ``(name, column)`` pair yielded by ``dataFrame.iteritems()`` is
    stored under the name returned by
    :func:`_clean_column_name_with_spaces`, so that column names with
    spaces can be referred to by backtick quoting. See also
    :func:`_clean_spaces_backtick_quoted_names` from
    :mod:`pandas.core.computation.expr`.
    """
    resolvers = {}
    for column_name, column in dataFrame.iteritems():
        resolvers[_clean_column_name_with_spaces(column_name)] = column
    return resolvers


class NameResolutionError(NameError):
    """NameError subclass that adds no behavior of its own.

    NOTE(review): presumably raised when a name cannot be resolved while
    evaluating an expression; no usage is visible here, so confirm
    against the callers.
    """
9 changes: 5 additions & 4 deletions pandas/core/computation/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pandas.core import common as com
from pandas.core.base import StringMixin
from pandas.core.computation.common import (
_BACKTICK_QUOTED_STRING, clean_column_name_with_spaces)
_BACKTICK_QUOTED_STRING, _clean_column_name_with_spaces)
from pandas.core.computation.ops import (
_LOCAL_TAG, BinOp, Constant, Div, FuncNode, Op, Term, UnaryOp,
UndefinedVariableError, _arith_ops_syms, _bool_ops_syms, _cmp_ops_syms,
Expand Down Expand Up @@ -117,8 +117,9 @@ def _clean_spaces_backtick_quoted_names(tok):

Backtick quoted strings are indicated by a certain tokval value. If a
string is a backtick quoted token it will be processed by
:func:`clean_column_name_with_spaces` so that the parser can find this
string when the query is executed. See also :meth:`DataFrame.eval`.
:func:`_clean_column_name_with_spaces` so that the parser can find this
string when the query is executed. See also :func:`_get_column_resolvers`
used in :meth:`DataFrame.eval`.

Parameters
----------
Expand All @@ -132,7 +133,7 @@ def _clean_spaces_backtick_quoted_names(tok):
"""
toknum, tokval = tok
if toknum == _BACKTICK_QUOTED_STRING:
return tokenize.NAME, clean_column_name_with_spaces(tokval)
return tokenize.NAME, _clean_column_name_with_spaces(tokval)
return toknum, tokval


Expand Down
17 changes: 9 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
PY36, raise_with_traceback,
string_and_binary_types)
from pandas.compat.numpy import function as nv
from pandas.core.computation.common import clean_column_name_with_spaces
from pandas.core.dtypes.cast import (
maybe_upcast,
cast_scalar_to_array,
Expand Down Expand Up @@ -2966,8 +2965,13 @@ def query(self, expr, inplace=False, **kwargs):
expr : str
The query string to evaluate. You can refer to variables
in the environment by prefixing them with an '@' character like
``@a + b``. You can refer to column names with spaces by quoting
``@a + b``.

.. versionadded:: 0.25.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add an example in the Examples section as well

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, but don't know what this means:

1 Warnings found:
No extended summary found
Docstring for "pandas.DataFrame.query" correct. :)


You can refer to column names that contain spaces by surrounding
them in backticks like ```a a` + b``.

inplace : bool
Whether the query should modify the data in place or return
a modified copy.
Expand Down Expand Up @@ -3160,13 +3164,10 @@ def eval(self, expr, inplace=False, **kwargs):
resolvers = kwargs.pop('resolvers', None)
kwargs['level'] = kwargs.pop('level', 0) + 1
if resolvers is None:
from pandas.core.computation.common import _get_column_resolvers

index_resolvers = self._get_index_resolvers()
# column names with spaces are altered so that they can be referred
# to by backtick quoting.
# Also see _clean_spaces_backtick_quoted_names from
# pandas/core/computation/expr.py
column_resolvers = {clean_column_name_with_spaces(k): v
for k, v in self.iteritems()}
column_resolvers = _get_column_resolvers(self)
resolvers = column_resolvers, index_resolvers
if 'target' not in kwargs:
kwargs['target'] = self
Expand Down
0