More clarity in comments; Moved column resolver to class; Use uuid

pandas-dev · jreback · Mar 20, 2019 · Jan 26, 2019 · Jan 26, 2019 · Feb 15, 2019
commit 63c25bfe0f0dff747b4975428930dd722f80268c
diff --git a/pandas/core/computation/common.py b/pandas/core/computation/common.py
@@ -1,3 +1,5 @@
+import uuid
+
 import numpy as np
 
 from pandas.compat import reduce, string_types
@@ -25,24 +27,15 @@ def _result_type_many(*arrays_and_dtypes):
         return reduce(np.result_type, arrays_and_dtypes)
 
 
-def _clean_column_name_with_spaces(name):
+def _remove_spaces_column_name(name):
     """Check if name contains any spaces, if it contains any spaces
     the spaces will be removed and an underscore suffix is added."""
     if not isinstance(name, string_types) or " " not in name:
         return name
-    return "_BACKTICK_QUOTED_STRING_" + name.replace(" ", "_")
-
-
-def _get_column_resolvers(dataFrame):
-    """Return the axis resolvers of a dataframe.
-
-    Column names with spaces are 'cleaned up' so that they can be referred to
-    by backtick quoting. See also :func:`_clean_spaces_backtick_quoted_names`
-    from :mod:`pandas.core.computation`
-    """
 
-    return {_clean_column_name_with_spaces(k): v for k, v
-            in dataFrame.iteritems()}
+    # uuid3 is name-based: the same name always yields the same suffix.
+    return name.replace(" ", "_") + "_" + \
+        str(uuid.uuid3(uuid.NAMESPACE_DNS, name)).replace("-", "")
 
 
 class NameResolutionError(NameError):

diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
@@ -16,7 +16,7 @@
 from pandas.core import common as com
 from pandas.core.base import StringMixin
 from pandas.core.computation.common import (
-    _BACKTICK_QUOTED_STRING, _clean_column_name_with_spaces)
+    _BACKTICK_QUOTED_STRING, _remove_spaces_column_name)
 from pandas.core.computation.ops import (
     _LOCAL_TAG, BinOp, Constant, Div, FuncNode, Op, Term, UnaryOp,
     UndefinedVariableError, _arith_ops_syms, _bool_ops_syms, _cmp_ops_syms,
@@ -36,6 +36,10 @@ def tokenize_string(source):
     """
     line_reader = StringIO(source).readline
     token_generator = tokenize.generate_tokens(line_reader)
+
+    # Loop over all tokens till a backtick (`) is found.
+    # Then, take all tokens till the next backtick to form a backtick quoted
+    # string.
     for toknum, tokval, _, _, _ in token_generator:
         if tokval == '`':
             tokval = " ".join(it.takewhile(
@@ -117,9 +121,9 @@ def _clean_spaces_backtick_quoted_names(tok):
 
     Backtick quoted string are indicated by a certain tokval value. If a string
     is a backtick quoted token it will processed by
-    :func:`_clean_column_name_with_spaces` so that the parser can find this
-    string when the query is executed. See also :func:`_get_column_resolvers`
-    used in :meth:`DataFrame.eval`.
+    :func:`_remove_spaces_column_name` so that the parser can find this
+    string when the query is executed.
+    See also :meth:`NDFrame._get_space_character_free_column_resolvers`.
 
     Parameters
     ----------
@@ -133,7 +137,7 @@ def _clean_spaces_backtick_quoted_names(tok):
     """
     toknum, tokval = tok
     if toknum == _BACKTICK_QUOTED_STRING:
-        return tokenize.NAME, _clean_column_name_with_spaces(tokval)
+        return tokenize.NAME, _remove_spaces_column_name(tokval)
     return toknum, tokval
 
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2971,7 +2971,10 @@ def query(self, expr, inplace=False, **kwargs):
             .. versionadded:: 0.25.0
 
             You can refer to column names that contain spaces by surrounding
-            them in backticks like ```a a` + b``.
+            them in backticks.
+
+            For example, if one of your columns is called ``a a`` and you want
+            to sum it with ``b``, your query should be ```a a` + b``.
 
         inplace : bool
             Whether the query should modify the data in place or return
@@ -3165,10 +3168,9 @@ def eval(self, expr, inplace=False, **kwargs):
         resolvers = kwargs.pop('resolvers', None)
         kwargs['level'] = kwargs.pop('level', 0) + 1
         if resolvers is None:
-            from pandas.core.computation.common import _get_column_resolvers
-
             index_resolvers = self._get_index_resolvers()
-            column_resolvers = _get_column_resolvers(self)
+            column_resolvers = \
+                self._get_space_character_free_column_resolvers()
             resolvers = column_resolvers, index_resolvers
         if 'target' not in kwargs:
             kwargs['target'] = self

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -38,6 +38,7 @@
 import pandas.core.algorithms as algos
 from pandas.core.base import PandasObject, SelectionMixin
 import pandas.core.common as com
+from pandas.core.computation.common import _remove_spaces_column_name
 from pandas.core.index import (
     Index, InvalidIndexError, MultiIndex, RangeIndex, ensure_index)
 from pandas.core.indexes.datetimes import DatetimeIndex
@@ -423,6 +424,17 @@ def _get_index_resolvers(self):
             d.update(self._get_axis_resolvers(axis_name))
         return d
 
+    def _get_space_character_free_column_resolvers(self):
+        """Return the space character free column resolvers of a dataframe.
+
+        Column names with spaces are 'cleaned up' so that they can be referred
+        to by backtick quoting.
+        Used in :meth:`DataFrame.eval`.
+        """
+
+        return {_remove_spaces_column_name(k): v for k, v
+                in self.iteritems()}
+
     @property
     def _info_axis(self):
         return getattr(self, self._info_axis_name)