Change arg implied_value to implied_category

pandas-dev · jreback · Jun 30, 2022 · Jun 9, 2021 · Jun 9, 2021 · Jun 14, 2021
commit e45d3f880afea3a8aefe39cf09c2a3ae8681a4df
diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst
@@ -740,16 +740,16 @@ for example ``k`` columns of a ``DataFrame`` containing 1s and 0s can derive a
 
    pd.from_dummies(df, sep="_")
 
-Dummy coded data only requires ``k - 1`` values to be included, in this case
-the ``k`` th value implied by not being assigned any of the other ``k - 1`` values
-can be passed via ``implied_value``.
+Dummy coded data only requires ``k - 1`` categories to be included; in this case
+the ``k`` th category, implied by not being assigned any of the other ``k - 1``
+categories, can be passed via ``implied_category``.
 
 .. ipython:: python
 
    df = pd.DataFrame({"prefix_a": [0, 1, 0]})
    df
 
-   pd.from_dummies(df, sep="_", implied_value="b")
+   pd.from_dummies(df, sep="_", implied_category="b")
 
 .. _reshaping.factorize:
 

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -1100,7 +1100,7 @@ def get_empty_frame(data) -> DataFrame:
 def from_dummies(
     data: DataFrame,
     sep: None | str = None,
-    implied_value: None | Hashable | dict[str, Hashable] = None,
+    implied_category: None | Hashable | dict[str, Hashable] = None,
 ) -> DataFrame:
     """
     Create a categorical `DataFrame` from a `DataFrame` of dummy variables.
@@ -1116,10 +1116,10 @@ def from_dummies(
         character indicating the separation of the categorical names from the prefixes.
         For example, if your column names are 'prefix_A' and 'prefix_B',
         you can strip the underscore by specifying sep='_'.
-    implied_value : None, Hashable or dict of Hashables, default None
-        The implied value the dummy takes when all values are zero.
+    implied_category : None, Hashable or dict of Hashables, default None
+        The implied category the dummy takes when all values are zero.
-        Can be a a single value for all variables or a dict directly mapping the
+        Can be a single value for all variables or a dict directly mapping the
-        implied values to a prefix of a variable.
+        implied categories to a prefix of a variable.
 
     Returns
     -------
@@ -1160,7 +1160,7 @@ def from_dummies(
     ...                    "col2_a": [0, 1, 0], "col2_b": [1, 0, 0],
     ...                    "col2_c": [0, 0, 0]})
 
-    >>> pd.from_dummies(df, sep="_", implied_value={"col1": "d", "col2": "e"})
+    >>> pd.from_dummies(df, sep="_", implied_category={"col1": "d", "col2": "e"})
         col1    col2
     0    a       b
     1    b       a
@@ -1199,7 +1199,7 @@ def from_dummies(
             f"Received 'sep' of type: {type(sep).__name__}"
         )
 
-    # validate number of implied_value
+    # validate number of implied_category
     def check_len(item, name) -> None:
         if not len(item) == len(variables_slice):
             len_msg = (
@@ -1209,18 +1209,19 @@ def check_len(item, name) -> None:
             )
             raise ValueError(len_msg)
 
-    if implied_value:
-        if isinstance(implied_value, dict):
-            check_len(implied_value, "implied_value")
-        elif isinstance(implied_value, Hashable):
-            implied_value = dict(
-                zip(variables_slice, [implied_value] * len(variables_slice))
+    if implied_category:
+        if isinstance(implied_category, dict):
+            check_len(implied_category, "implied_category")
+        elif isinstance(implied_category, Hashable):
+            implied_category = dict(
+                zip(variables_slice, [implied_category] * len(variables_slice))
             )
         else:
             raise TypeError(
-                f"Expected 'implied_value' to be of type "
+                f"Expected 'implied_category' to be of type "
                 f"'None', 'Hashable', or 'dict'; "
-                f"Received 'implied_value' of type: {type(implied_value).__name__}"
+                f"Received 'implied_category' of type: "
+                f"{type(implied_category).__name__}"
             )
 
     cat_data = {}
@@ -1238,8 +1239,8 @@ def check_len(item, name) -> None:
                 f"First instance in row: {assigned.argmax()}"
             )
         elif any(assigned == 0):
-            if isinstance(implied_value, dict):
-                cats.append(implied_value[prefix])
+            if isinstance(implied_category, dict):
+                cats.append(implied_category[prefix])
             else:
                 raise ValueError(
                     f"Dummy DataFrame contains unassigned value(s); "

diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py
@@ -113,23 +113,23 @@ def test_from_dummies_no_prefix_contains_unassigned():
         from_dummies(dummies)
 
 
-def test_from_dummies_no_prefix_string_cats_implied_value():
+def test_from_dummies_no_prefix_string_cats_implied_category():
     dummies = DataFrame({"a": [1, 0, 0], "b": [0, 1, 0]})
     expected = DataFrame({"": ["a", "b", "c"]})
-    result = from_dummies(dummies, implied_value="c")
+    result = from_dummies(dummies, implied_category="c")
     tm.assert_frame_equal(result, expected)
 
 
-def test_from_dummies_no_prefix_wrong_implied_value_type():
+def test_from_dummies_no_prefix_wrong_implied_category_type():
     dummies = DataFrame({"a": [1, 0, 1], "b": [0, 1, 1]})
     with pytest.raises(
         TypeError,
         match=(
-            r"Expected 'implied_value' to be of type 'None', 'Hashable', or 'dict'; "
-            r"Received 'implied_value' of type: list"
+            r"Expected 'implied_category' to be of type 'None', 'Hashable', or 'dict'; "
+            r"Received 'implied_category' of type: list"
         ),
     ):
-        from_dummies(dummies, implied_value=["c", "d"])
+        from_dummies(dummies, implied_category=["c", "d"])
 
 
 def test_from_dummies_no_prefix_multi_assignment():
@@ -225,54 +225,58 @@ def test_from_dummies_with_prefix_contains_unassigned(dummies_with_unassigned):
         from_dummies(dummies_with_unassigned, sep="_")
 
 
-def test_from_dummies_with_prefix_implied_value_str(dummies_with_unassigned):
+def test_from_dummies_with_prefix_implied_category_str(dummies_with_unassigned):
     expected = DataFrame({"col1": ["a", "b", "x"], "col2": ["x", "a", "c"]})
-    result = from_dummies(dummies_with_unassigned, sep="_", implied_value="x")
+    result = from_dummies(dummies_with_unassigned, sep="_", implied_category="x")
     tm.assert_frame_equal(result, expected)
 
 
-def test_from_dummies_with_prefix_implied_value_wrong_type(dummies_with_unassigned):
+def test_from_dummies_with_prefix_implied_category_wrong_type(dummies_with_unassigned):
     with pytest.raises(
         TypeError,
         match=(
-            r"Expected 'implied_value' to be of type 'None', 'Hashable', or 'dict'; "
-            r"Received 'implied_value' of type: list"
+            r"Expected 'implied_category' to be of type 'None', 'Hashable', or 'dict'; "
+            r"Received 'implied_category' of type: list"
         ),
     ):
-        from_dummies(dummies_with_unassigned, sep="_", implied_value=["x", "y"])
+        from_dummies(dummies_with_unassigned, sep="_", implied_category=["x", "y"])
 
 
-def test_from_dummies_with_prefix_implied_value_int_and_float(dummies_with_unassigned):
+def test_from_dummies_with_prefix_implied_category_int_and_float(
+    dummies_with_unassigned,
+):
     expected = DataFrame({"col1": ["a", "b", 2.5], "col2": [1, "a", "c"]})
     result = from_dummies(
         dummies_with_unassigned,
         sep="_",
-        implied_value={"col2": 1, "col1": 2.5},
+        implied_category={"col2": 1, "col1": 2.5},
     )
     tm.assert_frame_equal(result, expected)
 
 
-def test_from_dummies_with_prefix_implied_value_bool_and_none(dummies_with_unassigned):
+def test_from_dummies_with_prefix_implied_category_bool_and_none(
+    dummies_with_unassigned,
+):
     expected = DataFrame({"col1": ["a", "b", False], "col2": [None, "a", "c"]})
     result = from_dummies(
         dummies_with_unassigned,
         sep="_",
-        implied_value={"col2": None, "col1": False},
+        implied_category={"col2": None, "col1": False},
     )
     tm.assert_frame_equal(result, expected)
 
 
-def test_from_dummies_with_prefix_implied_value_dict_not_complete(
+def test_from_dummies_with_prefix_implied_category_dict_not_complete(
     dummies_with_unassigned,
 ):
     with pytest.raises(
         ValueError,
         match=(
-            r"Length of 'implied_value' \(1\) did not match "
+            r"Length of 'implied_category' \(1\) did not match "
             r"the length of the columns being encoded \(2\)"
         ),
     ):
-        from_dummies(dummies_with_unassigned, sep="_", implied_value={"col1": "x"})
+        from_dummies(dummies_with_unassigned, sep="_", implied_category={"col1": "x"})
 
 
 def test_from_dummies_with_prefix_contains_nan(dummies_basic):