From 36416096f9fe3aa39a64facfba228825cfa09f18 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 21 Dec 2023 02:57:16 +0000 Subject: [PATCH 1/2] docs: code samples for `Series.{add, replace, unique, T, transpose}` --- .../bigframes_vendored/pandas/core/frame.py | 4 +- .../bigframes_vendored/pandas/core/series.py | 190 +++++++++++++++++- 2 files changed, 191 insertions(+), 3 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 00be9e5e9e..21fbe38227 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -4038,7 +4038,7 @@ def stack(self, level=-1): BigQuery DataFrames does not support stack operations that would combine columns of different dtypes. - **Example:** + **Examples:** >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None @@ -4077,7 +4077,7 @@ def unstack(self, level=-1): If the index is not a MultiIndex, the output will be a Series (the analogue of stack when the columns are not a MultiIndex). - **Example:** + **Examples:** >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index d054684598..2e093f80ce 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -143,13 +143,51 @@ def name(self) -> Hashable: @property def T(self) -> Series: - """Return the transpose, which is by definition self.""" + """Return the transpose, which is by definition self. 
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) + >>> s + 0 Ant + 1 Bear + 2 Cow + dtype: string + + >>> s.T + 0 Ant + 1 Bear + 2 Cow + dtype: string + + """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def transpose(self) -> Series: """ Return the transpose, which is by definition self. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) + >>> s + 0 Ant + 1 Bear + 2 Cow + dtype: string + + >>> s.transpose() + 0 Ant + 1 Bear + 2 Cow + dtype: string + Returns: Series: Series. """ @@ -492,6 +530,36 @@ def nunique(self) -> int: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def unique(self) -> Series: + """ + Return unique values of Series object. + + Uniques are returned in order of appearance. Hash table-based unique, + therefore does NOT sort. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([2, 1, 3, 3], name='A') + >>> s + 0 2 + 1 1 + 2 3 + 3 3 + Name: A, dtype: Int64 + >>> s.unique() + 0 2 + 1 1 + 2 3 + Name: A, dtype: Int64 + + Returns: + Series: The unique values returned as a Series. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def mode(self) -> Series: """ Return the mode(s) of the Series. @@ -1248,6 +1316,77 @@ def replace( expressions. If this is ``True`` then `to_replace` *must* be a string. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3, 4, 5]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: Int64 + + >>> s.replace(1, 5) + 0 5 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: Int64 + + You can replace a list of values: + + >>> s.replace([1, 3, 5], -1) + 0 -1 + 1 2 + 2 -1 + 3 4 + 4 -1 + dtype: Int64 + + You can use a replacement mapping: + + >>> s.replace({1: 5, 3: 10}) + 0 5 + 1 2 + 2 10 + 3 4 + 4 5 + dtype: Int64 + + With a string Series you can use a simple string replacement or a regex + replacement: + + >>> s = bpd.Series(["Hello", "Another Hello"]) + >>> s.replace("Hello", "Hi") + 0 Hi + 1 Another Hello + dtype: string + + >>> s.replace("Hello", "Hi", regex=True) + 0 Hi + 1 Another Hi + dtype: string + + >>> s.replace("^Hello", "Hi", regex=True) + 0 Hi + 1 Another Hello + dtype: string + + >>> s.replace("Hello$", "Hi", regex=True) + 0 Hi + 1 Another Hi + dtype: string + + >>> s.replace("[Hh]e", "__", regex=True) + 0 __llo + 1 Anot__r __llo + dtype: string + Returns: Series/DataFrame: Object after replacement. @@ -1475,6 +1614,55 @@ def add(self, other) -> Series: Equivalent to ``series + other``, but with support to substitute a fill_value for missing data in either one of the inputs. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 2, 3, bpd.NA]) + >>> a + 0 1.0 + 1 2.0 + 2 3.0 + 3 <NA> + dtype: Float64 + + >>> b = bpd.Series([10, 20, 30, 40]) + >>> b + 0 10 + 1 20 + 2 30 + 3 40 + dtype: Int64 + + >>> a.add(b) + 0 11.0 + 1 22.0 + 2 33.0 + 3 <NA> + dtype: Float64 + + You can also use the mathematical operator ``+``: + + >>> a + b + 0 11.0 + 1 22.0 + 2 33.0 + 3 <NA> + dtype: Float64 + + Adding two Series with explicit indexes: + + >>> a = bpd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']) + >>> b = bpd.Series([10, 20, 30, 40], index=['a', 'b', 'd', 'e']) + >>> a.add(b) + a 11 + b 22 + c <NA> + d 34 + e <NA> + dtype: Int64 + Args: other (Series, or scalar value): From be29b4bee1230c9c6fdb64a7c86773839ae1599c Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 21 Dec 2023 23:47:08 +0000 Subject: [PATCH 2/2] place Examples before Args --- .../bigframes_vendored/pandas/core/series.py | 68 +++++++++---------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 2e093f80ce..25c889ae53 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -1282,40 +1282,6 @@ def replace( This differs from updating with ``.loc`` or ``.iloc``, which require you to specify a location to update with some value. - Args: - to_replace (str, regex, list, int, float or None): - How to find the values that will be replaced. - - * numeric, str or regex: - - - numeric: numeric values equal to `to_replace` will be - replaced with `value` - - str: string exactly matching `to_replace` will be replaced - with `value` - - regex: regexs matching `to_replace` will be replaced with - `value` - - * list of str, regex, or numeric: - - - First, if `to_replace` and `value` are both lists, they - **must** be the same length. 
- - Second, if ``regex=True`` then all of the strings in **both** - lists will be interpreted as regexs otherwise they will match - directly. This doesn't matter much for `value` since there - are only a few possible substitution regexes you can use. - - str, regex and numeric rules apply as above. - - value (scalar, default None): - Value to replace any values matching `to_replace` with. - For a DataFrame a dict of values can be used to specify which - value to use for each column (columns not in the dict will not be - filled). Regular expressions, strings and lists or dicts of such - objects are also allowed. - regex (bool, default False): - Whether to interpret `to_replace` and/or `value` as regular - expressions. If this is ``True`` then `to_replace` *must* be a - string. - **Examples:** >>> import bigframes.pandas as bpd @@ -1387,6 +1353,40 @@ def replace( 1 Anot__r __llo dtype: string + Args: + to_replace (str, regex, list, int, float or None): + How to find the values that will be replaced. + + * numeric, str or regex: + + - numeric: numeric values equal to `to_replace` will be + replaced with `value` + - str: string exactly matching `to_replace` will be replaced + with `value` + - regex: regexs matching `to_replace` will be replaced with + `value` + + * list of str, regex, or numeric: + + - First, if `to_replace` and `value` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for `value` since there + are only a few possible substitution regexes you can use. + - str, regex and numeric rules apply as above. + + value (scalar, default None): + Value to replace any values matching `to_replace` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). 
Regular expressions, strings and lists or dicts of such + objects are also allowed. + regex (bool, default False): + Whether to interpret `to_replace` and/or `value` as regular + expressions. If this is ``True`` then `to_replace` *must* be a + string. + Returns: Series/DataFrame: Object after replacement.