diff --git a/bigframes/core/validations.py b/bigframes/core/validations.py
index 9c03ddb930..daa1252824 100644
--- a/bigframes/core/validations.py
+++ b/bigframes/core/validations.py
@@ -17,7 +17,7 @@
 from __future__ import annotations
 
 import functools
-from typing import Optional, Protocol, TYPE_CHECKING
+from typing import Optional, Protocol, TYPE_CHECKING, Union
 
 import bigframes.constants
 import bigframes.exceptions
@@ -25,6 +25,8 @@
 if TYPE_CHECKING:
     from bigframes import Session
     from bigframes.core.blocks import Block
+    from bigframes.dataframe import DataFrame
+    from bigframes.operations.base import SeriesMethods
 
 
 class HasSession(Protocol):
@@ -37,6 +39,16 @@ def _block(self) -> Block:
         ...
 
 
+def requires_index(meth):
+    @functools.wraps(meth)
+    def guarded_meth(df: Union[DataFrame, SeriesMethods], *args, **kwargs):
+        df._throw_if_null_index(meth.__name__)
+        return meth(df, *args, **kwargs)
+
+    guarded_meth._validations_requires_index = True  # type: ignore
+    return guarded_meth
+
+
 def requires_ordering(suggestion: Optional[str] = None):
     def decorator(meth):
         @functools.wraps(meth)
@@ -44,6 +56,7 @@ def guarded_meth(object: HasSession, *args, **kwargs):
             enforce_ordered(object, meth.__name__, suggestion)
             return meth(object, *args, **kwargs)
 
+        guarded_meth._validations_requires_ordering = True  # type: ignore
         return guarded_meth
 
     return decorator
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index dabe85c923..a174ef0b0f 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -17,7 +17,6 @@
 from __future__ import annotations
 
 import datetime
-import functools
 import inspect
 import re
 import sys
@@ -92,15 +91,6 @@
 )
 
 
-def requires_index(meth):
-    @functools.wraps(meth)
-    def guarded_meth(df: DataFrame, *args, **kwargs):
-        df._throw_if_null_index(meth.__name__)
-        return meth(df, *args, **kwargs)
-
-    return guarded_meth
-
-
 # Inherits from pandas DataFrame so that we can use the same docstrings.
 @log_adapter.class_logger
 class DataFrame(vendored_pandas_frame.DataFrame):
@@ -261,7 +251,7 @@ def _sql_names(
         return results
 
     @property
-    @requires_index
+    @validations.requires_index
     def index(
         self,
     ) -> indexes.Index:
@@ -277,7 +267,7 @@ def index(self, value):
         self.index.name = value.name if hasattr(value, "name") else None
 
     @property
-    @requires_index
+    @validations.requires_index
     def loc(self) -> indexers.LocDataFrameIndexer:
         return indexers.LocDataFrameIndexer(self)
 
@@ -292,7 +282,7 @@ def iat(self) -> indexers.IatDataFrameIndexer:
         return indexers.IatDataFrameIndexer(self)
 
     @property
-    @requires_index
+    @validations.requires_index
     def at(self) -> indexers.AtDataFrameIndexer:
         return indexers.AtDataFrameIndexer(self)
 
@@ -348,7 +338,7 @@ def _has_index(self) -> bool:
     def T(self) -> DataFrame:
         return DataFrame(self._get_block().transpose())
 
-    @requires_index
+    @validations.requires_index
     @validations.requires_ordering()
     def transpose(self) -> DataFrame:
         return self.T
@@ -417,7 +407,7 @@ def memory_usage(self, index: bool = True):
             column_sizes = pandas.concat([index_size, column_sizes])
         return column_sizes
 
-    @requires_index
+    @validations.requires_index
     def info(
         self,
         verbose: Optional[bool] = None,
@@ -1682,7 +1672,7 @@ def set_index(
         col_ids_strs: List[str] = [col_id for col_id in col_ids if col_id is not None]
         return DataFrame(self._block.set_index(col_ids_strs, append=append, drop=drop))
 
-    @requires_index
+    @validations.requires_index
     def sort_index(
         self, ascending: bool = True, na_position: Literal["first", "last"] = "last"
     ) -> DataFrame:
@@ -1884,7 +1874,7 @@ def reindex(
         if columns is not None:
             return self._reindex_columns(columns)
 
-    @requires_index
+    @validations.requires_index
     def _reindex_rows(
         self,
         index,
@@ -1931,12 +1921,12 @@ def _reindex_columns(self, columns):
         result_df.columns = new_column_index
         return result_df
 
-    @requires_index
+    @validations.requires_index
     def reindex_like(self, other: DataFrame, *, validate: typing.Optional[bool] = None):
         return self.reindex(index=other.index, columns=other.columns, validate=validate)
 
     @validations.requires_ordering()
-    @requires_index
+    @validations.requires_index
     def interpolate(self, method: str = "linear") -> DataFrame:
         if method == "pad":
             return self.ffill()
@@ -2231,12 +2221,12 @@ def agg(
     aggregate = agg
     aggregate.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.agg)
 
-    @requires_index
+    @validations.requires_index
     @validations.requires_ordering()
     def idxmin(self) -> bigframes.series.Series:
         return bigframes.series.Series(block_ops.idxmin(self._block))
 
-    @requires_index
+    @validations.requires_index
     @validations.requires_ordering()
     def idxmax(self) -> bigframes.series.Series:
         return bigframes.series.Series(block_ops.idxmax(self._block))
@@ -2345,7 +2335,7 @@ def _pivot(
         )
         return DataFrame(pivot_block)
 
-    @requires_index
+    @validations.requires_index
     @validations.requires_ordering()
     def pivot(
         self,
@@ -2360,7 +2350,7 @@
     ) -> DataFrame:
         return self._pivot(columns=columns, index=index, values=values)
 
-    @requires_index
+    @validations.requires_index
     @validations.requires_ordering()
     def pivot_table(
         self,
@@ -2460,7 +2450,7 @@ def _stack_multi(self, level: LevelsType = -1):
         block = block.stack(levels=len(level))
         return DataFrame(block)
 
-    @requires_index
+    @validations.requires_index
     @validations.requires_ordering()
     def unstack(self, level: LevelsType = -1):
         if not utils.is_list_like(level):
@@ -2711,7 +2701,7 @@ def groupby(
         else:
             raise TypeError("You have to supply one of 'by' and 'level'")
 
-    @requires_index
+    @validations.requires_index
     def _groupby_level(
         self,
         level: LevelsType,
diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py
index f339345971..1daa1ea5ae 100644
--- a/bigframes/operations/base.py
+++ b/bigframes/operations/base.py
@@ -14,7 +14,6 @@
 
 from __future__ import annotations
 
-import functools
 import typing
 from typing import List, Sequence
 
@@ -35,15 +34,6 @@
 import bigframes.session
 
 
-def requires_index(meth):
-    @functools.wraps(meth)
-    def guarded_meth(df: SeriesMethods, *args, **kwargs):
-        df._throw_if_null_index(meth.__name__)
-        return meth(df, *args, **kwargs)
-
-    return guarded_meth
-
-
 class SeriesMethods:
     def __init__(
         self,
diff --git a/bigframes/series.py b/bigframes/series.py
index 7ba4858b5e..c2137bca35 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -52,7 +52,6 @@
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
 import bigframes.operations.base
-from bigframes.operations.base import requires_index
 import bigframes.operations.datetimes as dt
 import bigframes.operations.plotting as plotting
 import bigframes.operations.strings as strings
@@ -88,7 +87,7 @@ def dtypes(self):
         return self._dtype
 
     @property
-    @requires_index
+    @validations.requires_index
     def loc(self) -> bigframes.core.indexers.LocSeriesIndexer:
         return bigframes.core.indexers.LocSeriesIndexer(self)
 
@@ -103,7 +102,7 @@ def iat(self) -> bigframes.core.indexers.IatSeriesIndexer:
         return bigframes.core.indexers.IatSeriesIndexer(self)
 
     @property
-    @requires_index
+    @validations.requires_index
     def at(self) -> bigframes.core.indexers.AtSeriesIndexer:
         return bigframes.core.indexers.AtSeriesIndexer(self)
 
@@ -142,7 +141,7 @@ def values(self) -> numpy.ndarray:
         return self.to_numpy()
 
     @property
-    @requires_index
+    @validations.requires_index
     def index(self) -> indexes.Index:
         return indexes.Index.from_frame(self)
 
@@ -245,7 +244,7 @@ def rename(
 
         raise ValueError(f"Unsupported type of parameter index: {type(index)}")
 
-    @requires_index
+    @validations.requires_index
     def rename_axis(
         self,
         mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]],
@@ -404,12 +403,12 @@ def drop(
             block = block.drop_columns([condition_id])
         return Series(block.select_column(self._value_column))
 
-    @requires_index
+    @validations.requires_index
     def droplevel(self, level: LevelsType, axis: int | str = 0):
         resolved_level_ids = self._resolve_levels(level)
         return Series(self._block.drop_levels(resolved_level_ids))
 
-    @requires_index
+    @validations.requires_index
     def swaplevel(self, i: int = -2, j: int = -1):
         level_i = self._block.index_columns[i]
         level_j = self._block.index_columns[j]
@@ -419,7 +418,7 @@
         ]
         return Series(self._block.reorder_levels(reordering))
 
-    @requires_index
+    @validations.requires_index
     def reorder_levels(self, order: LevelsType, axis: int | str = 0):
         resolved_level_ids = self._resolve_levels(order)
         return Series(self._block.reorder_levels(resolved_level_ids))
@@ -609,7 +608,7 @@ def _mapping_replace(self, mapping: dict[typing.Hashable, typing.Hashable]):
         return Series(block.select_column(result))
 
     @validations.requires_ordering()
-    @requires_index
+    @validations.requires_index
     def interpolate(self, method: str = "linear") -> Series:
         if method == "pad":
             return self.ffill()
@@ -1162,7 +1161,7 @@ def argmin(self) -> int:
             scalars.Scalar, Series(block.select_column(row_nums)).iloc[0]
         )
 
-    @requires_index
+    @validations.requires_index
     def unstack(self, level: LevelsType = -1):
         if isinstance(level, int) or isinstance(level, str):
             level = [level]
@@ -1186,7 +1185,7 @@ def unstack(self, level: LevelsType = -1):
         )
         return bigframes.dataframe.DataFrame(pivot_block)
 
-    @requires_index
+    @validations.requires_index
     def idxmax(self) -> blocks.Label:
         block = self._block.order_by(
             [
@@ -1200,7 +1199,7 @@ def idxmax(self) -> blocks.Label:
         block = block.slice(0, 1)
         return indexes.Index(block).to_pandas()[0]
 
-    @requires_index
+    @validations.requires_index
     def idxmin(self) -> blocks.Label:
         block = self._block.order_by(
             [
@@ -1314,7 +1313,7 @@ def sort_values(
         )
         return Series(block)
 
-    @requires_index
+    @validations.requires_index
     def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series:
         # TODO(tbergeron): Support level parameter once multi-index introduced.
         if na_position not in ["first", "last"]:
@@ -1377,7 +1376,7 @@ def groupby(
         else:
             raise TypeError("You have to supply one of 'by' and 'level'")
 
-    @requires_index
+    @validations.requires_index
     def _groupby_level(
         self,
         level: int | str | typing.Sequence[int] | typing.Sequence[str],
@@ -1518,11 +1517,11 @@ def combine(
         materialized_series = result_series._cached()
         return materialized_series
 
-    @requires_index
+    @validations.requires_index
     def add_prefix(self, prefix: str, axis: int | str | None = None) -> Series:
         return Series(self._get_block().add_prefix(prefix))
 
-    @requires_index
+    @validations.requires_index
     def add_suffix(self, suffix: str, axis: int | str | None = None) -> Series:
         return Series(self._get_block().add_suffix(suffix))
 
@@ -1574,7 +1573,7 @@ def filter(
         else:
             raise ValueError("Need to provide 'items', 'like', or 'regex'")
 
-    @requires_index
+    @validations.requires_index
     def reindex(self, index=None, *, validate: typing.Optional[bool] = None):
         if validate and not self.index.is_unique:
             raise ValueError("Original index must be unique to reindex")
@@ -1603,7 +1602,7 @@ def reindex(self, index=None, *, validate: typing.Optional[bool] = None):
         )._block
         return Series(result_block)
 
-    @requires_index
+    @validations.requires_index
     def reindex_like(self, other: Series, *, validate: typing.Optional[bool] = None):
         return self.reindex(other.index, validate=validate)
 
diff --git a/scripts/publish_api_coverage.py b/scripts/publish_api_coverage.py
index 25fbfbf988..0292d4880d 100644
--- a/scripts/publish_api_coverage.py
+++ b/scripts/publish_api_coverage.py
@@ -116,7 +116,15 @@ def generate_pandas_api_coverage():
     """Inspect all our pandas objects, and compare with the real pandas objects, to see which methods we implement.
     For each, generate a regex that can be used to check if its present in a notebook"""
 
-    header = ["api", "pattern", "kind", "is_in_bigframes", "missing_parameters"]
+    header = [
+        "api",
+        "pattern",
+        "kind",
+        "is_in_bigframes",
+        "missing_parameters",
+        "requires_index",
+        "requires_ordering",
+    ]
     api_patterns = []
     indexers = ["loc", "iloc", "iat", "ix", "at"]
     for name, pandas_obj, bigframes_obj in PANDAS_TARGETS:
@@ -156,6 +164,13 @@ def generate_pandas_api_coverage():
                 token_type = "property"
 
             is_in_bigframes = hasattr(bigframes_obj, member)
+            requires_index = False
+            requires_ordering = False
+
+            if is_in_bigframes:
+                attr = getattr(bigframes_obj, member)
+                requires_index = hasattr(attr, "_validations_requires_index")
+                requires_ordering = hasattr(attr, "_validations_requires_ordering")
 
             api_patterns.append(
                 [
@@ -164,6 +179,8 @@
                     token_type,
                     is_in_bigframes,
                     missing_parameters,
+                    requires_index,
+                    requires_ordering,
                 ]
             )
 
@@ -287,6 +304,7 @@ def generate_api_coverage(df, api_prefix):
         dataframe_apis["missing_parameters"].str.len() != 0
     ) & dataframe_apis["is_in_bigframes"]
     not_implemented = ~dataframe_apis["is_in_bigframes"]
+
    dataframe_table = pd.DataFrame(
         {
             "API": format_api(
@@ -295,12 +313,16 @@
                 api_prefix,
             ),
             "Implemented": "",
+            "Requires index": "",
+            "Requires ordering": "",
             "Missing parameters": dataframe_apis["missing_parameters"],
         }
     )
 
     dataframe_table.loc[fully_implemented, "Implemented"] = "Y"
     dataframe_table.loc[partial_implemented, "Implemented"] = "P"
     dataframe_table.loc[not_implemented, "Implemented"] = "N"
+    dataframe_table.loc[dataframe_apis["requires_index"], "Requires index"] = "Y"
+    dataframe_table.loc[dataframe_apis["requires_ordering"], "Requires ordering"] = "Y"
 
     return dataframe_table
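
A minimal, self-contained sketch of the pattern this patch introduces, for reviewers who want to try it outside of a BigQuery session. The decorator mirrors the `requires_index` added to `bigframes/core/validations.py` above; `FakeFrame` and its `_throw_if_null_index` method are illustrative stand-ins (not the real bigframes classes) that only imitate the hook the real DataFrame/SeriesMethods already provide. The marker attribute on the wrapper is what `scripts/publish_api_coverage.py` now looks for with `hasattr`.

```python
import functools


def requires_index(meth):
    # Same shape as the decorator in bigframes/core/validations.py:
    # delegate the check to the object, then tag the wrapper for introspection.
    @functools.wraps(meth)
    def guarded_meth(obj, *args, **kwargs):
        obj._throw_if_null_index(meth.__name__)
        return meth(obj, *args, **kwargs)

    guarded_meth._validations_requires_index = True
    return guarded_meth


class FakeFrame:
    # Illustrative stand-in for a DataFrame/Series; it only imitates the
    # _throw_if_null_index hook that the real classes provide.
    def __init__(self, has_index: bool):
        self._has_index = has_index

    def _throw_if_null_index(self, opname: str):
        if not self._has_index:
            raise ValueError(f"{opname} requires an index")

    @requires_index
    def sort_index(self):
        return "sorted"


# Introspection, as generate_pandas_api_coverage() now does per member:
print(hasattr(FakeFrame.sort_index, "_validations_requires_index"))  # True

print(FakeFrame(has_index=True).sort_index())  # sorted
# FakeFrame(has_index=False).sort_index() would raise ValueError
```

Because the marker is set on the wrapper after `functools.wraps` copies the original metadata, the coverage script can detect the requirement by attribute lookup alone, without constructing a session or calling the method.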