feat: Implement item() for Series and Index by tswast · Pull Request #1792 · googleapis/python-bigquery-dataframes · GitHub
[go: up one dir, main page]

Skip to content

feat: Implement item() for Series and Index #1792

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bigframes/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,10 @@ def to_numpy(self, dtype=None, *, allow_large_results=None, **kwargs) -> np.ndar
def __len__(self):
    """Return the length of this object, taken as the first entry of ``self.shape``."""
    return self.shape[0]

def item(self):
    # Docstring is in third_party/bigframes_vendored/pandas/core/indexes/base.py
    # Ask for at most two rows: enough for the local ``item()`` call to tell
    # "exactly one element" apart from "zero" or "more than one".
    preview = self.to_series().peek(2)
    return preview.item()


def _should_create_datetime_index(block: blocks.Block) -> bool:
if len(block.index.dtypes) != 1:
Expand Down
4 changes: 4 additions & 0 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,10 @@ def peek(
as_series.name = self.name
return as_series

def item(self):
    # Docstring is in third_party/bigframes_vendored/pandas/core/series.py
    # Ask for at most two rows: enough for the local ``item()`` call to tell
    # "exactly one element" apart from "zero" or "more than one".
    preview = self.peek(2)
    return preview.item()

def nlargest(self, n: int = 5, keep: str = "first") -> Series:
if keep not in ("first", "last", "all"):
raise ValueError("'keep must be one of 'first', 'last', or 'all'")
Expand Down
41 changes: 41 additions & 0 deletions tests/system/small/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re

import numpy
import pandas as pd
import pytest
Expand Down Expand Up @@ -458,3 +460,42 @@ def test_multiindex_repr_includes_all_names(session):
)
index = session.read_pandas(df).set_index(["A", "B"]).index
assert "names=['A', 'B']" in repr(index)


def test_index_item(session):
    """Index.item() on a one-element Index returns the same value as pandas."""
    expected = pd.Index([42]).item()
    actual = bpd.Index([42], session=session).item()
    assert actual == expected


def test_index_item_with_multiple(session):
    """Index.item() on a multi-element Index raises the same ValueError as pandas."""
    bf_idx_multiple = bpd.Index([1, 2, 3], session=session)
    pd_idx_multiple = pd.Index([1, 2, 3])

    # Take the expected message from pandas itself rather than hard-coding it;
    # pytest.raises fails the test if pandas does not raise at all.
    with pytest.raises(ValueError) as pd_exc_info:
        pd_idx_multiple.item()
    expected_message = str(pd_exc_info.value)

    # The message is matched literally, so escape any regex metacharacters.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        bf_idx_multiple.item()


def test_index_item_with_empty(session):
    """Index.item() on an empty Index raises the same ValueError as pandas."""
    bf_idx_empty = bpd.Index([], dtype="Int64", session=session)
    pd_idx_empty: pd.Index = pd.Index([], dtype="Int64")

    # Take the expected message from pandas itself rather than hard-coding it;
    # pytest.raises fails the test if pandas does not raise at all.
    with pytest.raises(ValueError) as pd_exc_info:
        pd_idx_empty.item()
    expected_message = str(pd_exc_info.value)

    # The message is matched literally, so escape any regex metacharacters.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        bf_idx_empty.item()
39 changes: 39 additions & 0 deletions tests/system/small/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4642,3 +4642,42 @@ def test_series_to_pandas_dry_run(scalars_df_index):

assert isinstance(result, pd.Series)
assert len(result) > 0


def test_series_item(session):
    """Series.item() on a one-element Series returns the same value as pandas."""
    expected = pd.Series([42]).item()
    actual = bigframes.pandas.Series([42], session=session).item()
    assert actual == expected


def test_series_item_with_multiple(session):
    """Series.item() on a multi-element Series raises the same ValueError as pandas."""
    bf_s_multiple = bigframes.pandas.Series([1, 2, 3], session=session)
    pd_s_multiple = pd.Series([1, 2, 3])

    # Take the expected message from pandas itself rather than hard-coding it;
    # pytest.raises fails the test if pandas does not raise at all.
    with pytest.raises(ValueError) as pd_exc_info:
        pd_s_multiple.item()
    expected_message = str(pd_exc_info.value)

    # The message is matched literally, so escape any regex metacharacters.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        bf_s_multiple.item()


def test_series_item_with_empty(session):
    """Series.item() on an empty Series raises the same ValueError as pandas."""
    bf_s_empty = bigframes.pandas.Series([], dtype="Int64", session=session)
    pd_s_empty = pd.Series([], dtype="Int64")

    # Take the expected message from pandas itself rather than hard-coding it;
    # pytest.raises fails the test if pandas does not raise at all.
    with pytest.raises(ValueError) as pd_exc_info:
        pd_s_empty.item()
    expected_message = str(pd_exc_info.value)

    # The message is matched literally, so escape any regex metacharacters.
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        bf_s_empty.item()
19 changes: 19 additions & 0 deletions third_party/bigframes_vendored/pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1087,6 +1087,25 @@ def unique(self, level: Hashable | int | None = None):
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def item(self, *args, **kwargs):
    """Return the first element of the underlying data as a Python scalar.

    The Index must contain exactly one element.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None
        >>> s = bpd.Series([1], index=['a'])
        >>> s.index.item()
        'a'

    Returns:
        scalar: The first (and only) element of the Index.

    Raises:
        ValueError: If the Index does not contain exactly one element.
    """
    # Documentation-only stub: this definition always raises.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_numpy(self, dtype, *, allow_large_results=None):
"""
A NumPy ndarray representing the values in this Series or Index.
Expand Down
20 changes: 20 additions & 0 deletions third_party/bigframes_vendored/pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4933,6 +4933,26 @@ def kurt(self):
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def item(self: Series, *args, **kwargs):
    """Return the first element of the underlying data as a Python scalar.

    The Series must contain exactly one element.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import numpy as np
        >>> bpd.options.display.progress_bar = None
        >>> s = bpd.Series([1])
        >>> s.item()
        np.int64(1)

    Returns:
        scalar: The first (and only) element of the Series.

    Raises:
        ValueError: If the Series does not contain exactly one element.
    """
    # Documentation-only stub: this definition always raises.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def items(self):
"""
Lazily iterate over (index, value) tuples.
Expand Down
0