10000 Add metadata attribute to DataFrame and Column · iskode/dataframe-api@1436aef · GitHub
[go: up one dir, main page]

Skip to content

Commit 1436aef

Browse files
committed
Add metadata attribute to DataFrame and Column
1 parent 1f0286b commit 1436aef

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

protocol/dataframe_protocol.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,13 @@ def null_count(self) -> Optional[int]:
285285
"""
286286
pass
287287

288+
@property
289+
def metadata(self) -> Dict[str, Any]:
290+
"""
291+
Metadata specific to the column, exposed as a property that returns a dictionary with string keys.
292+
"""
293+
pass
294+
288295
def num_chunks(self) -> int:
289296
"""
290297
Return the number of chunks the column consists of.
@@ -350,6 +357,13 @@ def __dataframe__(self, nan_as_null : bool = False) -> dict:
350357
"version": 0 # Version number of the protocol
351358
}
352359

360+
@property
361+
def metadata(self) -> Dict[str, Any]:
362+
"""
363+
Metadata specific to the DataFrame, exposed as a property that returns a dictionary with string keys.
364+
"""
365+
pass
366+
353367
def num_columns(self) -> int:
354368
"""
355369
Return the number of columns in the DataFrame

protocol/pandas_implementation.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,13 @@ def null_count(self) -> int:
426426
"""
427427
return self._col.isna().sum()
428428

429+
@property
430+
def metadata(self) -> Dict[str, Any]:
431+
"""
432+
Column metadata: a dictionary whose only key, "num_chunks", holds the result of `self.num_chunks()`.
433+
"""
434+
return {"num_chunks": self.num_chunks()}
435+
429436
def num_chunks(self) -> int:
430437
"""
431438
Return the number of chunks the column consists of.
@@ -495,6 +502,11 @@ def __init__(self, df : pd.DataFrame, nan_as_null : bool = False) -> None:
495502
# dtypes is added, this value should be propagated to columns.
496503
self._nan_as_null = nan_as_null
497504

505+
@property
506+
def metadata(self) -> Dict[str, Any]:
"""
DataFrame metadata: a dictionary with the keys "num_chunks" and "num_columns",
holding the results of `self.num_chunks()` and `self.num_columns()`.
"""
507+
return {"num_chunks": self.num_chunks(),
508+
"num_columns": self.num_columns()}
509+
498510
def num_columns(self) -> int:
"""
Return the number of columns in `self._df`.
"""
499511
return len(self._df.columns)
500512

@@ -578,9 +590,28 @@ def test_categorical_dtype():
578590
tm.assert_frame_equal(df, df2)
579591

580592

593+
def test_metadata():
    """
    Check the ``metadata`` property of the interchange DataFrame and Column.

    The dataframe-level and column-level metadata are each compared against
    their own expected dictionary. Whole-dict equality is used so that a
    missing or extra key fails the test instead of passing silently.
    Finally, a round trip through ``from_dataframe`` must reproduce the
    original frame.
    """
    df = pd.DataFrame(data=dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]))

    # Check the metadata from the dataframe
    df_metadata = df.__dataframe__().metadata
    expected_df_metadata = {"num_chunks": 1, "num_columns": 3}
    assert df_metadata == expected_df_metadata

    # Check the metadata from the column. This must be compared against the
    # column-level expectation, not the dataframe-level one.
    col_metadata = df.__dataframe__().get_column(0).metadata
    expected_col_metadata = {"num_chunks": 1}
    assert col_metadata == expected_col_metadata

    df2 = from_dataframe(df)
    tm.assert_frame_equal(df, df2)
610+
611+
581612
# Script entry point: call each listed test function in turn when the file is run directly.
if __name__ == '__main__':
582613
test_categorical_dtype()
583614
test_float_only()
584615
test_mixed_intfloat()
585616
test_noncontiguous_columns()
586-
617+
test_metadata()

0 commit comments

Comments
 (0)
0