@@ -426,6 +426,13 @@ def null_count(self) -> int:
426
426
"""
427
427
return self ._col .isna ().sum ()
428
428
429
@property
def metadata(self) -> Dict[str, Any]:
    """
    Column-specific metadata, exposed as a dictionary.

    The only entry is ``"num_chunks"``, whose value is whatever
    ``self.num_chunks()`` reports.
    """
    chunk_count = self.num_chunks()
    return {"num_chunks": chunk_count}
435
+
429
436
def num_chunks (self ) -> int :
430
437
"""
431
438
Return the number of chunks the column consists of.
@@ -495,6 +502,11 @@ def __init__(self, df : pd.DataFrame, nan_as_null : bool = False) -> None:
495
502
# dtypes is added, this value should be propagated to columns.
496
503
self ._nan_as_null = nan_as_null
497
504
505
@property
def metadata(self) -> Dict[str, Any]:
    """
    Store specific metadata of the dataframe.

    Returns a dictionary with two entries: ``"num_chunks"`` (the result of
    ``self.num_chunks()``) and ``"num_columns"`` (the result of
    ``self.num_columns()``).
    """
    return {
        "num_chunks": self.num_chunks(),
        "num_columns": self.num_columns(),
    }
509
+
498
510
def num_columns(self) -> int:
    """
    Return the number of columns of the dataframe held in ``self._df``.
    """
    column_labels = self._df.columns
    return len(column_labels)
500
512
@@ -578,9 +590,28 @@ def test_categorical_dtype():
578
590
tm .assert_frame_equal (df , df2 )
579
591
580
592
593
def test_metadata():
    """
    Check the ``metadata`` property of the exchange dataframe and its columns.

    The dataframe-level metadata must report the chunk and column counts, the
    column-level metadata must report the chunk count, and the frame must
    still round-trip through ``from_dataframe``.
    """
    df = pd.DataFrame(data=dict(a=[1, 2, 3], b=[4, 5, 6], c=[7, 8, 9]))
    exchange_df = df.__dataframe__()

    # Check the metadata from the dataframe. Comparing whole dicts (rather
    # than looping over the keys of the actual result) also catches a key
    # that is missing from the returned metadata.
    df_metadata = exchange_df.metadata
    expected = {"num_chunks": 1, "num_columns": 3}
    assert df_metadata == expected

    # Check the metadata from the column against its own expectation, not
    # the dataframe-level one.
    col_metadata = exchange_df.get_column(0).metadata
    expected = {"num_chunks": 1}
    assert col_metadata == expected

    df2 = from_dataframe(df)
    tm.assert_frame_equal(df, df2)
610
+
611
+
581
612
if __name__ == '__main__':
    # Run every test in this module, in order, when executed as a script.
    for run_test in (
        test_categorical_dtype,
        test_float_only,
        test_mixed_intfloat,
        test_noncontiguous_columns,
        test_metadata,
    ):
        run_test()
0 commit comments