@@ -99,7 +99,7 @@ def convert_column_to_ndarray(col : ColumnObject) -> np.ndarray:
99
99
raise NotImplementedError ("Null values represented as masks or "
100
100
"sentinel values not handled yet" )
101
101
102
- _buffer , _dtype = col .get_data_buffer ()
102
+ _buffer , _dtype = col .get_buffers ()[ "data" ]
103
103
return buffer_to_ndarray (_buffer , _dtype )
104
104
105
105
@@ -143,7 +143,7 @@ def convert_categorical_column(col : ColumnObject) -> pd.Series:
143
143
# categories = col._col.values.categories.values
144
144
# codes = col._col.values.codes
145
145
categories = np .asarray (list (mapping .values ()))
146
- codes_buffer , codes_dtype = col .get_data_buffer ()
146
+ codes_buffer , codes_dtype = col .get_buffers ()[ "data" ]
147
147
codes = buffer_to_ndarray (codes_buffer , codes_dtype )
148
148
values = categories [codes ]
149
149
@@ -166,14 +166,17 @@ def convert_string_column(col : ColumnObject) -> np.ndarray:
166
166
"""
167
167
Convert a string column to a NumPy array.
168
168
"""
169
+ # Retrieve all of the column's buffers (data, offsets, validity) in one call:
170
+ buffers = col .get_buffers ()
171
+
169
172
# Retrieve the data buffer containing the UTF-8 code units
170
- dbuffer , bdtype = col . get_data_buffer ()
173
+ dbuffer , bdtype = buffers [ "data" ]
171
174
172
175
# Retrieve the offsets buffer containing the index offsets demarcating the beginning and end of each string
173
- obuffer , odtype = col . get_offsets_buffer ()
176
+ obuffer , odtype = buffers [ "offsets" ]
174
177
175
178
# Retrieve the mask buffer indicating the presence of missing values:
176
- mbuffer , mdtype = col . get_validity_buffer ()
179
+ mbuffer , mdtype = buffers [ "validity" ]
177
180
178
181
# Retrieve the missing value encoding:
179
182
null_value = col .describe_null [1 ]
@@ -500,7 +503,42 @@ def get_chunks(self, n_chunks : Optional[int] = None) -> Iterable['_PandasColumn
500
503
"""
501
504
return (self ,)
502
505
503
- def get_data_buffer (self ) -> Tuple [_PandasBuffer , Any ]: # Any is for self.dtype tuple
506
def get_buffers(self) -> Dict[str, Any]:
    """
    Return a dictionary containing the underlying buffers.

    The returned dictionary always has these three keys, in this order:

        - "data": a two-element tuple whose first element is the buffer
                  containing the data and whose second element is the
                  data buffer's associated dtype.
        - "validity": a two-element tuple whose first element is the
                      buffer containing mask values indicating missing
                      data and whose second element is the mask value
                      buffer's associated dtype. None if the validity
                      buffer could not be obtained (e.g., the null
                      representation is not a bit or byte mask).
        - "offsets": a two-element tuple whose first element is the
                     buffer containing the offset values for
                     variable-size binary data (e.g., variable-length
                     strings) and whose second element is the offsets
                     buffer's associated dtype. None if the offsets
                     buffer could not be obtained (e.g., the data buffer
                     does not have an associated offsets buffer).

    Any exception raised while retrieving the data buffer propagates to
    the caller; only the validity and offsets lookups are best-effort.
    """
    buffers: Dict[str, Any] = {}

    # The data buffer is mandatory, so failures here are not suppressed.
    buffers["data"] = self._get_data_buffer()

    # The helpers signal "no such buffer" by raising, which is mapped to None.
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit are never silently swallowed.
    # NOTE(review): narrow this to the specific exception type(s) the helpers
    # raise (the validity helper appears to raise RuntimeError) once confirmed.
    try:
        buffers["validity"] = self._get_validity_buffer()
    except Exception:
        buffers["validity"] = None

    try:
        buffers["offsets"] = self._get_offsets_buffer()
    except Exception:
        buffers["offsets"] = None

    return buffers
540
+
541
+ def _get_data_buffer (self ) -> Tuple [_PandasBuffer , Any ]: # Any is for self.dtype tuple
504
542
"""
505
543
Return the buffer containing the data and the buffer's associated dtype.
506
544
"""
@@ -532,7 +570,7 @@ def get_data_buffer(self) -> Tuple[_PandasBuffer, Any]: # Any is for self.dtype
532
570
533
571
return buffer , dtype
534
572
535
- def get_validity_buffer (self ) -> Tuple [_PandasBuffer , Any ]:
573
+ def _get_validity_buffer (self ) -> Tuple [_PandasBuffer , Any ]:
536
574
"""
537
575
Return the buffer containing the mask values indicating missing data and
538
576
the buffer's associated dtype.
@@ -578,7 +616,7 @@ def get_validity_buffer(self) -> Tuple[_PandasBuffer, Any]:
578
616
579
617
raise RuntimeError (msg )
580
618
581
- def get_offsets_buffer (self ) -> Tuple [_PandasBuffer , Any ]:
619
+ def _get_offsets_buffer (self ) -> Tuple [_PandasBuffer , Any ]:
582
620
"""
583
621
Return the buffer containing the offset values for variable-size binary
584
622
data (e.g., variable-length strings) and the buffer's associated dtype.
0 commit comments