Add support for returning a data buffer for string dtypes · iskode/dataframe-api@a982987 · GitHub
[go: up one dir, main page]

Skip to content

Commit a982987

Browse files
committed
Add support for returning a data buffer for string dtypes
1 parent 040f928 commit a982987

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

protocol/pandas_implementation.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -470,8 +470,16 @@ def get_data_buffer(self) -> Tuple[_PandasBuffer, Any]: # Any is for self.dtype
470470
buffer = _PandasBuffer(codes)
471471
dtype = self._dtype_from_pandasdtype(codes.dtype)
472472
elif self.dtype[0] == _k.STRING:
473-
buffer = _PandasBuffer(self._col.to_numpy())
474-
dtype = (_k.STRING, 8, '|U', '=')
473+
# Marshal the strings from a NumPy object array into a byte array
474+
b = bytearray()
475+
for v in self._col:
476+
b.extend(v.encode(encoding="utf-8"))
477+
478+
# Convert the byte array to a Pandas "buffer" using a NumPy array as the backing store
479+
buffer = _PandasBuffer(np.frombuffer(b, dtype="uint8"))
480+
481+
# Define the dtype for the returned buffer
482+
dtype = (_k.STRING, 8, "=U1", "=")
475483
else:
476484
raise NotImplementedError(f"Data type {self._col.dtype} not handled yet")
477485

0 commit comments

Comments
 (0)
0