Update offsets buffer accessor · iskode/dataframe-api@fd4d71b · GitHub
[go: up one dir, main page]

Skip to content

Commit fd4d71b

Browse files
committed
Update offsets buffer accessor
1 parent a982987 commit fd4d71b

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

protocol/pandas_implementation.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -432,7 +432,7 @@ def describe_null(self) -> Tuple[int, Any]:
432432
elif kind == _k.STRING:
433433
null = 1 # np.nan (object dtype)
434434
else:
435-
raise NotImplementedError(f'Data type {self.dtype} not yet supported')
435+
raise NotImplementedError(f"Data type {self.dtype} not yet supported")
436436

437437
return null, value
438438

@@ -479,7 +479,7 @@ def get_data_buffer(self) -> Tuple[_PandasBuffer, Any]: # Any is for self.dtype
479479
buffer = _PandasBuffer(np.frombuffer(b, dtype="uint8"))
480480

481481
# Define the dtype for the returned buffer
482-
dtype = (_k.STRING, 8, "=U1", "=")
482+
dtype = (_k.STRING, 8, "=U1", "=") # note: currently only support native endianness
483483
else:
484484
raise NotImplementedError(f"Data type {self._col.dtype} not handled yet")
485485

@@ -498,7 +498,7 @@ def get_mask(self) -> Tuple[_PandasBuffer, Any]:
498498
elif null == 1:
499499
msg = "This column uses NaN as null so does not have a separate mask"
500500
else:
501-
raise NotImplementedError('See self.describe_null')
501+
raise NotImplementedError("See self.describe_null")
502502

503503
raise RuntimeError(msg)
504504

@@ -521,8 +521,15 @@ def get_offsets(self) -> Tuple[_PandasBuffer, Any]:
521521
ptr += len(b)
522522
offsets.append(ptr)
523523

524-
buffer = np.asarray(offsets, dtype='int64')
525-
dtype = (_k.INT, buffer.itemsize*8, buffer.str, buffer.byteorder)
524+
# Convert the list of offsets to a NumPy array of signed 64-bit integers (note: Arrow allows the offsets array to be either `int32` or `int64`; here, we default to the latter)
525+
buf = np.asarray(offsets, dtype="int64")
526+
527+
# Convert the offsets to a Pandas "buffer" using the NumPy array as the backing store
528+
buffer = _PandasBuffer(buf)
529+
530+
# Assemble the buffer dtype info
531+
bdtype = buf.dtype;
532+
dtype = (_k.INT, bdtype.itemsize*8, bdtype.str, "=") # note: currently only support native endianness
526533
else:
527534
raise RuntimeError("This column has a fixed-length dtype so does not have an offsets buffer")
528535

0 commit comments

Comments
 (0)
0