refactor: from_arrow (#917) · kylebarron/datafusion-python@494b89a · GitHub
[go: up one dir, main page]

Skip to content

Commit 494b89a

Browse files
authored
refactor: from_arrow (apache#917)
1 parent b4b03fe commit 494b89a

File tree

1 file changed

+31
-3
lines changed

1 file changed

+31
-3
lines changed

python/datafusion/context.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from datafusion.record_batch import RecordBatchStream
3131
from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF
3232

33-
from typing import Any, TYPE_CHECKING
33+
from typing import Any, TYPE_CHECKING, Protocol
3434
from typing_extensions import deprecated
3535

3636
if TYPE_CHECKING:
@@ -41,6 +41,28 @@
4141
from datafusion.plan import LogicalPlan, ExecutionPlan
4242

4343

44+
class ArrowStreamExportable(Protocol):
45+
"""Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface.
46+
47+
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
48+
"""
49+
50+
def __arrow_c_stream__( # noqa: D105
51+
self, requested_schema: object | None = None
52+
) -> object: ...
53+
54+
55+
class ArrowArrayExportable(Protocol):
56+
"""Type hint for object exporting Arrow C Array via Arrow PyCapsule Interface.
57+
58+
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
59+
"""
60+
61+
def __arrow_c_array__( # noqa: D105
62+
self, requested_schema: object | None = None
63+
) -> tuple[object, object]: ...
64+
65+
4466
class SessionConfig:
4567
"""Session configuration options."""
4668

@@ -592,12 +614,18 @@ def from_pydict(
592614
"""
593615
return DataFrame(self.ctx.from_pydict(data, name))
594616

595-
def from_arrow(self, data: Any, name: str | None = None) -> DataFrame:
617+
def from_arrow(
618+
self,
619+
data: ArrowStreamExportable | ArrowArrayExportable,
620+
name: str | None = None,
621+
) -> DataFrame:
596622
"""Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow source.
597623
598624
The Arrow data source can be any object that implements either
599625
``__arrow_c_stream__`` or ``__arrow_c_array__``. For the latter, it must return
600-
a struct array. Common examples of sources from pyarrow include
626+
a struct array.
627+
628+
Arrow data can be Polars, Pandas, Pyarrow etc.
601629
602630
Args:
603631
data: Arrow data source.

0 commit comments

Comments
 (0)
0