|
30 | 30 | from datafusion.record_batch import RecordBatchStream
|
31 | 31 | from datafusion.udf import ScalarUDF, AggregateUDF, WindowUDF
|
32 | 32 |
|
33 |
| -from typing import Any, TYPE_CHECKING |
| 33 | +from typing import Any, TYPE_CHECKING, Protocol |
34 | 34 | from typing_extensions import deprecated
|
35 | 35 |
|
36 | 36 | if TYPE_CHECKING:
|
|
41 | 41 | from datafusion.plan import LogicalPlan, ExecutionPlan
|
42 | 42 |
|
43 | 43 |
|
| 44 | +class ArrowStreamExportable(Protocol): |
| 45 | + """Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface. |
| 46 | +
|
| 47 | + https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html |
| 48 | + """ |
| 49 | + |
| 50 | + def __arrow_c_stream__( # noqa: D105 |
| 51 | + self, requested_schema: object | None = None |
| 52 | + ) -> object: ... |
| 53 | + |
| 54 | + |
| 55 | +class ArrowArrayExportable(Protocol): |
| 56 | + """Type hint for object exporting Arrow C Array via Arrow PyCapsule Interface. |
| 57 | +
|
| 58 | + https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html |
| 59 | + """ |
| 60 | + |
| 61 | + def __arrow_c_array__( # noqa: D105 |
| 62 | + self, requested_schema: object | None = None |
| 63 | + ) -> tuple[object, object]: ... |
| 64 | + |
| 65 | + |
44 | 66 | class SessionConfig:
|
45 | 67 | """Session configuration options."""
|
46 | 68 |
|
@@ -592,12 +614,18 @@ def from_pydict(
|
592 | 614 | """
|
593 | 615 | return DataFrame(self.ctx.from_pydict(data, name))
|
594 | 616 |
|
595 |
| - def from_arrow(self, data: Any, name: str | None = None) -> DataFrame: |
| 617 | + def from_arrow( |
| 618 | + self, |
| 619 | + data: ArrowStreamExportable | ArrowArrayExportable, |
| 620 | + name: str | None = None, |
| 621 | + ) -> DataFrame: |
596 | 622 | """Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow source.
|
597 | 623 |
|
598 | 624 | The Arrow data source can be any object that implements either
|
599 | 625 | ``__arrow_c_stream__`` or ``__arrow_c_array__``. For the latter, it must return
|
600 |
| - a struct array. Common examples of sources from pyarrow include |
| 626 | + a struct array. |
| 627 | +
|
| 628 | + Arrow data can come from Polars, pandas, PyArrow, and similar libraries. |
601 | 629 |
|
602 | 630 | Args:
|
603 | 631 | data: Arrow data source.
|
|
0 commit comments