Add datafusion.extract (#959) · nirnayroy/datafusion-python@2690e61 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2690e61

Browse files
authored
Add datafusion.extract (apache#959)
* feat: add extract function as an alias for date_part
* docs: update user guide to include examples for date_part and extract functions
* fix: update examples in user guide to use f.to_timestamp for date extraction
1 parent deb1f25 commit 2690e61

File tree

3 files changed

+20
-0
lines changed

3 files changed

+20
-0
lines changed

docs/source/user-guide/common-operations/functions.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,15 @@ Convert to timestamps using :py:func:`~datafusion.functions.to_timestamp`
7272
7373
df.select(f.to_timestamp(col('"Total"')).alias("timestamp"))
7474
75+
Extracting parts of a date using :py:func:`~datafusion.functions.date_part` (alias :py:func:`~datafusion.functions.extract`)
76+
77+
.. ipython:: python
78+
79+
df.select(
80+
f.date_part(literal("month"), f.to_timestamp(col('"Total"'))).alias("month"),
81+
f.extract(literal("day"), f.to_timestamp(col('"Total"'))).alias("day")
82+
)
83+
7584
String
7685
------
7786

python/datafusion/functions.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@
128128
"empty",
129129
"encode",
130130
"ends_with",
131+
"extract",
131132
"exp",
132133
"factorial",
133134
"find_in_set",
@@ -994,6 +995,14 @@ def date_part(part: Expr, date: Expr) -> Expr:
994995
return Expr(f.date_part(part.expr, date.expr))
995996

996997

998+
def extract(part: Expr, date: Expr) -> Expr:
999+
"""Extracts a subfield from the date.
1000+
1001+
This is an alias for :py:func:`date_part`.
1002+
"""
1003+
return date_part(part, date)
1004+
1005+
9971006
def date_trunc(part: Expr, date: Expr) -> Expr:
9981007
"""Truncates the date to a specified level of precision."""
9991008
return Expr(f.date_trunc(part.expr, date.expr))

python/tests/test_functions.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,6 +866,7 @@ def test_temporal_functions(df):
866866
f.to_timestamp_seconds(literal("2023-09-07 05:06:14.523952")),
867867
f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")),
868868
f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")),
869+
f.extract(literal("day"), column("d")),
869870
)
870871
result = df.collect()
871872
assert len(result) == 1
@@ -903,6 +904,7 @@ def test_temporal_functions(df):
903904
assert result.column(9) == pa.array(
904905
[datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us")
905906
)
907+
assert result.column(10) == pa.array([31, 26, 2], type=pa.float64())
906908

907909

908910
def test_case(df):

0 commit comments

Comments
 (0)
0