10000 [python] add digest python function (#1127) · tfeda/datafusion-python@aa37b8a · GitHub
[go: up one dir, main page]

Skip to content

Commit aa37b8a

Browse files
committed
[python] add digest python function (#1127)
* add digest python function * add test result * ignore long lines GitOrigin-RevId: f38443d2338ea6953e3ce3178e937173bb53df94
1 parent d085189 commit aa37b8a

File tree

2 files changed

+85
-0
lines changed

2 files changed

+85
-0
lines changed

src/functions.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,18 @@ fn random() -> expression::Expression {
9393
}
9494
}
9595

96+
/// Computes a binary hash of the given data. `method` is the algorithm to use.
97+
/// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3.
98+
#[pyfunction(value, method)]
99+
fn digest(
100+
value: expression::Expression,
101+
method: expression::Expression,
102+
) -> expression::Expression {
103+
expression::Expression {
104+
expr: logical_plan::digest(value.expr, method.expr),
105+
}
106+
}
107+
96108
/// Concatenates the text representations of all the arguments.
97109
/// NULL arguments are ignored.
98110
#[pyfunction(args = "*")]
@@ -340,6 +352,7 @@ pub fn init(module: &PyModule) -> PyResult<()> {
340352
module.add_function(wrap_pyfunction!(ltrim, module)?)?;
341353
module.add_function(wrap_pyfunction!(max, module)?)?;
342354
module.add_function(wrap_pyfunction!(md5, module)?)?;
355+
module.add_function(wrap_pyfunction!(digest, module)?)?;
343356
module.add_function(wrap_pyfunction!(min, module)?)?;
344357
module.add_function(wrap_pyfunction!(now, module)?)?;
345358
module.add_function(wrap_pyfunction!(octet_length, module)?)?;

tests/test_string_functions.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,75 @@ def test_string_functions(df):
4747
]
4848
)
4949
assert result.column(1) == pa.array(["hello", "world", "!"])
50+
51+
52+
def test_hash_functions(df):
53+
df = df.select(
54+
*[
55+
f.digest(f.col("a"), f.lit(m))
56+
for m in ("md5", "sha256", "sha512", "blake2s", "blake3")
57+
]
58+
)
59+
result = df.collect()
60+
assert len(result) == 1
61+
result = result[0]
62+
b = bytearray.fromhex
63+
assert result.column(0) == pa.array(
64+
[
65+
b("8B1A9953C4611296A827ABF8C47804D7"),
66+
b("F5A7924E621E84C9280A9A27E1BCB7F6"),
67+
b("9033E0E305F247C0C3C80D0C7848C8B3"),
68+
]
69+
)
70+
assert result.column(1) == pa.array(
71+
[
72+
b(
73+
"185F8DB32271FE25F561A6FC938B2E264306EC304EDA518007D1764826381969"
74+
),
75+
b(
76+
"78AE647DC5544D227130A0682A51E30BC7777FBB6D8A8F17007463A3ECD1D524"
77+
),
78+
b(
79+
"BB7208BC9B5D7C04F1236A82A0093A5E33F40423D5BA8D4266F7092C3BA43B62"
80+
),
81+
]
82+
)
83+
assert result.column(2) == pa.array(
84+
[
85+
b(
86+
"3615F80C9D293ED7402687F94B22D58E529B8CC7916F8FAC7FDDF7FBD5AF4CF777D3D795A7A00A16BF7E7F3FB9561EE9BAAE480DA9FE7A18769E71886B03F315"
87+
),
88+
b(
89+
"8EA77393A42AB8FA92500FB077A9509CC32BC95E72712EFA116EDAF2EDFAE34FBB682EFDD6C5DD13C117E08BD4AAEF71291D8AACE2F890273081D0677C16DF0F"
90+
),
91+
b(
92+
"3831A6A6155E509DEE59A7F451EB35324D8F8F2DF6E3708894740F98FDEE23889F4DE5ADB0C5010DFB555CDA77C8AB5DC902094C52DE3278F35A75EBC25F093A"
93+
),
94+
]
95+
)
96+
assert result.column(3) == pa.array(
97+
[
98+
b(
99+
"F73A5FBF881F89B814871F46E26AD3FA37CB2921C5E8561618639015B3CCBB71"
100+
),
101+
b(
102+
"B792A0383FB9E7A189EC150686579532854E44B71AC394831DAED169BA85CCC5"
103+
),
104+
b(
105+
"27988A0E51812297C77A433F635233346AEE29A829DCF4F46E0F58F402C6CFCB"
106+
),
107+
]
108+
)
109+
assert result.column(4) == pa.array(
110+
[
111+
b(
112+
"FBC2B0516EE8744D293B980779178A3508850FDCFE965985782C39601B65794F"
113+
),
114+
b(
115+
"BF73D18575A736E4037D45F9E316085B86C19BE6363DE6AA789E13DEAACC1C4E"
116+
),
117+
b(
118+
"C8D11B9F7237E4034ADBCD2005735F9BC4C597C75AD89F4492BEC8F77D15F7EB"
119+
),
120+
]
121+
)

0 commit comments

Comments
 (0)
0