From ace5aae0b52b07488b2f42e95fe16f7c89e4637d Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 29 Jul 2024 15:01:35 -0500 Subject: [PATCH 01/13] make PyExpr::to_variant arms explicit --- src/expr.rs | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/expr.rs b/src/expr.rs index aab0daa6f..39b037252 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -141,10 +141,31 @@ impl PyExpr { Expr::AggregateFunction(expr) => { Ok(PyAggregateFunction::from(expr.clone()).into_py(py)) } - other => Err(py_runtime_err(format!( - "Cannot convert this Expr to a Python object: {:?}", - other - ))), + Expr::SimilarTo(value) => Ok(PySimilarTo::from(value.clone()).into_py(py)), + Expr::Between(value) => Ok(between::PyBetween::from(value.clone()).into_py(py)), + Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_py(py)), + Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_py(py)), + Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_py(py)), + Expr::Sort(_value) => { + todo!("upstream has two different Sort classes, need to figure out which one to use") + // let py_sort = PySort::from(value.clone()); + // let py_object = py_sort.into_py(py); + // Ok(py_object) + }, + Expr::ScalarFunction(_) => todo!(), + Expr::WindowFunction(_) => todo!(), + Expr::InList(_) => todo!(), + Expr::Exists(_) => todo!(), + Expr::InSubquery(_) => todo!(), + Expr::ScalarSubquery(value) => Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)), + Expr::Wildcard { qualifier } => { + let _ = qualifier; + todo!() + }, + Expr::GroupingSet(value) => Ok(grouping_set::PyGroupingSet::from(value.clone()).into_py(py)), + Expr::Placeholder(_) => todo!(), + Expr::OuterReferenceColumn(_, _) => todo!(), + Expr::Unnest(_) => todo!(), }) } From 0c2bfe851e42b1668af2dbe76487620dd23d206c Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 29 Jul 2024 15:09:28 -0500 Subject: [PATCH 02/13] update PyInList to wrap expr::InList --- src/expr.rs | 2 +- src/expr/in_list.rs | 22 ++++++++-------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/expr.rs b/src/expr.rs index 39b037252..ff4e8c67b 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -154,7 +154,7 @@ impl PyExpr { }, Expr::ScalarFunction(_) => todo!(), Expr::WindowFunction(_) => todo!(), - Expr::InList(_) => todo!(), + Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_py(py)) , Expr::Exists(_) => todo!(), Expr::InSubquery(_) => todo!(), Expr::ScalarSubquery(value) => Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)), diff --git a/src/expr/in_list.rs b/src/expr/in_list.rs index 840eee2ce..c1a99a3c8 100644 --- a/src/expr/in_list.rs +++ b/src/expr/in_list.rs @@ -16,38 +16,32 @@ // under the License. use crate::expr::PyExpr; -use datafusion_expr::Expr; +use datafusion_expr::expr::InList; use pyo3::prelude::*; #[pyclass(name = "InList", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PyInList { - expr: Box, - list: Vec, - negated: bool, + in_list: InList, } -impl PyInList { - pub fn new(expr: Box, list: Vec, negated: bool) -> Self { - Self { - expr, - list, - negated, - } +impl From for PyInList { + fn from(in_list: InList) -> Self { + PyInList { in_list } } } #[pymethods] impl PyInList { fn expr(&self) -> PyExpr { - (*self.expr).clone().into() + (*self.in_list.expr).clone().into() } fn list(&self) -> Vec { - self.list.iter().map(|e| e.clone().into()).collect() + self.in_list.list.iter().map(|e| e.clone().into()).collect() } fn negated(&self) -> bool { - self.negated + self.in_list.negated } } From cedcf4e7da257734e0d3866d2dfcadcd80039858 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 29 Jul 2024 15:12:28 -0500 Subject: [PATCH 03/13] update PyExists to wrap expr::Exists --- src/expr.rs | 20 +++++++++++++------- src/expr/exists.rs | 15 +++++++-------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/expr.rs b/src/expr.rs index ff4e8c67b..d62d77fb5 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -147,22 +147,28 @@ impl PyExpr { Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_py(py)), Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_py(py)), Expr::Sort(_value) => { - todo!("upstream has two different Sort classes, need to figure out which one to use") + todo!( + "upstream has two different Sort classes, need to figure out which one to use" + ) // let py_sort = PySort::from(value.clone()); // let py_object = py_sort.into_py(py); // Ok(py_object) - }, + } Expr::ScalarFunction(_) => todo!(), Expr::WindowFunction(_) => todo!(), - Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_py(py)) , - Expr::Exists(_) => todo!(), + Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_py(py)), + Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_py(py)), Expr::InSubquery(_) => todo!(), - Expr::ScalarSubquery(value) => Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)), + Expr::ScalarSubquery(value) => { + Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)) + } Expr::Wildcard { qualifier } => { let _ = qualifier; todo!() - }, - Expr::GroupingSet(value) => Ok(grouping_set::PyGroupingSet::from(value.clone()).into_py(py)), + } + Expr::GroupingSet(value) => { + Ok(grouping_set::PyGroupingSet::from(value.clone()).into_py(py)) + } Expr::Placeholder(_) => todo!(), Expr::OuterReferenceColumn(_, _) => todo!(), Expr::Unnest(_) => todo!(), diff --git a/src/expr/exists.rs b/src/expr/exists.rs index 7df9a6e81..fd2aa8c2f 100644 --- a/src/expr/exists.rs +++ b/src/expr/exists.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use datafusion_expr::Subquery; +use datafusion_expr::expr::Exists; use pyo3::prelude::*; use super::subquery::PySubquery; @@ -23,23 +23,22 @@ use super::subquery::PySubquery; #[pyclass(name = "Exists", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PyExists { - subquery: Subquery, - negated: bool, + exists: Exists, } -impl PyExists { - pub fn new(subquery: Subquery, negated: bool) -> Self { - Self { subquery, negated } +impl From for PyExists { + fn from(exists: Exists) -> Self { + PyExists { exists } } } #[pymethods] impl PyExists { fn subquery(&self) -> PySubquery { - self.subquery.clone().into() + self.exists.subquery.clone().into() } fn negated(&self) -> bool { - self.negated + self.exists.negated } } From 1ed848157496027d45e942b0295c52aa6dbd68fb Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 29 Jul 2024 15:15:46 -0500 Subject: [PATCH 04/13] update PyInSubquery to wrap expr::InSubquery --- src/expr.rs | 2 +- src/expr/in_subquery.rs | 22 ++++++++-------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/expr.rs b/src/expr.rs index d62d77fb5..bb1d9b986 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -158,7 +158,7 @@ impl PyExpr { Expr::WindowFunction(_) => todo!(), Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_py(py)), Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_py(py)), - Expr::InSubquery(_) => todo!(), + Expr::InSubquery(value) => Ok(in_subquery::PyInSubquery::from(value.clone()).into_py(py)), Expr::ScalarSubquery(value) => { Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)) } diff --git a/src/expr/in_subquery.rs b/src/expr/in_subquery.rs index 6cee4a1f0..7dfafdbf0 100644 --- a/src/expr/in_subquery.rs +++ b/src/expr/in_subquery.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use datafusion_expr::{Expr, Subquery}; +use datafusion_expr::expr::InSubquery; use pyo3::prelude::*; use super::{subquery::PySubquery, PyExpr}; @@ -23,32 +23,26 @@ use super::{subquery::PySubquery, PyExpr}; #[pyclass(name = "InSubquery", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PyInSubquery { - expr: Box, - subquery: Subquery, - negated: bool, + in_subquery: InSubquery, } -impl PyInSubquery { - pub fn new(expr: Box, subquery: Subquery, negated: bool) -> Self { - Self { - expr, - subquery, - negated, - } +impl From for PyInSubquery { + fn from(in_subquery: InSubquery) -> Self { + PyInSubquery { in_subquery } } } #[pymethods] impl PyInSubquery { fn expr(&self) -> PyExpr { - (*self.expr).clone().into() + (*self.in_subquery.expr).clone().into() } fn subquery(&self) -> PySubquery { - self.subquery.clone().into() + self.in_subquery.subquery.clone().into() } fn negated(&self) -> bool { - self.negated + self.in_subquery.negated } } From a750062c9420501faa4439b8c4e033f3dbad2196 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 29 Jul 2024 15:19:47 -0500 Subject: [PATCH 05/13] update Placeholder to wrap expr::Placeholder --- src/expr.rs | 8 ++++++-- src/expr/placeholder.rs | 21 ++++++++++----------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/expr.rs b/src/expr.rs index bb1d9b986..c68a933ee 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -158,7 +158,9 @@ impl PyExpr { Expr::WindowFunction(_) => todo!(), Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_py(py)), Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_py(py)), - Expr::InSubquery(value) => Ok(in_subquery::PyInSubquery::from(value.clone()).into_py(py)), + Expr::InSubquery(value) => { + Ok(in_subquery::PyInSubquery::from(value.clone()).into_py(py)) + } Expr::ScalarSubquery(value) => { Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)) } @@ -169,7 +171,9 @@ impl PyExpr { Expr::GroupingSet(value) => { Ok(grouping_set::PyGroupingSet::from(value.clone()).into_py(py)) } - Expr::Placeholder(_) => todo!(), + Expr::Placeholder(value) => { + Ok(placeholder::PyPlaceholder::from(value.clone()).into_py(py)) + } Expr::OuterReferenceColumn(_, _) => todo!(), Expr::Unnest(_) => todo!(), }) diff --git a/src/expr/placeholder.rs b/src/expr/placeholder.rs index e37c8b561..ca75ce37e 100644 --- a/src/expr/placeholder.rs +++ b/src/expr/placeholder.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use datafusion::arrow::datatypes::DataType; +use datafusion_expr::expr::Placeholder; use pyo3::prelude::*; use crate::common::data_type::PyDataType; @@ -23,26 +23,25 @@ use crate::common::data_type::PyDataType; #[pyclass(name = "Placeholder", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PyPlaceholder { - id: String, - data_type: Option, + placeholder: Placeholder, } -impl PyPlaceholder { - pub fn new(id: String, data_type: DataType) -> Self { - Self { - id, - data_type: Some(data_type), - } +impl From for PyPlaceholder { + fn from(placeholder: Placeholder) -> Self { + PyPlaceholder { placeholder } } } #[pymethods] impl PyPlaceholder { fn id(&self) -> String { - self.id.clone() + self.placeholder.id.clone() } fn data_type(&self) -> Option { - self.data_type.as_ref().map(|e| e.clone().into()) + self.placeholder + .data_type + .as_ref() + .map(|e| e.clone().into()) } } From 27a81c2417ccff6348ca57eaf4f26118909fd257 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 29 Jul 2024 15:54:15 -0500 Subject: [PATCH 06/13] make PyLogicalPlan::to_variant match arms explicit --- src/sql/logical.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/sql/logical.rs b/src/sql/logical.rs index b1446b92a..de87731a2 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -81,10 +81,16 @@ impl PyLogicalPlan { LogicalPlan::SubqueryAlias(plan) => PySubqueryAlias::from(plan.clone()).to_variant(py), LogicalPlan::Unnest(plan) => PyUnnest::from(plan.clone()).to_variant(py), LogicalPlan::Window(plan) => PyWindow::from(plan.clone()).to_variant(py), - other => Err(py_unsupported_variant_err(format!( - "Cannot convert this plan to a LogicalNode: {:?}", - other - ))), + LogicalPlan::Repartition(_) => todo!(), + LogicalPlan::Union(_) => todo!(), + LogicalPlan::Statement(_) => todo!(), + LogicalPlan::Values(_) => todo!(), + LogicalPlan::Prepare(_) => todo!(), + LogicalPlan::Dml(_) => todo!(), + LogicalPlan::Ddl(_) => todo!(), + LogicalPlan::Copy(_) => todo!(), + LogicalPlan::DescribeTable(_) => todo!(), + LogicalPlan::RecursiveQuery(_) => todo!(), } } From 0d571abbec3f90d56b97256689cc63652183e754 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 29 Jul 2024 16:15:15 -0500 Subject: [PATCH 07/13] add PySortExpr wrapper --- src/expr.rs | 11 ++----- src/expr/sort_expr.rs | 71 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 8 deletions(-) create mode 100644 src/expr/sort_expr.rs diff --git a/src/expr.rs b/src/expr.rs index c68a933ee..657192e6f 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -84,6 +84,7 @@ pub mod scalar_subquery; pub mod scalar_variable; pub mod signature; pub mod sort; +pub mod sort_expr; pub mod subquery; pub mod subquery_alias; pub mod table_scan; @@ -146,14 +147,7 @@ impl PyExpr { Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_py(py)), Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_py(py)), Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_py(py)), - Expr::Sort(_value) => { - todo!( - "upstream has two different Sort classes, need to figure out which one to use" - ) - // let py_sort = PySort::from(value.clone()); - // let py_object = py_sort.into_py(py); - // Ok(py_object) - } + Expr::Sort(value) => Ok(sort_expr::PySortExpr::from(value.clone()).into_py(py)), Expr::ScalarFunction(_) => todo!(), Expr::WindowFunction(_) => todo!(), Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_py(py)), @@ -637,6 +631,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/expr/sort_expr.rs b/src/expr/sort_expr.rs new file mode 100644 index 000000000..6a8a0cf0c --- /dev/null +++ b/src/expr/sort_expr.rs @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::expr::PyExpr; +use datafusion_expr::SortExpr; +use pyo3::prelude::*; +use std::fmt::{self, Display, Formatter}; + +#[pyclass(name = "SortExpr", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PySortExpr { + sort: SortExpr, +} + +impl From for SortExpr { + fn from(sort: PySortExpr) -> Self { + sort.sort + } +} + +impl From for PySortExpr { + fn from(sort: SortExpr) -> PySortExpr { + PySortExpr { sort } + } +} + +impl Display for PySortExpr { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "Sort + Expr: {:?} + Asc: {:?} + NullsFirst: {:?}", + &self.sort.expr, &self.sort.asc, &self.sort.nulls_first + ) + } +} + +#[pymethods] +impl PySortExpr { + fn expr(&self) -> PyResult { + Ok((*self.sort.expr).clone().into()) + } + + fn ascending(&self) -> PyResult { + Ok(self.sort.asc) + } + + fn nulls_first(&self) -> PyResult { + Ok(self.sort.nulls_first) + } + + fn __repr__(&self) -> String { + format!("{}", self) + } +} From ffed35d15c57b431e76af2afa06c0a9498a06a32 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Mon, 29 Jul 2024 16:22:32 -0500 Subject: [PATCH 08/13] add PyUnnestExpr wrapper --- src/expr.rs | 4 ++- src/expr/unnest_expr.rs | 67 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 src/expr/unnest_expr.rs diff --git a/src/expr.rs b/src/expr.rs index 657192e6f..f2209b90c 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -90,6 +90,7 @@ pub mod subquery_alias; pub mod table_scan; pub mod union; pub mod unnest; +pub mod unnest_expr; pub mod window; /// A PyExpr that can be used on a DataFrame @@ -169,7 +170,7 @@ impl PyExpr { Ok(placeholder::PyPlaceholder::from(value.clone()).into_py(py)) } Expr::OuterReferenceColumn(_, _) => todo!(), - Expr::Unnest(_) => todo!(), + Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_py(py)), }) } @@ -624,6 +625,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/src/expr/unnest_expr.rs b/src/expr/unnest_expr.rs new file mode 100644 index 000000000..a2f8230cc --- /dev/null +++ b/src/expr/unnest_expr.rs @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_expr::expr::Unnest; +use pyo3::prelude::*; +use std::fmt::{self, Display, Formatter}; + +use super::PyExpr; + +#[pyclass(name = "UnnestExpr", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyUnnestExpr { + unnest: Unnest, +} + +impl From for PyUnnestExpr { + fn from(unnest: Unnest) -> PyUnnestExpr { + PyUnnestExpr { unnest } + } +} + +impl From for Unnest { + fn from(unnest: PyUnnestExpr) -> Self { + unnest.unnest + } +} + +impl Display for PyUnnestExpr { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "Unnest + Expr: {:?}", + &self.unnest.expr, + ) + } +} + +#[pymethods] +impl PyUnnestExpr { + /// Retrieves the expression that is being unnested + fn expr(&self) -> PyResult { + Ok((*self.unnest.expr).clone().into()) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("UnnestExpr({})", self)) + } + + fn __name__(&self) -> PyResult { + Ok("UnnestExpr".to_string()) + } +} From 886a77652c6324e233027d277820557f569d71a0 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Tue, 30 Jul 2024 08:44:12 -0500 Subject: [PATCH 09/13] update PyAlias to wrap upstream Alias --- src/expr.rs | 2 +- src/expr/alias.rs | 32 +++++++++++++++++--------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/expr.rs b/src/expr.rs index f2209b90c..c55351ef1 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -122,7 +122,7 @@ impl PyExpr { /// Return the specific expression fn to_variant(&self, py: Python) -> PyResult { Python::with_gil(|_| match &self.expr { - Expr::Alias(alias) => Ok(PyAlias::new(&alias.expr, &alias.name).into_py(py)), + Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_py(py)), Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_py(py)), Expr::ScalarVariable(data_type, variables) => { Ok(PyScalarVariable::new(data_type, variables).into_py(py)) diff --git a/src/expr/alias.rs b/src/expr/alias.rs index 2ce656342..3208800ad 100644 --- a/src/expr/alias.rs +++ b/src/expr/alias.rs @@ -19,13 +19,24 @@ use crate::expr::PyExpr; use pyo3::prelude::*; use std::fmt::{self, Display, Formatter}; -use datafusion_expr::Expr; +use datafusion_expr::expr::Alias; #[pyclass(name = "Alias", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PyAlias { - expr: PyExpr, - alias_name: String, + alias: Alias, +} + +impl From for PyAlias { + fn from(alias: Alias) -> Self { + Self { alias } + } +} + +impl From for Alias { + fn from(py_alias: PyAlias) -> Self { + py_alias.alias + } } impl Display for PyAlias { @@ -35,29 +46,20 @@ impl Display for PyAlias { "Alias \nExpr: `{:?}` \nAlias Name: `{}`", - &self.expr, &self.alias_name + &self.alias.expr, &self.alias.name ) } } -impl PyAlias { - pub fn new(expr: &Expr, alias_name: &String) -> Self { - Self { - expr: expr.clone().into(), - alias_name: alias_name.to_owned(), - } - } -} - #[pymethods] impl PyAlias { /// Retrieve the "name" of the alias fn alias(&self) -> PyResult { - Ok(self.alias_name.clone()) + Ok(self.alias.name.clone()) } fn expr(&self) -> PyResult { - Ok(self.expr.clone()) + Ok((*self.alias.expr.clone()).into()) } /// Get a String representation of this column From 3b194f374675d672c1b5455845948e21517ea774 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Wed, 31 Jul 2024 01:24:50 -0500 Subject: [PATCH 10/13] return not implemented error for unimplemnted variants in PyExpr::to_variant --- src/expr.rs | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/expr.rs b/src/expr.rs index c55351ef1..04bfc85c2 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -33,7 +33,7 @@ use datafusion_expr::{ }; use crate::common::data_type::{DataTypeMap, RexType}; -use crate::errors::{py_runtime_err, py_type_err, DataFusionError}; +use crate::errors::{py_runtime_err, py_type_err, py_unsupported_variant_err, DataFusionError}; use crate::expr::aggregate_expr::PyAggregateFunction; use crate::expr::binary_expr::PyBinaryExpr; use crate::expr::column::PyColumn; @@ -121,7 +121,8 @@ pub fn py_expr_list(expr: &[Expr]) -> PyResult> { impl PyExpr { /// Return the specific expression fn to_variant(&self, py: Python) -> PyResult { - Python::with_gil(|_| match &self.expr { + Python::with_gil(|_| { + match &self.expr { Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_py(py)), Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_py(py)), Expr::ScalarVariable(data_type, variables) => { @@ -149,8 +150,14 @@ impl PyExpr { Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_py(py)), Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_py(py)), Expr::Sort(value) => Ok(sort_expr::PySortExpr::from(value.clone()).into_py(py)), - Expr::ScalarFunction(_) => todo!(), - Expr::WindowFunction(_) => todo!(), + Expr::ScalarFunction(value) => Err(py_unsupported_variant_err(format!( + "Converting Expr::ScalarFunction to a Python object is not implemented: {:?}", + value + ))), + Expr::WindowFunction(value) => Err(py_unsupported_variant_err(format!( + "Converting Expr::WindowFunction to a Python object is not implemented: {:?}", + value + ))), Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_py(py)), Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_py(py)), Expr::InSubquery(value) => { @@ -159,18 +166,22 @@ impl PyExpr { Expr::ScalarSubquery(value) => { Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)) } - Expr::Wildcard { qualifier } => { - let _ = qualifier; - todo!() - } + Expr::Wildcard { qualifier } => Err(py_unsupported_variant_err(format!( + "Converting Expr::Wildcard to a Python object is not implemented : {:?}", + qualifier + ))), Expr::GroupingSet(value) => { Ok(grouping_set::PyGroupingSet::from(value.clone()).into_py(py)) } Expr::Placeholder(value) => { Ok(placeholder::PyPlaceholder::from(value.clone()).into_py(py)) } - Expr::OuterReferenceColumn(_, _) => todo!(), + Expr::OuterReferenceColumn(data_type, column) => Err(py_unsupported_variant_err(format!( + "Converting Expr::OuterReferenceColumn to a Python object is not implemented: {:?} - {:?}", + data_type, column + ))), Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_py(py)), + } }) } From 50f88bacb01b805d7a2888b6949e532633870dac Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Wed, 31 Jul 2024 01:53:59 -0500 Subject: [PATCH 11/13] added to_variant python test from the GH issue --- python/datafusion/tests/test_expr.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/python/datafusion/tests/test_expr.py b/python/datafusion/tests/test_expr.py index c9f0e98d5..1a41120a5 100644 --- a/python/datafusion/tests/test_expr.py +++ b/python/datafusion/tests/test_expr.py @@ -139,3 +139,31 @@ def test_relational_expr(test_ctx): assert df.filter(col("b") != "beta").count() == 2 assert df.filter(col("a") == "beta").count() == 0 + + +def test_expr_to_variant(): + # Taken from https://github.com/apache/datafusion-python/issues/781 + from datafusion import SessionContext + from datafusion.expr import Filter + + + def traverse_logical_plan(plan): + cur_node = plan.to_variant() + if isinstance(cur_node, Filter): + return cur_node.predicate().to_variant() + if hasattr(plan, 'inputs'): + for input_plan in plan.inputs(): + res = traverse_logical_plan(input_plan) + if res is not None: + return res + + ctx = SessionContext() + data = {'id': [1, 2, 3], 'name': ['Alice', 'Bob', 'Charlie']} + ctx.from_pydict(data, name='table1') + query = "SELECT * FROM table1 t1 WHERE t1.name IN ('dfa', 'ad', 'dfre', 'vsa')" + logical_plan = ctx.sql(query).optimized_logical_plan() + variant = traverse_logical_plan(logical_plan) + assert variant is not None + assert variant.expr().to_variant().qualified_name() == 'table1.name' + assert str(variant.list()) == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]' + assert not variant.negated() From 5804ced96d836b820cc2a44b13be9177180b5e83 Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Wed, 31 Jul 2024 01:56:12 -0500 Subject: [PATCH 12/13] remove unused import --- src/sql/logical.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sql/logical.rs b/src/sql/logical.rs index de87731a2..0e530598f 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -17,7 +17,6 @@ use std::sync::Arc; -use crate::errors::py_unsupported_variant_err; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; use crate::expr::cross_join::PyCrossJoin; From e3e8af76983a1f4bc9ce921920c0f7440c8b7a4c Mon Sep 17 00:00:00 2001 From: Michael-J-Ward Date: Wed, 31 Jul 2024 02:36:24 -0500 Subject: [PATCH 13/13] return unsupported_variants for unimplemented variants in PyLogicalPlan::to_variant --- src/sql/logical.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 0e530598f..c4471f503 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use crate::errors::py_unsupported_variant_err; use crate::expr::aggregate::PyAggregate; use crate::expr::analyze::PyAnalyze; use crate::expr::cross_join::PyCrossJoin; @@ -80,16 +81,19 @@ impl PyLogicalPlan { LogicalPlan::SubqueryAlias(plan) => PySubqueryAlias::from(plan.clone()).to_variant(py), LogicalPlan::Unnest(plan) => PyUnnest::from(plan.clone()).to_variant(py), LogicalPlan::Window(plan) => PyWindow::from(plan.clone()).to_variant(py), - LogicalPlan::Repartition(_) => todo!(), - LogicalPlan::Union(_) => todo!(), - LogicalPlan::Statement(_) => todo!(), - LogicalPlan::Values(_) => todo!(), - LogicalPlan::Prepare(_) => todo!(), - LogicalPlan::Dml(_) => todo!(), - LogicalPlan::Ddl(_) => todo!(), - LogicalPlan::Copy(_) => todo!(), - LogicalPlan::DescribeTable(_) => todo!(), - LogicalPlan::RecursiveQuery(_) => todo!(), + LogicalPlan::Repartition(_) + | LogicalPlan::Union(_) + | LogicalPlan::Statement(_) + | LogicalPlan::Values(_) + | LogicalPlan::Prepare(_) + | LogicalPlan::Dml(_) + | LogicalPlan::Ddl(_) + | LogicalPlan::Copy(_) + | LogicalPlan::DescribeTable(_) + | LogicalPlan::RecursiveQuery(_) => Err(py_unsupported_variant_err(format!( + "Conversion of variant not implemented: {:?}", + self.plan + ))), } }