paleolimbot commented on code in PR #468: URL: https://github.com/apache/sedona-db/pull/468#discussion_r2660118381
########## r/sedonadb/tools/savvy-update.sh: ########## @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +set -eu + +main() { + local -r source_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + local -r source_rpkg_dir="$(cd "${source_dir}/../" && pwd)" + + # Run the updater + savvy-cli update "${source_rpkg_dir}" + + # Post-process files + local -r api_h="${source_rpkg_dir}/src/rust/api.h" + local -r init_c="${source_rpkg_dir}/src/init.c" + local -r wrappers_r="${source_rpkg_dir}/R/000-wrappers.R" + + mv "${api_h}" "${api_h}.tmp" + mv "${init_c}" "${init_c}.tmp" + mv "${wrappers_r}" "${wrappers_r}.tmp" + + # Add license header to api.h + echo "${LICENSE_C}" > "${api_h}" + cat "${api_h}.tmp" >> "${api_h}" + + # Add license header, put includes on their own lines, and fix a typo in init.c + echo "${LICENSE_C}" > "${init_c}" + sed 's/#include/\n#include/g' "${init_c}.tmp" | \ + sed '1s/^\n//' | \ + sed 's/initialzation/initialization/g' >> "${init_c}" Review Comment: @yutannihilation I will open an issue and/or try to fix this in savvy when I have a moment, but while I'm thinking about it: - Includes have to go on their own lines, or else clang-format will reorder them in a way such that they won't compile - 
There's a typo in one of the comments that causes our pre-commit to fail ("initialzation") ...the other stuff (license files, clang-format) is specific to our setup and I'm not sure that's savvy's issue. ########## r/sedonadb/R/expression.R: ########## @@ -0,0 +1,287 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' Create SedonaDB logical expressions +#' +#' @param column_name A column name +#' @param qualifier An optional qualifier (e.g., table reference) that may be +#' used to disambiguate a specific reference +#' @param function_name The name of the function to call. This name is resolved +#' from the context associated with `factory`. +#' @param type A destination type into which `expr` should be cast. +#' @param expr A SedonaDBExpr or object coercible to one with [as_sd_expr()]. +#' @param alias An alias to apply to `expr`. +#' @param op Operator name for a binary expression. In general these follow +#' R function names (e.g., `>`, `<`, `+`, `-`). +#' @param lhs,rhs Arguments to a binary expression +#' @param factory A [sd_expr_factory()]. This factory wraps a SedonaDB context +#' and is used to resolve scalar functions and/or retrieve options. 
+#' +#' @returns An object of class SedonaDBExpr +#' @export +#' +#' @examples +#' sd_expr_column("foofy") +#' sd_expr_literal(1L) +#' sd_expr_scalar_function("abs", list(1L)) +#' sd_expr_cast(1L, nanoarrow::na_int64()) +#' sd_expr_alias(1L, "foofy") +#' +sd_expr_column <- function(column_name, qualifier = NULL, factory = sd_expr_factory()) { + factory$column(column_name, qualifier) +} Review Comment: @e-kotov I tried to mirror the DuckDB expression constructors you linked! The names are slightly different because I used DataFusion naming conventions. ########## r/sedonadb/src/rust/src/expression.rs: ########## @@ -0,0 +1,190 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use datafusion_common::{Column, ScalarValue}; +use datafusion_expr::{ + expr::{AggregateFunction, FieldMetadata, ScalarFunction}, + sqlparser::ast::NullTreatment, + BinaryExpr, Cast, Expr, Operator, +}; +use savvy::{savvy, savvy_err}; +use sedona::context::SedonaContext; + +use crate::{ + context::InternalContext, + ffi::{import_array, import_field}, +}; + +#[savvy] +pub struct SedonaDBExpr { + pub inner: Expr, +} + +#[savvy] +impl SedonaDBExpr { + fn display(&self) -> savvy::Result<savvy::Sexp> { + format!("{}", self.inner).try_into() + } + + fn debug_string(&self) -> savvy::Result<savvy::Sexp> { + format!("{:?}", self.inner).try_into() + } + + fn alias(&self, name: &str) -> savvy::Result<SedonaDBExpr> { + let inner = self.inner.clone().alias_if_changed(name.to_string())?; + Ok(Self { inner }) + } + + fn cast(&self, schema_xptr: savvy::Sexp) -> savvy::Result<SedonaDBExpr> { + let field = import_field(schema_xptr)?; + if let Some(type_name) = field.extension_type_name() { + return Err(savvy_err!( + "Can't cast to Arrow extension type '{type_name}'" + )); + } + + let inner = Expr::Cast(Cast::new( + self.inner.clone().into(), + field.data_type().clone(), + )); + + Ok(Self { inner }) + } + + fn negate(&self) -> savvy::Result<SedonaDBExpr> { + let inner = Expr::Negative(Box::new(self.inner.clone())); + Ok(Self { inner }) + } +} + +#[savvy] +pub struct SedonaDBExprFactory { + pub ctx: Arc<SedonaContext>, +} + +#[savvy] +impl SedonaDBExprFactory { + fn new(ctx: &InternalContext) -> Self { + Self { + ctx: ctx.inner.clone(), + } + } + + fn literal(array_xptr: savvy::Sexp, schema_xptr: savvy::Sexp) -> savvy::Result<SedonaDBExpr> { + let (field, array_ref) = import_array(array_xptr, schema_xptr)?; + let metadata = if field.metadata().is_empty() { + None + } else { + Some(FieldMetadata::new_from_field(&field)) + }; + + let scalar_value = ScalarValue::try_from_array(&array_ref, 0)?; + let inner = Expr::Literal(scalar_value, metadata); + 
Ok(SedonaDBExpr { inner }) + } + + fn column(&self, name: &str, qualifier: Option<&str>) -> savvy::Result<SedonaDBExpr> { + let inner = Expr::Column(Column::new(qualifier, name)); + Ok(SedonaDBExpr { inner }) + } + + fn binary( + &self, + op: &str, + lhs: &SedonaDBExpr, + rhs: &SedonaDBExpr, + ) -> savvy::Result<SedonaDBExpr> { + let operator = match op { + "==" => Operator::Eq, + "!=" => Operator::NotEq, + ">" => Operator::Gt, + ">=" => Operator::GtEq, + "<" => Operator::Lt, + "<=" => Operator::LtEq, + "+" => Operator::Plus, + "-" => Operator::Minus, + "*" => Operator::Multiply, + "/" => Operator::Divide, + "&" => Operator::And, + "|" => Operator::Or, + other => return Err(savvy_err!("Unimplemented binary operation '{other}'")), + }; + + let inner = Expr::BinaryExpr(BinaryExpr::new( + Box::new(lhs.inner.clone()), + operator, + Box::new(rhs.inner.clone()), + )); + Ok(SedonaDBExpr { inner }) + } + + fn scalar_function(&self, name: &str, args: savvy::Sexp) -> savvy::Result<SedonaDBExpr> { + if let Some(udf) = self.ctx.ctx.state().scalar_functions().get(name) { + let args = Self::exprs(args)?; + let inner = Expr::ScalarFunction(ScalarFunction::new_udf(udf.clone(), args)); + Ok(SedonaDBExpr { inner }) + } else { + Err(savvy_err!("Scalar UDF '{name}' not found")) + } + } + + fn aggregate_function( + &self, + name: &str, + args: savvy::Sexp, + na_rm: Option<bool>, + distinct: Option<bool>, + ) -> savvy::Result<SedonaDBExpr> { + if let Some(udf) = self.ctx.ctx.state().aggregate_functions().get(name) { + let args = Self::exprs(args)?; + let null_treatment = if na_rm.unwrap_or(true) { + NullTreatment::IgnoreNulls + } else { + NullTreatment::RespectNulls + }; + + let inner = Expr::AggregateFunction(AggregateFunction::new_udf( + udf.clone(), + args, + distinct.unwrap_or(false), + None, // filter + vec![], // order by + Some(null_treatment), + )); + + Ok(SedonaDBExpr { inner }) + } else { + Err(savvy_err!("Aggregate UDF '{name}' not found")) + } + } +} + +impl 
SedonaDBExprFactory { + fn exprs(exprs_sexp: savvy::Sexp) -> savvy::Result<Vec<Expr>> { + savvy::ListSexp::try_from(exprs_sexp)? + .iter() + .map(|(_, item)| -> savvy::Result<Expr> { + // This seems to require $.ptr from the list() input (can't just + // use list of R SedonaDBExpr objects) + let expr_wrapper: &SedonaDBExpr = item.try_into()?; + Ok(expr_wrapper.inner.clone()) Review Comment: @yutannihilation Is there a trick here that I should be using to get a list of a `#[savvy] struct XXX` back into Rust land? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: users@infra.apache.org
