This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new e80fb37b feat(r/sedonadb): Translate R expressions to DataFusion
logical Expr (#468)
e80fb37b is described below
commit e80fb37b2a15a9708420fef0cc575b9e5274972e
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Jan 5 14:40:09 2026 -0600
feat(r/sedonadb): Translate R expressions to DataFusion logical Expr (#468)
Co-authored-by: Copilot <[email protected]>
---
.pre-commit-config.yaml | 4 +-
dev/release/rat_exclude_files.txt | 1 +
r/sedonadb/.gitattributes | 20 ++
r/sedonadb/NAMESPACE | 22 ++
r/sedonadb/R/000-wrappers.R | 123 +++++++++++
r/sedonadb/R/expression.R | 283 +++++++++++++++++++++++++
r/sedonadb/R/literal.R | 98 +++++++++
r/sedonadb/man/as_sedonadb_literal.Rd | 30 +++
r/sedonadb/man/sd_expr_column.Rd | 82 +++++++
r/sedonadb/src/init.c | 91 ++++++++
r/sedonadb/src/rust/api.h | 24 +++
r/sedonadb/src/rust/src/expression.rs | 199 +++++++++++++++++
r/sedonadb/src/rust/src/ffi.rs | 24 ++-
r/sedonadb/src/rust/src/lib.rs | 1 +
r/sedonadb/tests/testthat/_snaps/expression.md | 170 +++++++++++++++
r/sedonadb/tests/testthat/_snaps/literal.md | 8 +
r/sedonadb/tests/testthat/test-expression.R | 80 +++++++
r/sedonadb/tests/testthat/test-literal.R | 61 ++++++
r/sedonadb/tools/savvy-update.sh | 94 ++++++++
19 files changed, 1412 insertions(+), 3 deletions(-)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d8a93289..fba9714a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,6 +25,8 @@ repos:
- id: check-yaml
- id: detect-private-key
- id: end-of-file-fixer
+ # R snapshot test files may have arbitrary file endings based on test
results
+ exclude: "_snaps"
- id: fix-byte-order-marker
- id: trailing-whitespace
@@ -34,7 +36,7 @@ repos:
- id: codespell
# types_or: [markdown, c, c++, rust, python]
additional_dependencies: [tomli]
- exclude:
"^c/(sedona-geoarrow-c/src/geoarrow|sedona-geoarrow-c/src/nanoarrow|sedona-libgpuspatial/libgpuspatial|sedona-tg/src/tg)/.*|^docs/image/sedonadb-architecture\\.svg$"
+ exclude:
"^c/(sedona-geoarrow-c/src/geoarrow|sedona-geoarrow-c/src/nanoarrow|sedona-libgpuspatial/libgpuspatial|sedona-tg/src/tg)/.*|^docs/image/sedonadb-architecture\\.svg$|^r/sedonadb/tools/savvy-update.sh$"
args: ["--ignore-words-list=thirdparty"]
- repo: https://github.com/astral-sh/ruff-pre-commit
diff --git a/dev/release/rat_exclude_files.txt
b/dev/release/rat_exclude_files.txt
index 6938c0fc..bad4cb5c 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -25,4 +25,5 @@ r/sedonadb/.Rbuildignore
r/sedonadb/DESCRIPTION
r/sedonadb/NAMESPACE
r/sedonadb/src/sedonadb-win.def
+r/sedonadb/tests/testthat/_snaps/*
submodules/geoarrow-data/*
diff --git a/r/sedonadb/.gitattributes b/r/sedonadb/.gitattributes
new file mode 100644
index 00000000..3f8d0f75
--- /dev/null
+++ b/r/sedonadb/.gitattributes
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+R/000-wrappers.R linguist-generated
+src/init.c linguist-generated
+src/rust/api.h linguist-generated
diff --git a/r/sedonadb/NAMESPACE b/r/sedonadb/NAMESPACE
index f7eb1ee7..11a141b9 100644
--- a/r/sedonadb/NAMESPACE
+++ b/r/sedonadb/NAMESPACE
@@ -10,19 +10,41 @@ S3method(as_sedonadb_dataframe,nanoarrow_array)
S3method(as_sedonadb_dataframe,nanoarrow_array_stream)
S3method(as_sedonadb_dataframe,sedonadb_dataframe)
S3method(as_sedonadb_dataframe,sf)
+S3method(as_sedonadb_literal,"NULL")
+S3method(as_sedonadb_literal,character)
+S3method(as_sedonadb_literal,double)
+S3method(as_sedonadb_literal,integer)
+S3method(as_sedonadb_literal,nanoarrow_array)
+S3method(as_sedonadb_literal,raw)
+S3method(as_sedonadb_literal,wk_wkb)
S3method(dim,sedonadb_dataframe)
S3method(dimnames,sedonadb_dataframe)
S3method(head,sedonadb_dataframe)
S3method(infer_nanoarrow_schema,sedonadb_dataframe)
S3method(print,"sedonadb::InternalContext__bundle")
S3method(print,"sedonadb::InternalDataFrame__bundle")
+S3method(print,"sedonadb::SedonaDBExprFactory__bundle")
+S3method(print,"sedonadb::SedonaDBExpr__bundle")
+S3method(print,SedonaDBExpr)
S3method(print,sedonadb_dataframe)
+export(as_sd_expr)
export(as_sedonadb_dataframe)
+export(as_sedonadb_literal)
+export(is_sd_expr)
export(sd_collect)
export(sd_compute)
export(sd_configure_proj)
export(sd_count)
export(sd_drop_view)
+export(sd_expr_aggregate_function)
+export(sd_expr_alias)
+export(sd_expr_binary)
+export(sd_expr_cast)
+export(sd_expr_column)
+export(sd_expr_factory)
+export(sd_expr_literal)
+export(sd_expr_negative)
+export(sd_expr_scalar_function)
export(sd_preview)
export(sd_read_parquet)
export(sd_register_udf)
diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R
index df1f61fd..40fd3ce4 100644
--- a/r/sedonadb/R/000-wrappers.R
+++ b/r/sedonadb/R/000-wrappers.R
@@ -265,3 +265,126 @@ class(`InternalDataFrame`) <-
c("sedonadb::InternalDataFrame__bundle", "savvy_se
`print.sedonadb::InternalDataFrame__bundle` <- function(x, ...) {
cat('sedonadb::InternalDataFrame\n')
}
+
+### wrapper functions for SedonaDBExpr
+
+`SedonaDBExpr_alias` <- function(self) {
+ function(`name`) {
+ .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExpr_alias__impl, `self`,
`name`))
+ }
+}
+
+`SedonaDBExpr_cast` <- function(self) {
+ function(`schema_xptr`) {
+ .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExpr_cast__impl, `self`,
`schema_xptr`))
+ }
+}
+
+`SedonaDBExpr_debug_string` <- function(self) {
+ function() {
+ .Call(savvy_SedonaDBExpr_debug_string__impl, `self`)
+ }
+}
+
+`SedonaDBExpr_display` <- function(self) {
+ function() {
+ .Call(savvy_SedonaDBExpr_display__impl, `self`)
+ }
+}
+
+`SedonaDBExpr_negate` <- function(self) {
+ function() {
+ .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExpr_negate__impl, `self`))
+ }
+}
+
+`.savvy_wrap_SedonaDBExpr` <- function(ptr) {
+ e <- new.env(parent = emptyenv())
+ e$.ptr <- ptr
+ e$`alias` <- `SedonaDBExpr_alias`(ptr)
+ e$`cast` <- `SedonaDBExpr_cast`(ptr)
+ e$`debug_string` <- `SedonaDBExpr_debug_string`(ptr)
+ e$`display` <- `SedonaDBExpr_display`(ptr)
+ e$`negate` <- `SedonaDBExpr_negate`(ptr)
+
+ class(e) <- c("sedonadb::SedonaDBExpr", "SedonaDBExpr",
"savvy_sedonadb__sealed")
+ e
+}
+
+
+
+`SedonaDBExpr` <- new.env(parent = emptyenv())
+
+### associated functions for SedonaDBExpr
+
+
+
+class(`SedonaDBExpr`) <- c("sedonadb::SedonaDBExpr__bundle",
"savvy_sedonadb__sealed")
+
+#' @export
+`print.sedonadb::SedonaDBExpr__bundle` <- function(x, ...) {
+ cat('sedonadb::SedonaDBExpr\n')
+}
+
+### wrapper functions for SedonaDBExprFactory
+
+`SedonaDBExprFactory_aggregate_function` <- function(self) {
+ function(`name`, `args`, `na_rm` = NULL, `distinct` = NULL) {
+
.savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_aggregate_function__impl,
`self`, `name`, `args`, `na_rm`, `distinct`))
+ }
+}
+
+`SedonaDBExprFactory_binary` <- function(self) {
+ function(`op`, `lhs`, `rhs`) {
+ `lhs` <- .savvy_extract_ptr(`lhs`, "sedonadb::SedonaDBExpr")
+ `rhs` <- .savvy_extract_ptr(`rhs`, "sedonadb::SedonaDBExpr")
+ .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_binary__impl,
`self`, `op`, `lhs`, `rhs`))
+ }
+}
+
+`SedonaDBExprFactory_column` <- function(self) {
+ function(`name`, `qualifier` = NULL) {
+ .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_column__impl,
`self`, `name`, `qualifier`))
+ }
+}
+
+`SedonaDBExprFactory_scalar_function` <- function(self) {
+ function(`name`, `args`) {
+
.savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_scalar_function__impl,
`self`, `name`, `args`))
+ }
+}
+
+`.savvy_wrap_SedonaDBExprFactory` <- function(ptr) {
+ e <- new.env(parent = emptyenv())
+ e$.ptr <- ptr
+ e$`aggregate_function` <- `SedonaDBExprFactory_aggregate_function`(ptr)
+ e$`binary` <- `SedonaDBExprFactory_binary`(ptr)
+ e$`column` <- `SedonaDBExprFactory_column`(ptr)
+ e$`scalar_function` <- `SedonaDBExprFactory_scalar_function`(ptr)
+
+ class(e) <- c("sedonadb::SedonaDBExprFactory", "SedonaDBExprFactory",
"savvy_sedonadb__sealed")
+ e
+}
+
+
+
+`SedonaDBExprFactory` <- new.env(parent = emptyenv())
+
+### associated functions for SedonaDBExprFactory
+
+`SedonaDBExprFactory`$`literal` <- function(`array_xptr`, `schema_xptr`) {
+ .savvy_wrap_SedonaDBExpr(.Call(savvy_SedonaDBExprFactory_literal__impl,
`array_xptr`, `schema_xptr`))
+}
+
+`SedonaDBExprFactory`$`new` <- function(`ctx`) {
+ `ctx` <- .savvy_extract_ptr(`ctx`, "sedonadb::InternalContext")
+ .savvy_wrap_SedonaDBExprFactory(.Call(savvy_SedonaDBExprFactory_new__impl,
`ctx`))
+}
+
+
+class(`SedonaDBExprFactory`) <- c("sedonadb::SedonaDBExprFactory__bundle",
"savvy_sedonadb__sealed")
+
+#' @export
+`print.sedonadb::SedonaDBExprFactory__bundle` <- function(x, ...) {
+ cat('sedonadb::SedonaDBExprFactory\n')
+}
diff --git a/r/sedonadb/R/expression.R b/r/sedonadb/R/expression.R
new file mode 100644
index 00000000..50240d4a
--- /dev/null
+++ b/r/sedonadb/R/expression.R
@@ -0,0 +1,283 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Create SedonaDB logical expressions
+#'
+#' @param column_name A column name
+#' @param qualifier An optional qualifier (e.g., table reference) that may be
+#' used to disambiguate a specific reference
+#' @param function_name The name of the function to call. This name is resolved
+#' from the context associated with `factory`.
+#' @param type A destination type into which `expr` should be cast.
+#' @param expr A SedonaDBExpr or object coercible to one with [as_sd_expr()].
+#' @param alias An alias to apply to `expr`.
+#' @param op Operator name for a binary expression. In general these follow
+#' R function names (e.g., `>`, `<`, `+`, `-`).
+#' @param lhs,rhs Arguments to a binary expression
+#' @param factory A [sd_expr_factory()]. This factory wraps a SedonaDB context
+#' and is used to resolve scalar functions and/or retrieve options.
+#'
+#' @returns An object of class SedonaDBExpr
+#' @export
+#'
+#' @examples
+#' sd_expr_column("foofy")
+#' sd_expr_literal(1L)
+#' sd_expr_scalar_function("abs", list(1L))
+#' sd_expr_cast(1L, nanoarrow::na_int64())
+#' sd_expr_alias(1L, "foofy")
+#'
+sd_expr_column <- function(column_name, qualifier = NULL, factory =
sd_expr_factory()) {
+ factory$column(column_name, qualifier)
+}
+
+#' @rdname sd_expr_column
+#' @export
+sd_expr_literal <- function(x, type = NULL, factory = sd_expr_factory()) {
+ as_sedonadb_literal(x, type = type, factory = factory)
+}
+
+#' @rdname sd_expr_column
+#' @export
+sd_expr_binary <- function(op, lhs, rhs, factory = sd_expr_factory()) {
+ factory$binary(op, as_sd_expr(lhs), as_sd_expr(rhs))
+}
+
+#' @rdname sd_expr_column
+#' @export
+sd_expr_negative <- function(expr, factory = sd_expr_factory()) {
+ as_sd_expr(expr, factory = factory)$negate()
+}
+
+#' @rdname sd_expr_column
+#' @export
+sd_expr_scalar_function <- function(function_name, args, factory =
sd_expr_factory()) {
+ args_as_expr <- lapply(args, as_sd_expr, factory = factory)
+ factory$scalar_function(function_name, args_as_expr)
+}
+
+#' @rdname sd_expr_column
+#' @export
+sd_expr_aggregate_function <- function(function_name, args, ...,
+ na.rm = FALSE, distinct = FALSE,
factory = sd_expr_factory()) {
+ args_as_expr <- lapply(args, as_sd_expr, factory = factory)
+ factory$aggregate_function(function_name, args_as_expr, na_rm = na.rm,
distinct = distinct)
+}
+
+#' @rdname sd_expr_column
+#' @export
+sd_expr_cast <- function(expr, type, factory = sd_expr_factory()) {
+ expr <- as_sd_expr(expr, factory = factory)
+ type <- nanoarrow::as_nanoarrow_schema(type)
+ expr$cast(type)
+}
+
+#' @rdname sd_expr_column
+#' @export
+sd_expr_alias <- function(expr, alias, factory = sd_expr_factory()) {
+ expr <- as_sd_expr(expr, factory = factory)
+ expr$alias(alias)
+}
+
+#' @rdname sd_expr_column
+#' @export
+as_sd_expr <- function(x, factory = sd_expr_factory()) {
+ if (inherits(x, "SedonaDBExpr")) {
+ x
+ } else {
+ sd_expr_literal(x, factory = factory)
+ }
+}
+
+#' @rdname sd_expr_column
+#' @export
+is_sd_expr <- function(x) {
+ inherits(x, "SedonaDBExpr")
+}
+
+#' @rdname sd_expr_column
+#' @export
+sd_expr_factory <- function() {
+ SedonaDBExprFactory$new(ctx())
+}
+
+#' @export
+print.SedonaDBExpr <- function(x, ...) {
+ cat("<SedonaDBExpr>\n")
+ cat(x$display())
+ cat("\n")
+ invisible(x)
+}
+
+#' Evaluate an R expression into a SedonaDB expression
+#'
+#' @param expr An R expression (e.g., the result of `quote()`).
+#' @param expr_ctx An `sd_expr_ctx()`
+#'
+#' @returns A `SedonaDBExpr`
+#' @noRd
+sd_eval_expr <- function(expr, expr_ctx = sd_expr_ctx(env = env), env =
parent.frame()) {
+ ensure_translations_registered()
+
+ rlang::try_fetch({
+ result <- sd_eval_expr_inner(expr, expr_ctx)
+ as_sd_expr(result, factory = expr_ctx$factory)
+ }, error = function(e) {
+ rlang::abort(
+ sprintf("Error evaluating translated expression %s",
rlang::expr_label(expr)),
+ parent = e
+ )
+ })
+}
+
+sd_eval_expr_inner <- function(expr, expr_ctx) {
+ if (rlang::is_call(expr)) {
+ # Extract `pkg::fun` or `fun` if this is a usual call (e.g., not
+ # something fancy like `fun()()`)
+ call_name <- rlang::call_name(expr)
+
+ # If this is not a fancy function call and we have a translation, call it.
+ # Individual translations can choose to defer to the R function if all the
+ # arguments are R objects and not SedonaDB expressions (or the user can
+ # use !! to force R evaluation).
+ if (!is.null(call_name) && !is.null(expr_ctx$fns[[call_name]])) {
+ sd_eval_translation(call_name, expr, expr_ctx)
+ } else {
+ sd_eval_default(expr, expr_ctx)
+ }
+ } else {
+ sd_eval_default(expr, expr_ctx)
+ }
+}
+
+sd_eval_translation <- function(fn_key, expr, expr_ctx) {
+ # Replace the function with the translation in such a way that
+ # any error resulting from the call doesn't have an absolute garbage error
+ # stack trace
+ new_fn_expr <- rlang::call2("$", expr_ctx$fns, rlang::sym(fn_key))
+
+ # Evaluate arguments individually. We may need to allow translations to
+ # override this step to have more control over the expression evaluation.
+ evaluated_args <- lapply(expr[-1], sd_eval_expr_inner, expr_ctx = expr_ctx)
+
+ # Recreate the call, injecting the context as the first argument
+ new_call <- rlang::call2(new_fn_expr, expr_ctx, !!!evaluated_args)
+
+ # ...and evaluate it
+ sd_eval_default(new_call, expr_ctx)
+}
+
+sd_eval_default <- function(expr, expr_ctx) {
+ rlang::eval_tidy(expr, data = expr_ctx$data, env = expr_ctx$env)
+}
+
+#' Expression evaluation context
+#'
+#' A context to use for evaluating a set of related R expressions into
+#' SedonaDB expressions. One expression context may be used to translate
+#' multiple expressions (e.g., all arguments to `mutate()`).
+#'
+#' @param schema A schema-like object coerced using
+#' [nanoarrow::as_nanoarrow_schema()]. This is used to create the data mask
+#' for expressions.
+#' @param env The expression environment. This is needed to evaluate
expressions.
+#'
+#' @return An object of class sedonadb_expr_ctx
+#' @noRd
+sd_expr_ctx <- function(schema = NULL, env = parent.frame()) {
+ if (is.null(schema)) {
+ schema <- nanoarrow::na_struct()
+ }
+
+ schema <- nanoarrow::as_nanoarrow_schema(schema)
+ data_names <- as.character(names(schema$children))
+ data <- lapply(data_names, sd_expr_column)
+ names(data) <- data_names
+
+ structure(
+ list(
+ factory = sd_expr_factory(),
+ schema = schema,
+ data = rlang::as_data_mask(data),
+ env = env,
+ fns = default_fns
+ ),
+ class = "sedonadb_expr_ctx"
+ )
+}
+
+#' Register an R function translation into a SedonaDB expression
+#'
+#' @param qualified_name The name of the function in the form `pkg::fun` or
+#' `fun` if the package name is not relevant. This allows translations to
+#' support calls to `fun()` or `pkg::fun()` that appear in an R expression.
+#' @param fn A function. The first argument must always be `.ctx`, which
+#' is the instance of `sd_expr_ctx()` that may be used to construct
+#' the required expressions (using `$factory`).
+#'
+#' @returns fn, invisibly
+#' @noRd
+sd_register_translation <- function(qualified_name, fn) {
+ stopifnot(is.function(fn))
+
+ pieces <- strsplit(qualified_name, "::")[[1]]
+ unqualified_name <- pieces[[2]]
+
+ default_fns[[qualified_name]] <- default_fns[[unqualified_name]] <- fn
+ invisible(fn)
+}
+
+default_fns <- new.env(parent = emptyenv())
+
+# Register translations lazily because SQL users don't need them and because
+# we need rlang for this and it is currently in Suggests
+ensure_translations_registered <- function() {
+ if (!is.null(default_fns$abs)) {
+ return()
+ }
+
+ sd_register_translation("base::abs", function(.ctx, x) {
+ sd_expr_scalar_function("abs", list(x), factory = .ctx$factory)
+ })
+
+ sd_register_translation("base::sum", function(.ctx, x, ..., na.rm = FALSE) {
+ sd_expr_aggregate_function("sum", list(x), na.rm = na.rm, factory =
.ctx$factory)
+ })
+
+ sd_register_translation("base::+", function(.ctx, lhs, rhs) {
+ if (missing(rhs)) {
+ # Use a double negative to ensure this fails for non-numeric types
+ sd_expr_negative(sd_expr_negative(lhs, factory = .ctx$factory), factory
= .ctx$factory)
+ } else {
+ sd_expr_binary("+", lhs, rhs, factory = .ctx$factory)
+ }
+ })
+
+ sd_register_translation("base::-", function(.ctx, lhs, rhs) {
+ if (missing(rhs)) {
+ sd_expr_negative(lhs, factory = .ctx$factory)
+ } else {
+ sd_expr_binary("-", lhs, rhs, factory = .ctx$factory)
+ }
+ })
+
+ for (op in c("==", "!=", ">", ">=", "<", "<=", "*", "/", "&", "|")) {
+ sd_register_translation(paste0("base::", op), rlang::inject(function(.ctx,
lhs, rhs) {
+ sd_expr_binary(!!op, lhs, rhs, factory = .ctx$factory)
+ }))
+ }
+}
diff --git a/r/sedonadb/R/literal.R b/r/sedonadb/R/literal.R
new file mode 100644
index 00000000..679239ca
--- /dev/null
+++ b/r/sedonadb/R/literal.R
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' S3 Generic to create a SedonaDB literal expression
+#'
+#' This generic provides the opportunity for objects to register a mechanism
+#' to be understood as literals in the context of a SedonaDB expression.
+#' Users constructing expressions directly should use [sd_expr_literal()].
+#'
+#' @param x An object to convert to a SedonaDB literal
+#' @param ... Passed to/from methods
+#' @param type An optional data type to request for the output
+#' @param factory An `sd_expr_factory()` that should be passed to any
+#' other calls to `as_sedonadb_literal()` if needed
+#'
+#' @returns An object of class SedonaDBExpr
+#' @export
+#'
+#' @examples
+#' as_sedonadb_literal("abcd")
+#'
+as_sedonadb_literal <- function(x, ..., type = NULL, factory = NULL) {
+ UseMethod("as_sedonadb_literal")
+}
+
+#' @export
+as_sedonadb_literal.NULL <- function(x, ..., type = NULL) {
+ na <- nanoarrow::nanoarrow_array_init(nanoarrow::na_na()) |>
+ nanoarrow::nanoarrow_array_modify(list(length = 1L, null_count = 1L))
+ as_sedonadb_literal_from_nanoarrow(na, ..., type = type)
+}
+
+#' @export
+as_sedonadb_literal.character <- function(x, ..., type = NULL) {
+ as_sedonadb_literal_from_nanoarrow(x, ..., type = type)
+}
+
+#' @export
+as_sedonadb_literal.integer <- function(x, ..., type = NULL) {
+ as_sedonadb_literal_from_nanoarrow(x, ..., type = type)
+}
+
+#' @export
+as_sedonadb_literal.double <- function(x, ..., type = NULL) {
+ as_sedonadb_literal_from_nanoarrow(x, ..., type = type)
+}
+
+#' @export
+as_sedonadb_literal.raw <- function(x, ..., type = NULL) {
+ as_sedonadb_literal_from_nanoarrow(list(x), ..., type = type)
+}
+
+#' @export
+as_sedonadb_literal.wk_wkb <- function(x, ..., type = NULL) {
+ as_sedonadb_literal_from_nanoarrow(x, ..., type = type)
+}
+
+as_sedonadb_literal_from_nanoarrow <- function(x, ..., type = NULL) {
+ array <- nanoarrow::as_nanoarrow_array(x)
+ if (array$length != 1L) {
+ stop("Can't convert non-scalar to sedonadb_expr")
+ }
+
+ as_sedonadb_literal(array, type = type)
+}
+
+#' @export
+as_sedonadb_literal.nanoarrow_array <- function(x, ..., type = NULL) {
+ schema <- nanoarrow::infer_nanoarrow_schema(x)
+
+ array_export <- nanoarrow::nanoarrow_allocate_array()
+ nanoarrow::nanoarrow_pointer_export(x, array_export)
+
+ expr <- SedonaDBExprFactory$literal(array_export, schema)
+ handle_type_request(expr, type)
+}
+
+handle_type_request <- function(x, type) {
+ if (!is.null(type)) {
+ x$cast(nanoarrow::as_nanoarrow_schema(type))
+ } else {
+ x
+ }
+}
diff --git a/r/sedonadb/man/as_sedonadb_literal.Rd
b/r/sedonadb/man/as_sedonadb_literal.Rd
new file mode 100644
index 00000000..448b0222
--- /dev/null
+++ b/r/sedonadb/man/as_sedonadb_literal.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/literal.R
+\name{as_sedonadb_literal}
+\alias{as_sedonadb_literal}
+\title{S3 Generic to create a SedonaDB literal expression}
+\usage{
+as_sedonadb_literal(x, ..., type = NULL, factory = NULL)
+}
+\arguments{
+\item{x}{An object to convert to a SedonaDB literal}
+
+\item{...}{Passed to/from methods}
+
+\item{type}{An optional data type to request for the output}
+
+\item{factory}{An \code{sd_expr_factory()} that should be passed to any
+other calls to \code{as_sedonadb_literal()} if needed}
+}
+\value{
+An object of class SedonaDBExpr
+}
+\description{
+This generic provides the opportunity for objects to register a mechanism
+to be understood as literals in the context of a SedonaDB expression.
+Users constructing expressions directly should use
\code{\link[=sd_expr_literal]{sd_expr_literal()}}.
+}
+\examples{
+as_sedonadb_literal("abcd")
+
+}
diff --git a/r/sedonadb/man/sd_expr_column.Rd b/r/sedonadb/man/sd_expr_column.Rd
new file mode 100644
index 00000000..50c41a89
--- /dev/null
+++ b/r/sedonadb/man/sd_expr_column.Rd
@@ -0,0 +1,82 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/expression.R
+\name{sd_expr_column}
+\alias{sd_expr_column}
+\alias{sd_expr_literal}
+\alias{sd_expr_binary}
+\alias{sd_expr_negative}
+\alias{sd_expr_scalar_function}
+\alias{sd_expr_aggregate_function}
+\alias{sd_expr_cast}
+\alias{sd_expr_alias}
+\alias{as_sd_expr}
+\alias{is_sd_expr}
+\alias{sd_expr_factory}
+\title{Create SedonaDB logical expressions}
+\usage{
+sd_expr_column(column_name, qualifier = NULL, factory = sd_expr_factory())
+
+sd_expr_literal(x, type = NULL, factory = sd_expr_factory())
+
+sd_expr_binary(op, lhs, rhs, factory = sd_expr_factory())
+
+sd_expr_negative(expr, factory = sd_expr_factory())
+
+sd_expr_scalar_function(function_name, args, factory = sd_expr_factory())
+
+sd_expr_aggregate_function(
+ function_name,
+ args,
+ ...,
+ na.rm = FALSE,
+ distinct = FALSE,
+ factory = sd_expr_factory()
+)
+
+sd_expr_cast(expr, type, factory = sd_expr_factory())
+
+sd_expr_alias(expr, alias, factory = sd_expr_factory())
+
+as_sd_expr(x, factory = sd_expr_factory())
+
+is_sd_expr(x)
+
+sd_expr_factory()
+}
+\arguments{
+\item{column_name}{A column name}
+
+\item{qualifier}{An optional qualifier (e.g., table reference) that may be
+used to disambiguate a specific reference}
+
+\item{factory}{A \code{\link[=sd_expr_factory]{sd_expr_factory()}}. This
factory wraps a SedonaDB context
+and is used to resolve scalar functions and/or retrieve options.}
+
+\item{type}{A destination type into which \code{expr} should be cast.}
+
+\item{op}{Operator name for a binary expression. In general these follow
+R function names (e.g., \code{>}, \code{<}, \code{+}, \code{-}).}
+
+\item{lhs, rhs}{Arguments to a binary expression}
+
+\item{expr}{A SedonaDBExpr or object coercible to one with
\code{\link[=as_sd_expr]{as_sd_expr()}}.}
+
+\item{function_name}{The name of the function to call. This name is resolved
+from the context associated with \code{factory}.}
+
+\item{alias}{An alias to apply to \code{expr}.}
+}
+\value{
+An object of class SedonaDBExpr
+}
+\description{
+Create SedonaDB logical expressions
+}
+\examples{
+sd_expr_column("foofy")
+sd_expr_literal(1L)
+sd_expr_scalar_function("abs", list(1L))
+sd_expr_cast(1L, nanoarrow::na_int64())
+sd_expr_alias(1L, "foofy")
+
+}
diff --git a/r/sedonadb/src/init.c b/r/sedonadb/src/init.c
index 8405b2cc..6faa1521 100644
--- a/r/sedonadb/src/init.c
+++ b/r/sedonadb/src/init.c
@@ -209,6 +209,75 @@ SEXP savvy_InternalDataFrame_to_view__impl(SEXP self__,
SEXP c_arg__ctx,
return handle_result(res);
}
+SEXP savvy_SedonaDBExpr_alias__impl(SEXP self__, SEXP c_arg__name) {
+ SEXP res = savvy_SedonaDBExpr_alias__ffi(self__, c_arg__name);
+ return handle_result(res);
+}
+
+SEXP savvy_SedonaDBExpr_cast__impl(SEXP self__, SEXP c_arg__schema_xptr) {
+ SEXP res = savvy_SedonaDBExpr_cast__ffi(self__, c_arg__schema_xptr);
+ return handle_result(res);
+}
+
+SEXP savvy_SedonaDBExpr_debug_string__impl(SEXP self__) {
+ SEXP res = savvy_SedonaDBExpr_debug_string__ffi(self__);
+ return handle_result(res);
+}
+
+SEXP savvy_SedonaDBExpr_display__impl(SEXP self__) {
+ SEXP res = savvy_SedonaDBExpr_display__ffi(self__);
+ return handle_result(res);
+}
+
+SEXP savvy_SedonaDBExpr_negate__impl(SEXP self__) {
+ SEXP res = savvy_SedonaDBExpr_negate__ffi(self__);
+ return handle_result(res);
+}
+
+SEXP savvy_SedonaDBExprFactory_aggregate_function__impl(SEXP self__,
+ SEXP c_arg__name,
+ SEXP c_arg__args,
+ SEXP c_arg__na_rm,
+ SEXP c_arg__distinct) {
+ SEXP res = savvy_SedonaDBExprFactory_aggregate_function__ffi(
+ self__, c_arg__name, c_arg__args, c_arg__na_rm, c_arg__distinct);
+ return handle_result(res);
+}
+
+SEXP savvy_SedonaDBExprFactory_binary__impl(SEXP self__, SEXP c_arg__op,
+ SEXP c_arg__lhs, SEXP c_arg__rhs) {
+ SEXP res = savvy_SedonaDBExprFactory_binary__ffi(self__, c_arg__op,
+ c_arg__lhs, c_arg__rhs);
+ return handle_result(res);
+}
+
+SEXP savvy_SedonaDBExprFactory_column__impl(SEXP self__, SEXP c_arg__name,
+ SEXP c_arg__qualifier) {
+ SEXP res = savvy_SedonaDBExprFactory_column__ffi(self__, c_arg__name,
+ c_arg__qualifier);
+ return handle_result(res);
+}
+
+SEXP savvy_SedonaDBExprFactory_literal__impl(SEXP c_arg__array_xptr,
+ SEXP c_arg__schema_xptr) {
+ SEXP res = savvy_SedonaDBExprFactory_literal__ffi(c_arg__array_xptr,
+ c_arg__schema_xptr);
+ return handle_result(res);
+}
+
+SEXP savvy_SedonaDBExprFactory_new__impl(SEXP c_arg__ctx) {
+ SEXP res = savvy_SedonaDBExprFactory_new__ffi(c_arg__ctx);
+ return handle_result(res);
+}
+
+SEXP savvy_SedonaDBExprFactory_scalar_function__impl(SEXP self__,
+ SEXP c_arg__name,
+ SEXP c_arg__args) {
+ SEXP res = savvy_SedonaDBExprFactory_scalar_function__ffi(self__,
c_arg__name,
+ c_arg__args);
+ return handle_result(res);
+}
+
static const R_CallMethodDef CallEntries[] = {
{"savvy_configure_proj_shared__impl",
(DL_FUNC)&savvy_configure_proj_shared__impl, 3},
@@ -258,6 +327,28 @@ static const R_CallMethodDef CallEntries[] = {
(DL_FUNC)&savvy_InternalDataFrame_to_provider__impl, 1},
{"savvy_InternalDataFrame_to_view__impl",
(DL_FUNC)&savvy_InternalDataFrame_to_view__impl, 4},
+ {"savvy_SedonaDBExpr_alias__impl",
(DL_FUNC)&savvy_SedonaDBExpr_alias__impl,
+ 2},
+ {"savvy_SedonaDBExpr_cast__impl", (DL_FUNC)&savvy_SedonaDBExpr_cast__impl,
+ 2},
+ {"savvy_SedonaDBExpr_debug_string__impl",
+ (DL_FUNC)&savvy_SedonaDBExpr_debug_string__impl, 1},
+ {"savvy_SedonaDBExpr_display__impl",
+ (DL_FUNC)&savvy_SedonaDBExpr_display__impl, 1},
+ {"savvy_SedonaDBExpr_negate__impl",
+ (DL_FUNC)&savvy_SedonaDBExpr_negate__impl, 1},
+ {"savvy_SedonaDBExprFactory_aggregate_function__impl",
+ (DL_FUNC)&savvy_SedonaDBExprFactory_aggregate_function__impl, 5},
+ {"savvy_SedonaDBExprFactory_binary__impl",
+ (DL_FUNC)&savvy_SedonaDBExprFactory_binary__impl, 4},
+ {"savvy_SedonaDBExprFactory_column__impl",
+ (DL_FUNC)&savvy_SedonaDBExprFactory_column__impl, 3},
+ {"savvy_SedonaDBExprFactory_literal__impl",
+ (DL_FUNC)&savvy_SedonaDBExprFactory_literal__impl, 2},
+ {"savvy_SedonaDBExprFactory_new__impl",
+ (DL_FUNC)&savvy_SedonaDBExprFactory_new__impl, 1},
+ {"savvy_SedonaDBExprFactory_scalar_function__impl",
+ (DL_FUNC)&savvy_SedonaDBExprFactory_scalar_function__impl, 3},
{NULL, NULL, 0}};
void R_init_sedonadb(DllInfo *dll) {
diff --git a/r/sedonadb/src/rust/api.h b/r/sedonadb/src/rust/api.h
index 201039e1..fac6258b 100644
--- a/r/sedonadb/src/rust/api.h
+++ b/r/sedonadb/src/rust/api.h
@@ -60,3 +60,27 @@ SEXP savvy_InternalDataFrame_to_provider__ffi(SEXP self__);
SEXP savvy_InternalDataFrame_to_view__ffi(SEXP self__, SEXP c_arg__ctx,
SEXP c_arg__table_ref,
SEXP c_arg__overwrite);
+
+// methods and associated functions for SedonaDBExpr
+SEXP savvy_SedonaDBExpr_alias__ffi(SEXP self__, SEXP c_arg__name);
+SEXP savvy_SedonaDBExpr_cast__ffi(SEXP self__, SEXP c_arg__schema_xptr);
+SEXP savvy_SedonaDBExpr_debug_string__ffi(SEXP self__);
+SEXP savvy_SedonaDBExpr_display__ffi(SEXP self__);
+SEXP savvy_SedonaDBExpr_negate__ffi(SEXP self__);
+
+// methods and associated functions for SedonaDBExprFactory
+SEXP savvy_SedonaDBExprFactory_aggregate_function__ffi(SEXP self__,
+ SEXP c_arg__name,
+ SEXP c_arg__args,
+ SEXP c_arg__na_rm,
+ SEXP c_arg__distinct);
+SEXP savvy_SedonaDBExprFactory_binary__ffi(SEXP self__, SEXP c_arg__op,
+ SEXP c_arg__lhs, SEXP c_arg__rhs);
+SEXP savvy_SedonaDBExprFactory_column__ffi(SEXP self__, SEXP c_arg__name,
+ SEXP c_arg__qualifier);
+SEXP savvy_SedonaDBExprFactory_literal__ffi(SEXP c_arg__array_xptr,
+ SEXP c_arg__schema_xptr);
+SEXP savvy_SedonaDBExprFactory_new__ffi(SEXP c_arg__ctx);
+SEXP savvy_SedonaDBExprFactory_scalar_function__ffi(SEXP self__,
+ SEXP c_arg__name,
+ SEXP c_arg__args);
diff --git a/r/sedonadb/src/rust/src/expression.rs
b/r/sedonadb/src/rust/src/expression.rs
new file mode 100644
index 00000000..0add4b53
--- /dev/null
+++ b/r/sedonadb/src/rust/src/expression.rs
@@ -0,0 +1,199 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use datafusion_common::{Column, ScalarValue};
+use datafusion_expr::{
+ expr::{AggregateFunction, FieldMetadata, NullTreatment, ScalarFunction},
+ BinaryExpr, Cast, Expr, Operator,
+};
+use savvy::{savvy, savvy_err, EnvironmentSexp};
+use sedona::context::SedonaContext;
+
+use crate::{
+ context::InternalContext,
+ ffi::{import_array, import_field},
+};
+
+#[savvy]
+pub struct SedonaDBExpr {
+ pub inner: Expr,
+}
+
+#[savvy]
+impl SedonaDBExpr {
+ fn display(&self) -> savvy::Result<savvy::Sexp> {
+ format!("{}", self.inner).try_into()
+ }
+
+ fn debug_string(&self) -> savvy::Result<savvy::Sexp> {
+ format!("{:?}", self.inner).try_into()
+ }
+
+ fn alias(&self, name: &str) -> savvy::Result<SedonaDBExpr> {
+ let inner = self.inner.clone().alias_if_changed(name.to_string())?;
+ Ok(Self { inner })
+ }
+
+ fn cast(&self, schema_xptr: savvy::Sexp) -> savvy::Result<SedonaDBExpr> {
+ let field = import_field(schema_xptr)?;
+ if let Some(type_name) = field.extension_type_name() {
+ return Err(savvy_err!(
+ "Can't cast to Arrow extension type '{type_name}'"
+ ));
+ }
+
+ let inner = Expr::Cast(Cast::new(
+ self.inner.clone().into(),
+ field.data_type().clone(),
+ ));
+
+ Ok(Self { inner })
+ }
+
+ fn negate(&self) -> savvy::Result<SedonaDBExpr> {
+ let inner = Expr::Negative(Box::new(self.inner.clone()));
+ Ok(Self { inner })
+ }
+}
+
+#[savvy]
+pub struct SedonaDBExprFactory {
+ pub ctx: Arc<SedonaContext>,
+}
+
+#[savvy]
+impl SedonaDBExprFactory {
+ fn new(ctx: &InternalContext) -> Self {
+ Self {
+ ctx: ctx.inner.clone(),
+ }
+ }
+
+ fn literal(array_xptr: savvy::Sexp, schema_xptr: savvy::Sexp) ->
savvy::Result<SedonaDBExpr> {
+ let (field, array_ref) = import_array(array_xptr, schema_xptr)?;
+ let metadata = if field.metadata().is_empty() {
+ None
+ } else {
+ Some(FieldMetadata::new_from_field(&field))
+ };
+
+ let scalar_value = ScalarValue::try_from_array(&array_ref, 0)?;
+ let inner = Expr::Literal(scalar_value, metadata);
+ Ok(SedonaDBExpr { inner })
+ }
+
+ fn column(&self, name: &str, qualifier: Option<&str>) ->
savvy::Result<SedonaDBExpr> {
+ let inner = Expr::Column(Column::new(qualifier, name));
+ Ok(SedonaDBExpr { inner })
+ }
+
+ fn binary(
+ &self,
+ op: &str,
+ lhs: &SedonaDBExpr,
+ rhs: &SedonaDBExpr,
+ ) -> savvy::Result<SedonaDBExpr> {
+ let operator = match op {
+ "==" => Operator::Eq,
+ "!=" => Operator::NotEq,
+ ">" => Operator::Gt,
+ ">=" => Operator::GtEq,
+ "<" => Operator::Lt,
+ "<=" => Operator::LtEq,
+ "+" => Operator::Plus,
+ "-" => Operator::Minus,
+ "*" => Operator::Multiply,
+ "/" => Operator::Divide,
+ "&" => Operator::And,
+ "|" => Operator::Or,
+ other => return Err(savvy_err!("Unimplemented binary operation
'{other}'")),
+ };
+
+ let inner = Expr::BinaryExpr(BinaryExpr::new(
+ Box::new(lhs.inner.clone()),
+ operator,
+ Box::new(rhs.inner.clone()),
+ ));
+ Ok(SedonaDBExpr { inner })
+ }
+
+ fn scalar_function(&self, name: &str, args: savvy::Sexp) ->
savvy::Result<SedonaDBExpr> {
+ if let Some(udf) = self.ctx.ctx.state().scalar_functions().get(name) {
+ let args = Self::exprs(args)?;
+ let inner =
Expr::ScalarFunction(ScalarFunction::new_udf(udf.clone(), args));
+ Ok(SedonaDBExpr { inner })
+ } else {
+ Err(savvy_err!("Scalar UDF '{name}' not found"))
+ }
+ }
+
+ fn aggregate_function(
+ &self,
+ name: &str,
+ args: savvy::Sexp,
+ na_rm: Option<bool>,
+ distinct: Option<bool>,
+ ) -> savvy::Result<SedonaDBExpr> {
+ if let Some(udf) =
self.ctx.ctx.state().aggregate_functions().get(name) {
+ let args = Self::exprs(args)?;
+ let null_treatment = if na_rm.unwrap_or(true) {
+ NullTreatment::IgnoreNulls
+ } else {
+ NullTreatment::RespectNulls
+ };
+
+ let inner = Expr::AggregateFunction(AggregateFunction::new_udf(
+ udf.clone(),
+ args,
+ distinct.unwrap_or(false),
+ None, // filter
+ vec![], // order by
+ Some(null_treatment),
+ ));
+
+ Ok(SedonaDBExpr { inner })
+ } else {
+ Err(savvy_err!("Aggregate UDF '{name}' not found"))
+ }
+ }
+}
+
+impl SedonaDBExprFactory {
+ fn exprs(exprs_sexp: savvy::Sexp) -> savvy::Result<Vec<Expr>> {
+ savvy::ListSexp::try_from(exprs_sexp)?
+ .iter()
+ .map(|(_, item)| -> savvy::Result<Expr> {
+ // item here is the Environment wrapper around the external
pointer
+ let expr_wrapper: &SedonaDBExpr =
EnvironmentSexp::try_from(item)?.try_into()?;
+ Ok(expr_wrapper.inner.clone())
+ })
+ .collect()
+ }
+}
+
+impl TryFrom<EnvironmentSexp> for &SedonaDBExpr {
+ type Error = savvy::Error;
+
+ fn try_from(env: EnvironmentSexp) -> Result<Self, Self::Error> {
+ env.get(".ptr")?
+ .map(<&SedonaDBExpr>::try_from)
+ .transpose()?
+ .ok_or(savvy_err!("Invalid SedonaDBExpr object."))
+ }
+}
diff --git a/r/sedonadb/src/rust/src/ffi.rs b/r/sedonadb/src/rust/src/ffi.rs
index 4275e264..828364c3 100644
--- a/r/sedonadb/src/rust/src/ffi.rs
+++ b/r/sedonadb/src/rust/src/ffi.rs
@@ -18,10 +18,11 @@
use std::sync::Arc;
use arrow_array::{
- ffi::FFI_ArrowSchema,
+ ffi::{from_ffi_and_data_type, FFI_ArrowArray, FFI_ArrowSchema},
ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream},
+ make_array, ArrayRef,
};
-use arrow_schema::Schema;
+use arrow_schema::{Field, Schema};
use datafusion::catalog::TableProvider;
use datafusion_expr::ScalarUDF;
use datafusion_ffi::{
@@ -36,6 +37,25 @@ pub fn import_schema(mut xptr: savvy::Sexp) ->
savvy::Result<Schema> {
Ok(schema)
}
+pub fn import_field(mut xptr: savvy::Sexp) -> savvy::Result<Field> {
+ let ffi_schema: &FFI_ArrowSchema = import_xptr(&mut xptr,
"nanoarrow_schema")?;
+ let schema = Field::try_from(ffi_schema)?;
+ Ok(schema)
+}
+
+pub fn import_array(
+ mut xptr: savvy::Sexp,
+ schema_xptr: savvy::Sexp,
+) -> savvy::Result<(Field, ArrayRef)> {
+ let field = import_field(schema_xptr)?;
+ let ffi_array_ref: &mut FFI_ArrowArray = import_xptr(&mut xptr,
"nanoarrow_array")?;
+ let ffi_array_owned = unsafe { FFI_ArrowArray::from_raw(ffi_array_ref as
_) };
+ let array_data =
+ unsafe { from_ffi_and_data_type(ffi_array_owned as _,
field.data_type().clone())? };
+ let array_ref = make_array(array_data);
+ Ok((field, array_ref))
+}
+
pub fn import_array_stream(mut xptr: savvy::Sexp) ->
savvy::Result<ArrowArrayStreamReader> {
let ffi_stream: &mut FFI_ArrowArrayStream = import_xptr(&mut xptr,
"nanoarrow_array_stream")?;
let reader = unsafe { ArrowArrayStreamReader::from_raw(ffi_stream as _)? };
diff --git a/r/sedonadb/src/rust/src/lib.rs b/r/sedonadb/src/rust/src/lib.rs
index 07c6f311..84251908 100644
--- a/r/sedonadb/src/rust/src/lib.rs
+++ b/r/sedonadb/src/rust/src/lib.rs
@@ -27,6 +27,7 @@ use sedona_proj::register::{configure_global_proj_engine,
ProjCrsEngineBuilder};
mod context;
mod dataframe;
mod error;
+mod expression;
mod ffi;
mod runtime;
diff --git a/r/sedonadb/tests/testthat/_snaps/expression.md
b/r/sedonadb/tests/testthat/_snaps/expression.md
new file mode 100644
index 00000000..5d7452a5
--- /dev/null
+++ b/r/sedonadb/tests/testthat/_snaps/expression.md
@@ -0,0 +1,170 @@
+# basic expression types can be constructed
+
+ Code
+ sd_expr_column("foofy")
+ Output
+ <SedonaDBExpr>
+ foofy
+
+---
+
+ Code
+ sd_expr_literal(1L)
+ Output
+ <SedonaDBExpr>
+ Int32(1)
+
+---
+
+ Code
+ sd_expr_scalar_function("abs", list(1L))
+ Output
+ <SedonaDBExpr>
+ abs(Int32(1))
+
+---
+
+ Code
+ sd_expr_cast(1L, nanoarrow::na_int64())
+ Output
+ <SedonaDBExpr>
+ CAST(Int32(1) AS Int64)
+
+---
+
+ Code
+ sd_expr_alias(1L, "foofy")
+ Output
+ <SedonaDBExpr>
+ Int32(1) AS foofy
+
+---
+
+ Code
+ sd_expr_binary("+", 1L, 2L)
+ Output
+ <SedonaDBExpr>
+ Int32(1) + Int32(2)
+
+---
+
+ Code
+ sd_expr_negative(1L)
+ Output
+ <SedonaDBExpr>
+ (- Int32(1))
+
+---
+
+ Code
+ sd_expr_aggregate_function("sum", list(1L))
+ Output
+ <SedonaDBExpr>
+ sum(Int32(1)) RESPECT NULLS
+
+# literal expressions can be translated
+
+ Code
+ sd_eval_expr(quote(1L))
+ Output
+ <SedonaDBExpr>
+ Int32(1)
+
+# column expressions can be translated
+
+ Code
+ sd_eval_expr(quote(col0), expr_ctx)
+ Output
+ <SedonaDBExpr>
+ col0
+
+---
+
+ Code
+ sd_eval_expr(quote(.data$col0), expr_ctx)
+ Output
+ <SedonaDBExpr>
+ col0
+
+---
+
+ Code
+ sd_eval_expr(quote(.data[[col_zero]]), expr_ctx)
+ Output
+ <SedonaDBExpr>
+ col0
+
+# function calls with a translation become function calls
+
+ Code
+ sd_eval_expr(quote(abs(-1L)))
+ Output
+ <SedonaDBExpr>
+ abs((- Int32(1)))
+
+---
+
+ Code
+ sd_eval_expr(quote(base::abs(-1L)))
+ Output
+ <SedonaDBExpr>
+ abs((- Int32(1)))
+
+# function calls without a translation are evaluated in R
+
+ Code
+ sd_eval_expr(quote(function_without_a_translation(1L)))
+ Output
+ <SedonaDBExpr>
+ Int32(2)
+
+# function calls that map to binary expressions are translated
+
+ Code
+ sd_eval_expr(quote(+2))
+ Output
+ <SedonaDBExpr>
+ (- (- Float64(2)))
+
+---
+
+ Code
+ sd_eval_expr(quote(1 + 2))
+ Output
+ <SedonaDBExpr>
+ Float64(1) + Float64(2)
+
+---
+
+ Code
+ sd_eval_expr(quote(-2))
+ Output
+ <SedonaDBExpr>
+ (- Float64(2))
+
+---
+
+ Code
+ sd_eval_expr(quote(1 - 2))
+ Output
+ <SedonaDBExpr>
+ Float64(1) - Float64(2)
+
+---
+
+ Code
+ sd_eval_expr(quote(1 > 2))
+ Output
+ <SedonaDBExpr>
+ Float64(1) > Float64(2)
+
+# errors that occur during evaluation have reasonable context
+
+ Code
+ sd_eval_expr(quote(stop("this will error")))
+ Condition
+ Error in `sd_eval_expr()`:
+ ! Error evaluating translated expression `stop("this will error")`
+ Caused by error:
+ ! this will error
+
diff --git a/r/sedonadb/tests/testthat/_snaps/literal.md
b/r/sedonadb/tests/testthat/_snaps/literal.md
new file mode 100644
index 00000000..c115e766
--- /dev/null
+++ b/r/sedonadb/tests/testthat/_snaps/literal.md
@@ -0,0 +1,8 @@
+# literals with Arrow extension metadata can be converted to literals
+
+ Code
+ as_sedonadb_literal(wk::as_wkb("POINT (0 1)"))
+ Output
+ <SedonaDBExpr>
+ Binary("1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,240,63") FieldMetadata {
inner: {"ARROW:extension:metadata": "{}", "ARROW:extension:name":
"geoarrow.wkb"} }
+
diff --git a/r/sedonadb/tests/testthat/test-expression.R
b/r/sedonadb/tests/testthat/test-expression.R
new file mode 100644
index 00000000..f0f3d5af
--- /dev/null
+++ b/r/sedonadb/tests/testthat/test-expression.R
@@ -0,0 +1,80 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("basic expression types can be constructed", {
+ expect_snapshot(sd_expr_column("foofy"))
+ expect_snapshot(sd_expr_literal(1L))
+ expect_snapshot(sd_expr_scalar_function("abs", list(1L)))
+ expect_snapshot(sd_expr_cast(1L, nanoarrow::na_int64()))
+ expect_snapshot(sd_expr_alias(1L, "foofy"))
+ expect_snapshot(sd_expr_binary("+", 1L, 2L))
+ expect_snapshot(sd_expr_negative(1L))
+ expect_snapshot(sd_expr_aggregate_function("sum", list(1L)))
+})
+
+test_that("casts to a type with extension metadata can't be constructed", {
+ expect_error(
+ sd_expr_cast(1L, geoarrow::geoarrow_wkb()),
+ "Can't cast to Arrow extension type 'geoarrow.wkb'"
+ )
+})
+
+test_that("literal expressions can be translated", {
+ expect_snapshot(sd_eval_expr(quote(1L)))
+})
+
+test_that("column expressions can be translated", {
+ schema <- nanoarrow::na_struct(list(col0 = nanoarrow::na_int32()))
+ expr_ctx <- sd_expr_ctx(schema)
+
+ expect_snapshot(sd_eval_expr(quote(col0), expr_ctx))
+ expect_snapshot(sd_eval_expr(quote(.data$col0), expr_ctx))
+ col_zero <- "col0"
+ expect_snapshot(sd_eval_expr(quote(.data[[col_zero]]), expr_ctx))
+
+ expect_error(
+ sd_eval_expr(quote(col1), expr_ctx),
+ "object 'col1' not found"
+ )
+})
+
+test_that("function calls with a translation become function calls", {
+ # Should work for the qualified or unqualified versions
+ expect_snapshot(sd_eval_expr(quote(abs(-1L))))
+ expect_snapshot(sd_eval_expr(quote(base::abs(-1L))))
+})
+
+test_that("function calls without a translation are evaluated in R", {
+ function_without_a_translation <- function(x) x + 1L
+ expect_snapshot(sd_eval_expr(quote(function_without_a_translation(1L))))
+})
+
+test_that("function calls that map to binary expressions are translated", {
+ # + and - are special-cased because in R the unary function calls are valid
+ expect_snapshot(sd_eval_expr(quote(+2)))
+ expect_snapshot(sd_eval_expr(quote(1 + 2)))
+ expect_snapshot(sd_eval_expr(quote(-2)))
+ expect_snapshot(sd_eval_expr(quote(1 - 2)))
+
+ # normal translation
+ expect_snapshot(sd_eval_expr(quote(1 > 2)))
+})
+
+test_that("errors that occur during evaluation have reasonable context", {
+ function_without_a_translation <- function(x) x + 1L
+ expect_snapshot(sd_eval_expr(quote(stop("this will error"))), error = TRUE)
+})
diff --git a/r/sedonadb/tests/testthat/test-literal.R
b/r/sedonadb/tests/testthat/test-literal.R
new file mode 100644
index 00000000..a9f09406
--- /dev/null
+++ b/r/sedonadb/tests/testthat/test-literal.R
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("basic literals can be converted to expressions", {
+ expect_identical(
+ as_sedonadb_literal(NULL)$debug_string(),
+ 'Literal(NULL, None)'
+ )
+
+ expect_identical(
+ as_sedonadb_literal("foofy")$debug_string(),
+ 'Literal(Utf8("foofy"), None)'
+ )
+
+ expect_identical(
+ as_sedonadb_literal(1L)$debug_string(),
+ 'Literal(Int32(1), None)'
+ )
+
+ expect_identical(
+ as_sedonadb_literal(1.0)$debug_string(),
+ 'Literal(Float64(1), None)'
+ )
+
+ expect_identical(
+ as_sedonadb_literal(as.raw(c(1:3)))$debug_string(),
+ 'Literal(Binary("1,2,3"), None)'
+ )
+})
+
+test_that("literals can request a type", {
+ expect_identical(
+ as_sedonadb_literal(1.0, type = nanoarrow::na_float())$debug_string(),
+ "Cast(Cast { expr: Literal(Float64(1), None), data_type: Float32 })"
+ )
+})
+
+test_that("literals with Arrow extension metadata can be converted to
literals", {
+ expect_snapshot(as_sedonadb_literal(wk::as_wkb("POINT (0 1)")))
+})
+
+test_that("non-scalars can't be automatically converted to literals", {
+ expect_error(
+ as_sedonadb_literal(1:5)$debug_string(),
+ "Can't convert non-scalar to sedonadb_expr"
+ )
+})
diff --git a/r/sedonadb/tools/savvy-update.sh b/r/sedonadb/tools/savvy-update.sh
new file mode 100755
index 00000000..ddba6259
--- /dev/null
+++ b/r/sedonadb/tools/savvy-update.sh
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+set -eu
+
+main() {
+ local -r source_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ local -r source_rpkg_dir="$(cd "${source_dir}/../" && pwd)"
+
+ # Run the updater
+ savvy-cli update "${source_rpkg_dir}"
+
+ # Post-process files
+ local -r api_h="${source_rpkg_dir}/src/rust/api.h"
+ local -r init_c="${source_rpkg_dir}/src/init.c"
+ local -r wrappers_r="${source_rpkg_dir}/R/000-wrappers.R"
+
+ mv "${api_h}" "${api_h}.tmp"
+ mv "${init_c}" "${init_c}.tmp"
+ mv "${wrappers_r}" "${wrappers_r}.tmp"
+
+ # Add license header to api.h
+ echo "${LICENSE_C}" > "${api_h}"
+ cat "${api_h}.tmp" >> "${api_h}"
+
+ # Add license header, put includes on their own lines, and fix a typo in
init.c
+ echo "${LICENSE_C}" > "${init_c}"
+ sed 's/#include/\n#include/g' "${init_c}.tmp" | \
+ sed '1s/^\n//' | \
+ sed 's/initialzation/initialization/g' >> "${init_c}"
+
+ # Add license header to 000-wrappers.R
+ echo "${LICENSE_R}" > "${wrappers_r}"
+ cat "${wrappers_r}.tmp" >> "${wrappers_r}"
+
+ # Run clang-format on the generated C files
+ clang-format -i "${api_h}"
+ clang-format -i "${init_c}"
+
+ # Remove .tmp files
+ rm "${api_h}.tmp" "${init_c}.tmp" "${wrappers_r}.tmp"
+}
+
+LICENSE_R='# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+'
+LICENSE_C='// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+'
+
+main