Copilot commented on code in PR #658:
URL: https://github.com/apache/sedona-db/pull/658#discussion_r2848673393
##########
r/sedonadb/R/context.R:
##########
@@ -15,10 +15,111 @@
# specific language governing permissions and limitations
# under the License.
+#' Create a SedonaDB context
+#'
+#' Runtime options configure the execution environment. Use
+#' `global = TRUE` to configure the global context or use the
+#' returned object as a scoped context. A scoped context is
+#' recommended for programmatic usage as it prevents named
+#' views from interfering with eachother.
+#'
+#' @param global Use TRUE to set options on the global context.
+#' @param memory_limit Maximum memory for query execution, as a
+#' human-readable string (e.g., `"4gb"`, `"512m"`) or `NULL` for
+#' unbounded (the default).
+#' @param temp_dir Directory for temporary/spill files, or `NULL` to
+#' use the DataFusion default.
+#' @param memory_pool_type Memory pool type: `"greedy"` (default) or
+#' `"fair"`. Only takes effect when `memory_limit` is set.
+#' @param unspillable_reserve_ratio Fraction of memory (0--1) reserved for
+#' unspillable consumers. Only applies when `memory_pool_type` is
+#' `"fair"`. Defaults to 0.2 when not explicitly set.
+#' @param ... Reserved for future options
+#'
+#' @returns The constructed context, invisibly.
+#' @export
+#'
+#' @examples
+#' sd_connect(memory_limit = "100mb", memory_pool_type = "fair")
+#'
+#'
+sd_connect <- function(
+ ...,
+ global = FALSE,
+ memory_limit = NULL,
+ temp_dir = NULL,
+ memory_pool_type = NULL,
+ unspillable_reserve_ratio = NULL
+) {
+ unsupported_options <- list(...)
+ if (length(unsupported_options) != 0) {
+ stop(
+ sprintf(
+ "Unrecognized options for sd_connect(): %s",
+ paste(names(unsupported_options), collapse = ", ")
+ )
+ )
+ }
+
+ options <- list()
+
+ if (!is.null(memory_limit)) {
+ stopifnot(is.character(memory_limit), length(memory_limit) == 1L)
+ options[["memory_limit"]] <- memory_limit
+ }
+
+ if (!is.null(temp_dir)) {
+ stopifnot(is.character(temp_dir), length(temp_dir) == 1L)
+ options[["temp_dir"]] <- temp_dir
+ }
+
+ if (!is.null(memory_pool_type)) {
+ memory_pool_type <- match.arg(memory_pool_type, c("greedy", "fair"))
+ options[["memory_pool_type"]] <- memory_pool_type
+ }
+
+ if (!is.null(unspillable_reserve_ratio)) {
+ stopifnot(
+ is.numeric(unspillable_reserve_ratio),
+ length(unspillable_reserve_ratio) == 1L,
+ unspillable_reserve_ratio >= 0,
+ unspillable_reserve_ratio <= 1
+ )
+ options[["unspillable_reserve_ratio"]] <- as.character(
+ unspillable_reserve_ratio
+ )
+ }
+
+ # Don't replace the global context
+ if (global && length(options) > 0 && !is.null(global_ctx$ctx)) {
+ warning(
+ "Cannot change runtime options after the context has been initialized. ",
+ "Set global options with sd_connect() before executing your first query
",
+ "or use a scoped context by passing global = FALSE"
+ )
+
+ return(invisible(global_ctx$ctx))
+ } else if (global && !is.null(global_ctx$ctx)) {
+ return(invisible(global_ctx$ctx))
+ }
+
+ opts <- global_ctx$options
Review Comment:
The function uses `global_ctx$options` (lines 106-108) instead of the local
`options` variable that was built from the function arguments (lines 64-91).
This means user-provided configuration options (memory_limit, temp_dir, etc.)
are ignored when creating the context. The `options` variable should be used
here instead of `global_ctx$options`.
```suggestion
opts <- global_ctx$options
if (is.null(opts)) {
opts <- list()
}
if (length(options) > 0) {
opts[names(options)] <- options
}
```
##########
r/sedonadb/R/context.R:
##########
@@ -15,10 +15,111 @@
# specific language governing permissions and limitations
# under the License.
+#' Create a SedonaDB context
+#'
+#' Runtime options configure the execution environment. Use
+#' `global = TRUE` to configure the global context or use the
+#' returned object as a scoped context. A scoped context is
+#' recommended for programmatic usage as it prevents named
+#' views from interfering with eachother.
Review Comment:
Spelling error: 'eachother' should be 'each other' (two words).
```suggestion
#' views from interfering with each other.
```
##########
r/sedonadb/src/rust/src/dataframe.rs:
##########
@@ -19,6 +19,7 @@ use arrow_array::ffi::FFI_ArrowSchema;
use arrow_array::ffi_stream::FFI_ArrowArrayStream;
use arrow_array::{RecordBatchIterator, RecordBatchReader};
use datafusion::catalog::MemTable;
+use datafusion::config::ConfigField;
Review Comment:
The import `datafusion::config::ConfigField` appears to be unused. It was
added in this PR but is not referenced anywhere in the file.
```suggestion
```
##########
r/sedonadb/man/sd_connect.Rd:
##########
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/context.R
+\name{sd_connect}
+\alias{sd_connect}
+\title{Create a SedonaDB context}
+\usage{
+sd_connect(
+ ...,
+ global = FALSE,
+ memory_limit = NULL,
+ temp_dir = NULL,
+ memory_pool_type = NULL,
+ unspillable_reserve_ratio = NULL
+)
+}
+\arguments{
+\item{...}{Reserved for future options}
+
+\item{global}{Use TRUE to set options on the global context.}
+
+\item{memory_limit}{Maximum memory for query execution, as a
+human-readable string (e.g., \code{"4gb"}, \code{"512m"}) or \code{NULL} for
+unbounded (the default).}
+
+\item{temp_dir}{Directory for temporary/spill files, or \code{NULL} to
+use the DataFusion default.}
+
+\item{memory_pool_type}{Memory pool type: \code{"greedy"} (default) or
+\code{"fair"}. Only takes effect when \code{memory_limit} is set.}
+
+\item{unspillable_reserve_ratio}{Fraction of memory (0--1) reserved for
+unspillable consumers. Only applies when \code{memory_pool_type} is
+\code{"fair"}. Defaults to 0.2 when not explicitly set.}
+}
+\value{
+The constructed context, invisibly.
+}
+\description{
+Runtime options configure the execution environment. Use
+\code{global = TRUE} to configure the global context or use the
+returned object as a scoped context. A scoped context is
+recommended for programmatic usage as it prevents named
+views from interfering with eachother.
Review Comment:
Spelling error: 'eachother' should be 'each other' (two words).
```suggestion
views from interfering with each other.
```
##########
r/sedonadb/tests/testthat/test-context.R:
##########
@@ -15,6 +15,49 @@
# specific language governing permissions and limitations
# under the License.
+test_that("the global context is never replaced", {
+ # Check a few times to make sure this is true
+ ctx <- sd_connect(global = TRUE)
+ expect_true(rlang::is_reference(ctx, global_ctx$ctx))
+
+ ctx <- sd_connect(global = TRUE)
+ expect_true(rlang::is_reference(ctx, global_ctx$ctx))
+
+ expect_snapshot_warning(
+ expect_true(
+ rlang::is_reference(
+ sd_connect(global = TRUE, memory_limit = "5g"),
+ global_ctx$ctx
+ )
+ )
+ )
+})
+
+test_that("scoped connections can be created", {
+ ctx <- sd_connect(
+ memory_limit = "1g",
+ temp_dir = tempfile(),
+ memory_pool_type = "fair",
+ unspillable_reserve_ratio = 0.5
+ )
Review Comment:
The test creates a context with memory_limit = "1g" but doesn't verify that
the memory limit is actually enforced. Consider adding a test that triggers a
memory limit violation to ensure the configuration is properly applied (similar
to the Python test in issue #537).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]