This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4bcab1256c GH-49533: [R] Implement dplyr's when_any() and when_all()
helpers (#49535)
4bcab1256c is described below
commit 4bcab1256c43bfa08b0049b76e310bdc0b255931
Author: Nic Crane <[email protected]>
AuthorDate: Fri Mar 27 14:20:36 2026 +0000
GH-49533: [R] Implement dplyr's when_any() and when_all() helpers (#49535)
### Rationale for this change
Bindings to new dplyr helpers
### What changes are included in this PR?
Bindings and tests
### Are these changes tested?
Yes
### Are there any user-facing changes?
Yes
### AI usage
I wrote this with Claude Opus 4.5 via the Claude CLI. I have manually
reviewed the changes and am happy with them.
* GitHub Issue: #49533
Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/R/dplyr-funcs-conditional.R | 22 +++++++
r/R/dplyr-funcs-doc.R | 4 +-
r/man/acero.Rd | 4 +-
r/man/read_json_arrow.Rd | 2 +-
r/man/schema.Rd | 2 +-
r/tests/testthat/test-dplyr-funcs-conditional.R | 82 +++++++++++++++++++++++++
6 files changed, 112 insertions(+), 4 deletions(-)
diff --git a/r/R/dplyr-funcs-conditional.R b/r/R/dplyr-funcs-conditional.R
index 25d7fbc668..a7df2e8f0d 100644
--- a/r/R/dplyr-funcs-conditional.R
+++ b/r/R/dplyr-funcs-conditional.R
@@ -99,6 +99,28 @@ register_bindings_conditional <- function() {
out
})
+  register_binding("dplyr::when_any", function(..., na_rm = FALSE, size = NULL) {
+    if (!is.null(size)) { # an explicit output `size` is not translated
+      arrow_not_supported("`when_any()` with `size` specified")
+    }
+    conds <- list2(...)
+    if (na_rm) { # swap each missing value for FALSE before combining
+      conds <- map(conds, function(cond) call_binding("coalesce", cond, FALSE))
+    }
+    reduce(conds, `|`) # fold the conditions left to right with element-wise OR
+  })
+
+  register_binding("dplyr::when_all", function(..., na_rm = FALSE, size = NULL) {
+    if (!is.null(size)) { # an explicit output `size` is not translated
+      arrow_not_supported("`when_all()` with `size` specified")
+    }
+    conds <- list2(...)
+    if (na_rm) { # swap each missing value for TRUE before combining
+      conds <- map(conds, function(cond) call_binding("coalesce", cond, TRUE))
+    }
+    reduce(conds, `&`) # fold the conditions left to right with element-wise AND
+  })
+
register_binding(
"dplyr::case_when",
function(..., .default = NULL, .ptype = NULL, .size = NULL) {
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index 9293d14c94..e0b3dd095c 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -21,7 +21,7 @@
#'
#' The `arrow` package contains methods for 38 `dplyr` table functions, many of
#' which are "verbs" that do transformations to one or more tables.
-#' The package also has mappings of 224 R functions to the corresponding
+#' The package also has mappings of 226 R functions to the corresponding
#' functions in the Arrow compute library. These allow you to write code inside
#' of `dplyr` methods that call R functions, including many in packages like
#' `stringr` and `lubridate`, and they will get translated to Arrow and run
@@ -214,6 +214,8 @@
#' * [`if_else()`][dplyr::if_else()]
#' * [`n()`][dplyr::n()]
#' * [`n_distinct()`][dplyr::n_distinct()]
+#' * [`when_all()`][dplyr::when_all()]
+#' * [`when_any()`][dplyr::when_any()]
#'
#' ## hms
#'
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index ee156cc912..a43617493a 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -9,7 +9,7 @@
\description{
The \code{arrow} package contains methods for 38 \code{dplyr} table functions,
many of
which are "verbs" that do transformations to one or more tables.
-The package also has mappings of 224 R functions to the corresponding
+The package also has mappings of 226 R functions to the corresponding
functions in the Arrow compute library. These allow you to write code inside
of \code{dplyr} methods that call R functions, including many in packages like
\code{stringr} and \code{lubridate}, and they will get translated to Arrow and
run
@@ -207,6 +207,8 @@ Valid values are "s", "ms" (default), "us", "ns".
\item \code{\link[dplyr:if_else]{if_else()}}
\item \code{\link[dplyr:context]{n()}}
\item \code{\link[dplyr:n_distinct]{n_distinct()}}
+\item \code{\link[dplyr:when-any-all]{when_all()}}
+\item \code{\link[dplyr:when-any-all]{when_any()}}
}
}
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index b809a63bcc..abf6b8fc44 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -54,7 +54,7 @@ If \code{schema} is not provided, Arrow data types are
inferred from the data:
\item JSON numbers convert to \code{\link[=int64]{int64()}}, falling back to
\code{\link[=float64]{float64()}} if a non-integer is encountered.
\item JSON strings of the kind "YYYY-MM-DD" and "YYYY-MM-DD hh:mm:ss" convert
to \code{\link[=timestamp]{timestamp(unit = "s")}},
falling back to \code{\link[=utf8]{utf8()}} if a conversion error occurs.
-\item JSON arrays convert to a \code{\link[=list_of]{list_of()}} type, and
inference proceeds recursively on the JSON arrays' values.
+\item JSON arrays convert to a \code{\link[vctrs:list_of]{vctrs::list_of()}}
type, and inference proceeds recursively on the JSON arrays' values.
\item Nested JSON objects convert to a \code{\link[=struct]{struct()}} type,
and inference proceeds recursively on the JSON objects' values.
}
diff --git a/r/man/schema.Rd b/r/man/schema.Rd
index 65ab2eea0d..ff77a05d84 100644
--- a/r/man/schema.Rd
+++ b/r/man/schema.Rd
@@ -7,7 +7,7 @@
schema(...)
}
\arguments{
-\item{...}{\link[=field]{fields}, field name/\link[=data-type]{data type}
pairs (or a list of), or object from which to extract
+\item{...}{\link[vctrs:fields]{fields}, field name/\link[=data-type]{data
type} pairs (or a list of), or object from which to extract
a schema}
}
\description{
diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R
b/r/tests/testthat/test-dplyr-funcs-conditional.R
index 58373db253..d99843ab9d 100644
--- a/r/tests/testthat/test-dplyr-funcs-conditional.R
+++ b/r/tests/testthat/test-dplyr-funcs-conditional.R
@@ -517,3 +517,85 @@ test_that("external objects are found when they're not in
the global environment
tibble(x = c("a", "b"), x2 = c("foo", NA))
)
})
+
+test_that("when_any()", {
+  # an explicit `size` is rejected with an arrow_not_supported condition
+  expect_arrow_eval_error(
+    when_any(lgl, false, size = 10),
+    "`when_any\\(\\)` with `size` specified not supported in Arrow",
+    class = "arrow_not_supported"
+  )
+
+  # a single condition on its own
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = when_any(lgl)) |>
+      collect(),
+    tbl
+  )
+
+  # three conditions are OR-ed together
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = when_any(lgl, false, int > 5)) |>
+      collect(),
+    tbl
+  )
+
+  # na_rm = TRUE: a missing condition is treated as FALSE
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = when_any(lgl, false, na_rm = TRUE)) |>
+      collect(),
+    tbl
+  )
+
+  # usable as a filter() predicate
+  compare_dplyr_binding(
+    .input |>
+      filter(when_any(int > 5, dbl > 3)) |>
+      collect(),
+    tbl
+  )
+})
+
+test_that("when_all()", {
+  # an explicit `size` is rejected with an arrow_not_supported condition
+  expect_arrow_eval_error(
+    when_all(lgl, false, size = 10),
+    "`when_all\\(\\)` with `size` specified not supported in Arrow",
+    class = "arrow_not_supported"
+  )
+
+  # a single condition on its own
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = when_all(lgl)) |>
+      collect(),
+    tbl
+  )
+
+  # three conditions are AND-ed together
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = when_all(lgl, int > 0, dbl > 1)) |>
+      collect(),
+    tbl
+  )
+
+  # na_rm = TRUE treats NA as TRUE; an all-FALSE input would mask that
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = when_all(lgl, int > 0, na_rm = TRUE)) |>
+      collect(),
+    tbl
+  )
+
+  # usable as a filter() predicate
+  compare_dplyr_binding(
+    .input |>
+      filter(when_all(int > 5, dbl > 3)) |>
+      collect(),
+    tbl
+  )
+})