This is an automated email from the ASF dual-hosted git repository.
npr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 1d7907e ARROW-9544: [R] Fix version argument of write_parquet()
1d7907e is described below
commit 1d7907ee349b96c9cf453b0c69df24a14bba467b
Author: Matthias <[email protected]>
AuthorDate: Tue Jul 28 08:09:13 2020 -0700
ARROW-9544: [R] Fix version argument of write_parquet()
Setting the version argument in `write_parquet()` did not work due to an
incorrect function name. This PR fixes the bug, adds tests and amends the
documentation.
Closes #7831 from Plebejer/master
Authored-by: Matthias <[email protected]>
Signed-off-by: Neal Richardson <[email protected]>
---
r/NEWS.md | 4 ++++
r/R/parquet.R | 5 +++--
r/man/write_parquet.Rd | 3 ++-
r/tests/testthat/test-parquet.R | 14 ++++++++++++++
4 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/r/NEWS.md b/r/NEWS.md
index be78cac..3af512e 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -19,6 +19,10 @@
# arrow 1.0.0.9000
+## Bug fixes
+
+* The `version` option to `write_parquet()` is now correctly implemented.
+
# arrow 1.0.0
## Arrow format conversion
diff --git a/r/R/parquet.R b/r/R/parquet.R
index e504a5c..b44cf18 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -62,7 +62,8 @@ read_parquet <- function(file,
#' @param sink an [arrow::io::OutputStream][OutputStream] or a string which is interpreted as a file path
#' @param chunk_size chunk size in number of rows. If NULL, the total number of rows is used.
#'
-#' @param version parquet version, "1.0" or "2.0". Default "1.0"
+#' @param version parquet version, "1.0" or "2.0". Default "1.0". Numeric values
+#' are coerced to character.
#' @param compression compression algorithm. Default "snappy". See details.
#' @param compression_level compression level. Meaning depends on compression algorithm
#' @param use_dictionary Specify if we should use dictionary encoding. Default `TRUE`
@@ -257,7 +258,7 @@ ParquetWriterProperties <- R6Class("ParquetWriterProperties", inherit = ArrowObj
ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder", inherit = ArrowObject,
public = list(
set_version = function(version) {
- parquet___ArrowWriterProperties___Builder__version(self, make_valid_version(version))
+ parquet___WriterProperties___Builder__version(self, make_valid_version(version))
},
set_compression = function(table, compression) {
compression <- compression_from_name(compression)
diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd
index 3ff60df..e005dfb 100644
--- a/r/man/write_parquet.Rd
+++ b/r/man/write_parquet.Rd
@@ -33,7 +33,8 @@ write_parquet(
\item{chunk_size}{chunk size in number of rows. If NULL, the total number of rows is used.}
-\item{version}{parquet version, "1.0" or "2.0". Default "1.0"}
+\item{version}{parquet version, "1.0" or "2.0". Default "1.0". Numeric values
+are coerced to character.}
\item{compression}{compression algorithm. Default "snappy". See details.}
diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R
index 71484db..7ddf4cc 100644
--- a/r/tests/testthat/test-parquet.R
+++ b/r/tests/testthat/test-parquet.R
@@ -177,3 +177,17 @@ test_that("write_parquet() returns its input", {
df_out <- write_parquet(df, tf)
expect_equivalent(df, df_out)
})
+
+test_that("write_parquet() handles version argument", {
+ df <- tibble::tibble(x = 1:5)
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ purrr::walk(list("1.0", "2.0", 1.0, 2.0, 1L, 2L), ~ {
+ write_parquet(df, tf, version = .x)
+ expect_identical(read_parquet(tf), df)
+ })
+ purrr::walk(list("3.0", 3.0, 3L, "A"), ~ {
+ expect_error(write_parquet(df, tf, version = .x))
+ })
+})