This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new b9ae6940f3 [SEDONA-685] Deprecate custom readers writers (#1714)
b9ae6940f3 is described below
commit b9ae6940f31f56a398e83e67da9555de79f126f5
Author: gregleleu <[email protected]>
AuthorDate: Tue Dec 10 00:49:52 2024 -0500
[SEDONA-685] Deprecate custom readers writers (#1714)
* Deprecate custom readers writers
- Deprecate custom readers/writers, which are replaced by the standard "source" API (see the sketch below)
- Update tests for deprecation warnings
* Fix trailing whitespace
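For reference, a minimal sketch of the migration path, assuming a live sparklyr
connection `sc` and hypothetical input/output paths; routing the output path
through `options` follows sparklyr's generic writer convention:

    library(sparklyr)
    library(apache.sedona)

    # Before: deprecated custom reader/writer
    sdf <- spark_read_geoparquet(sc, path = "data/example.parquet")
    spark_write_geojson(sdf, path = "out/example.json")

    # After: the standard "source" API
    sdf <- spark_read_source(sc, path = "data/example.parquet", source = "geoparquet")
    spark_write_source(sdf, source = "geojson", options = list(path = "out/example.json"))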
---
R/DESCRIPTION | 4 +-
R/R/data_interface.R | 53 ++++++++++++++++++++++-
R/man/sdf_register.spatial_rdd.Rd | 2 +-
R/man/spark_read_shapefile.Rd | 9 +++-
R/man/spark_write_geojson.Rd | 8 +++-
R/tests/testthat/test-data-interface-raster.R | 3 +-
R/tests/testthat/test-data-interface.R | 62 ++++++++++++++++++---------
7 files changed, 113 insertions(+), 28 deletions(-)
diff --git a/R/DESCRIPTION b/R/DESCRIPTION
index 5a4de87277..76e111657e 100644
--- a/R/DESCRIPTION
+++ b/R/DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: apache.sedona
Title: R Interface for Apache Sedona
-Version: 1.7.0
+Version: 1.7.0.900
Authors@R:
c(person(family = "Apache Sedona",
role = c("aut", "cre"),
@@ -38,6 +38,6 @@ Suggests:
knitr,
rmarkdown
Encoding: UTF-8
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
SystemRequirements: 'Apache Spark' 3.x
Roxygen: list(markdown = TRUE)
diff --git a/R/R/data_interface.R b/R/R/data_interface.R
index 3471f4d8fc..fabe885a98 100644
--- a/R/R/data_interface.R
+++ b/R/R/data_interface.R
@@ -389,7 +389,14 @@ sedona_read_shapefile <- function(sc,
# ------- Read SDF ------------
#' Read geospatial data into a Spark DataFrame.
#'
-#' @description Functions to read geospatial data from a variety of formats into Spark DataFrames.
+#' @description `r lifecycle::badge("deprecated")`
+#'
+#' These functions are deprecated and will be removed in a future release. Sedona now
+#' implements readers as Spark DataFrame sources, so use `spark_read_source` with the
+#' appropriate source ("shapefile", "geojson", "geoparquet") to read geospatial data.
+#'
+#' Functions to read geospatial data from a variety of formats into Spark DataFrames.
+#'
#'
#' * `spark_read_shapefile`: from a shapefile
#' * `spark_read_geojson`: from a geojson file
@@ -420,6 +427,12 @@ spark_read_shapefile <- function(sc,
options = list(),
...) {
+ lifecycle::deprecate_soft(
+ "1.7.1",
+ "spark_read_shapefile()",
+ with = "spark_read_source()"
+ )
+
lapply(names(options), function(name) {
if (!name %in% c("")) {
warning(paste0("Ignoring unknown option '", name,"'"))
@@ -446,6 +459,12 @@ spark_read_geojson <- function(sc,
memory = TRUE,
overwrite = TRUE) {
+ lifecycle::deprecate_soft(
+ "1.7.1",
+ "spark_read_geojson()",
+ with = "spark_read_source()"
+ )
+
# check options
if ("allow_invalid_geometries" %in% names(options)) final_allow_invalid <-
options[["allow_invalid_geometries"]] else final_allow_invalid <- TRUE
if ("skip_syntactically_invalid_geometries" %in% names(options)) final_skip
<- options[["skip_syntactically_invalid_geometries"]] else final_skip <- TRUE
@@ -480,6 +499,12 @@ spark_read_geoparquet <- function(sc,
memory = TRUE,
overwrite = TRUE) {
+ lifecycle::deprecate_soft(
+ "1.7.1",
+ "spark_read_geoparquet()",
+ with = "spark_read_source()"
+ )
+
spark_read_source(sc,
name = name,
path = path,
@@ -602,7 +627,14 @@ sedona_save_spatial_rdd <- function(x,
#' Write geospatial data from a Spark DataFrame.
#'
-#' @description Functions to write geospatial data into a variety of formats from Spark DataFrames.
+#' @description `r lifecycle::badge("deprecated")`
+#'
+#' These functions are deprecated and will be removed in a future release. Sedona now
+#' implements writers as Spark DataFrame sources, so use `spark_write_source` with the
+#' appropriate source ("shapefile", "geojson", "geoparquet") to write geospatial data.
+#'
+#'
+#' Functions to write geospatial data into a variety of formats from Spark DataFrames.
#'
#' * `spark_write_geojson`: to GeoJSON
#' * `spark_write_geoparquet`: to GeoParquet
@@ -644,6 +676,12 @@ spark_write_geojson <- function(x,
partition_by = NULL,
...) {
+ lifecycle::deprecate_soft(
+ "1.7.1",
+ "spark_write_geojson()",
+ with = "spark_read_source()"
+ )
+
## find geometry column if not specified
if (!"spatial_col" %in% names(options)) {
schema <- x %>% sdf_schema()
@@ -678,6 +716,11 @@ spark_write_geoparquet <- function(x,
options = list(),
partition_by = NULL,
...) {
+ lifecycle::deprecate_soft(
+ "1.7.1",
+ "spark_write_geoparquet()",
+ with = "spark_read_source()"
+ )
spark_write_source(
x = x,
@@ -702,6 +745,12 @@ spark_write_raster <- function(x,
partition_by = NULL,
...) {
+ lifecycle::deprecate_soft(
+ "1.7.1",
+ "spark_write_raster()",
+ with = "spark_read_source()"
+ )
+
spark_write_source(
x = x,
source = "raster",
diff --git a/R/man/sdf_register.spatial_rdd.Rd b/R/man/sdf_register.spatial_rdd.Rd
index 0596bd5271..1770e265ce 100644
--- a/R/man/sdf_register.spatial_rdd.Rd
+++ b/R/man/sdf_register.spatial_rdd.Rd
@@ -43,7 +43,7 @@ if (!inherits(sc, "test_connection")) {
type = "polygon"
)
sdf <- sdf_register(rdd)
-
+
input_location <- "/dev/null" # replace it with the path to your input file
rdd <- sedona_read_dsv_to_typed_rdd(
sc,
diff --git a/R/man/spark_read_shapefile.Rd b/R/man/spark_read_shapefile.Rd
index 1d52f9348d..412e2a8a69 100644
--- a/R/man/spark_read_shapefile.Rd
+++ b/R/man/spark_read_shapefile.Rd
@@ -36,7 +36,8 @@ spark_read_geoparquet(
\item{path}{The path to the file. Needs to be accessible from the cluster.
Supports the \samp{"hdfs://"}, \samp{"s3a://"} and \samp{"file://"} protocols.}
-\item{options}{A list of strings with additional options. See \url{https://spark.apache.org/docs/latest/sql-programming-guide.html}.}
+\item{options}{A list of strings with additional options.
+See \url{https://spark.apache.org/docs/latest/sql-programming-guide.html#configuration}.}
\item{...}{Optional arguments; currently unused.}
@@ -53,6 +54,12 @@ already exists?}
A tbl
}
\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
+
+These functions are deprecated and will be removed in a future release. Sedona now
+implements readers as Spark DataFrame sources, so use \code{spark_read_source} with the
+appropriate source ("shapefile", "geojson", "geoparquet") to read geospatial data.
+
Functions to read geospatial data from a variety of formats into Spark
DataFrames.
\itemize{
\item \code{spark_read_shapefile}: from a shapefile
diff --git a/R/man/spark_write_geojson.Rd b/R/man/spark_write_geojson.Rd
index e5413a962e..cf4e4903b6 100644
--- a/R/man/spark_write_geojson.Rd
+++ b/R/man/spark_write_geojson.Rd
@@ -43,7 +43,7 @@ Supports the \samp{"hdfs://"}, \samp{"s3a://"} and \samp{"file://"} protocols.}
table already exists. Supported values include: 'error', 'append', 'overwrite' and
ignore. Notice that 'overwrite' will also change the column structure.
- For more details see also \url{https://spark.apache.org/docs/latest/sql-programming-guide.html}
+ For more details see also \url{https://spark.apache.org/docs/latest/sql-programming-guide.html#save-modes}
for your version of Spark.}
\item{options}{A list of strings with additional options.}
@@ -53,6 +53,12 @@ Supports the \samp{"hdfs://"}, \samp{"s3a://"} and \samp{"file://"} protocols.}
\item{...}{Optional arguments; currently unused.}
}
\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
+
+These functions are deprecated and will be removed in a future release. Sedona now
+implements writers as Spark DataFrame sources, so use \code{spark_write_source} with the
+appropriate source ("shapefile", "geojson", "geoparquet") to write geospatial data.
+
Functions to write geospatial data into a variety of formats from Spark
DataFrames.
\itemize{
\item \code{spark_write_geojson}: to GeoJSON
diff --git a/R/tests/testthat/test-data-interface-raster.R b/R/tests/testthat/test-data-interface-raster.R
index 7290c5850f..67a1e6a0af 100644
--- a/R/tests/testthat/test-data-interface-raster.R
+++ b/R/tests/testthat/test-data-interface-raster.R
@@ -212,9 +212,10 @@ test_that("Should read geotiff using binary source and write geotiff back to dis
binary_sdf <- spark_read_binary(sc, dir = test_data("raster"), name = sdf_name)
tmp_dest <- tempfile()
-
+ lifecycle::expect_deprecated({
binary_sdf %>%
spark_write_raster(path = tmp_dest)
+ })
sdf_name_2 <- random_string("spatial_sdf_2")
binary_2_sdf <- spark_read_binary(sc, dir = tmp_dest, name = sdf_name_2, recursive_file_lookup = TRUE)
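These test updates all follow one pattern: each call to a deprecated reader or
writer is wrapped in `lifecycle::expect_deprecated()`, which asserts that the
expression signals a lifecycle deprecation warning and captures it so the
suite stays clean. A minimal sketch, reusing the hypothetical `my_old_reader`
from the sketch above:

    library(testthat)

    test_that("deprecated wrapper still works but warns", {
      lifecycle::expect_deprecated({
        result <- my_old_reader("input.txt")
      })
      expect_true(length(result) > 0)
    })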
diff --git a/R/tests/testthat/test-data-interface.R b/R/tests/testthat/test-data-interface.R
index d2f58683c1..a16b1a9368 100644
--- a/R/tests/testthat/test-data-interface.R
+++ b/R/tests/testthat/test-data-interface.R
@@ -297,8 +297,9 @@ test_that("sedona_read_shapefile() works as expected", {
test_that("spark_read_geoparquet() works as expected", {
sdf_name <- random_string("spatial_sdf")
- geoparquet_sdf <- spark_read_geoparquet(sc, geoparquet("example1.parquet"),
name = sdf_name)
-
+ lifecycle::expect_deprecated({
+ geoparquet_sdf <- spark_read_geoparquet(sc, geoparquet("example1.parquet"), name = sdf_name)
+ })
## Right number of rows
geoparquet_df <-
geoparquet_sdf %>%
@@ -350,7 +351,9 @@ test_that("spark_read_geoparquet() works as expected", {
test_that("spark_read_geoparquet() works as expected, ex 2", {
sdf_name <- random_string("spatial_sdf")
- geoparquet_sdf <- spark_read_geoparquet(sc, geoparquet("example2.parquet"),
name = sdf_name)
+ lifecycle::expect_deprecated({
+ geoparquet_sdf <- spark_read_geoparquet(sc, geoparquet("example2.parquet"), name = sdf_name)
+ })
## Right data (first row)
expect_equivalent(
@@ -366,8 +369,9 @@ test_that("spark_read_geoparquet() works as expected, ex 2", {
test_that("spark_read_geoparquet() works as expected, ex 3", {
sdf_name <- random_string("spatial_sdf")
- geoparquet_sdf <- spark_read_geoparquet(sc, geoparquet("example3.parquet"),
name = sdf_name)
-
+ lifecycle::expect_deprecated({
+ geoparquet_sdf <- spark_read_geoparquet(sc, geoparquet("example3.parquet"), name = sdf_name)
+ })
## Right data (first row)
expect_equivalent(
geoparquet_sdf %>% head(1) %>% mutate(geometry = geometry %>% st_astext() %>% substring(1, 26)) %>% collect() %>% as.list(),
@@ -385,8 +389,9 @@ test_that("spark_read_geoparquet() works as expected, ex 3", {
test_that("spark_read_geoparquet() works as expected, ex 1.0.0-beta.1", {
sdf_name <- random_string("spatial_sdf")
- geoparquet_sdf <- spark_read_geoparquet(sc, geoparquet("example-1.0.0-beta.1.parquet"), name = sdf_name)
-
+ lifecycle::expect_deprecated({
+ geoparquet_sdf <- spark_read_geoparquet(sc, geoparquet("example-1.0.0-beta.1.parquet"), name = sdf_name)
+ })
## Right number of rows
geoparquet_df <-
geoparquet_sdf %>%
@@ -417,14 +422,16 @@ test_that("spark_read_geoparquet() works as expected, multiple geom", {
## Write
tmp_dest <- tempfile()
- spark_write_geoparquet(geoparquet_sdf, path = tmp_dest, mode = "overwrite")
-
+ lifecycle::expect_deprecated({
+ spark_write_geoparquet(geoparquet_sdf, path = tmp_dest, mode = "overwrite")
+ })
## Check
### Can't check on geoparquet metadata with available packages
file <- dir(tmp_dest, full.names = TRUE, pattern = "parquet$")
-
- geoparquet_2_sdf <- spark_read_geoparquet(sc, path = file)
+ lifecycle::expect_deprecated({
+ geoparquet_2_sdf <- spark_read_geoparquet(sc, path = file)
+ })
out <- geoparquet_2_sdf %>% sdf_schema()
expect_match(out$g0$type, "GeometryUDT")
@@ -450,7 +457,9 @@ test_that("spark_read_geoparquet() throws an error with plain parquet files", {
test_that("spark_read_geojson() works as expected", {
sdf_name <- random_string("spatial_sdf")
+ lifecycle::expect_deprecated({
geojson_sdf <- spark_read_geojson(sc, path = test_data("testPolygon.json"),
name = sdf_name)
+ })
tmp_dest <- tempfile(fileext = ".json")
## Right number of rows
@@ -470,8 +479,9 @@ test_that("spark_read_geojson() works as expected", {
test_that("spark_read_geojson() works as expected, no feat", {
sdf_name <- random_string("spatial_sdf")
+ lifecycle::expect_deprecated({
geojson_sdf <- spark_read_geojson(sc, path = test_data("testpolygon-no-property.json"), name = sdf_name)
-
+ })
## Right number of rows
expect_equal(
invoke(geojson_sdf %>% spark_dataframe(), 'count'), 10
@@ -486,8 +496,9 @@ test_that("spark_read_geojson() works as expected, no feat", {
test_that("spark_read_geojson() works as expected, null values", {
sdf_name <- random_string("spatial_sdf")
+ lifecycle::expect_deprecated({
geojson_sdf <- spark_read_geojson(sc, path = test_data("testpolygon-with-null-property-value.json"), name = sdf_name)
-
+ })
## Right number of rows
expect_equal(
invoke(geojson_sdf %>% spark_dataframe(), 'count'), 3
@@ -503,8 +514,9 @@ test_that("spark_read_geojson() works as expected, null values", {
test_that("spark_read_geojson() works as expected, with id", {
sdf_name <- random_string("spatial_sdf")
+ lifecycle::expect_deprecated({
geojson_sdf <- spark_read_geojson(sc, path = test_data("testContainsId.json"), name = sdf_name)
-
+ })
## Right number of rows
expect_equal(
invoke(geojson_sdf %>% spark_dataframe(), 'count'), 1
@@ -533,8 +545,9 @@ test_that("spark_read_geojson() works as expected, invalid geom", {
sdf_name <- random_string("spatial_sdf")
# Keep invalid
+ lifecycle::expect_deprecated({
geojson_sdf <- spark_read_geojson(sc, path = test_data("testInvalidPolygon.json"), name = sdf_name)
-
+ })
## Right number of rows
expect_equal(
invoke(geojson_sdf %>% spark_dataframe(), 'count'), 3
@@ -545,8 +558,9 @@ test_that("spark_read_geojson() works as expected, invalid geom", {
# Remove invalid
+ lifecycle::expect_deprecated({
geojson_sdf <- spark_read_geojson(sc, path = test_data("testInvalidPolygon.json"), name = sdf_name, options = list(allow_invalid_geometries = FALSE))
-
+ })
## Right number of rows
expect_equal(
invoke(geojson_sdf %>% spark_dataframe(), 'count'), 2
@@ -645,15 +659,19 @@ test_that("sedona_save_spatial_rdd() works as expected", {
# ------- Write SDF ------------
test_that("spark_write_geoparquet() works as expected", {
+ lifecycle::expect_deprecated({
geoparquet_sdf <- spark_read_geoparquet(sc, geoparquet("example2.parquet"))
+ })
tmp_dest <- tempfile(fileext = ".parquet")
## Save
+ lifecycle::expect_deprecated({
geoparquet_sdf %>% spark_write_geoparquet(tmp_dest)
-
+ })
### Reload
+ lifecycle::expect_deprecated({
geoparquet_2_sdf <- spark_read_geoparquet(sc, tmp_dest)
-
+ })
expect_equivalent(
geoparquet_sdf %>% mutate(geometry = geometry %>% st_astext()) %>% collect(),
geoparquet_2_sdf %>% mutate(geometry = geometry %>% st_astext()) %>% collect()
@@ -665,15 +683,19 @@ test_that("spark_write_geoparquet() works as expected", {
test_that("spark_write_geojson() works as expected", {
sdf_name <- random_string("spatial_sdf")
+ lifecycle::expect_deprecated({
geojson_sdf <- spark_read_geojson(sc, path = test_data("testPolygon.json"),
name = sdf_name)
+ })
tmp_dest <- tempfile(fileext = ".json")
## Save
+ lifecycle::expect_deprecated({
geojson_sdf %>% spark_write_geojson(tmp_dest)
-
+ })
### Reload
+ lifecycle::expect_deprecated({
geojson_2_sdf <- spark_read_geojson(sc, path = tmp_dest)
-
+ })
## order of columns changes !
expect_equal(
colnames(geojson_sdf) %>% sort(),