This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new e30c8d2a27 [SEDONA-704] Fix STAC reader option issues and allow
disabling outdb raster generation (#1847)
e30c8d2a27 is described below
commit e30c8d2a275a6a056577f27b58f88b228e16e0a8
Author: Feng Zhang <[email protected]>
AuthorDate: Mon Mar 10 14:53:53 2025 -0700
[SEDONA-704] Fix STAC reader option issues and allow disabling outdb
raster generation (#1847)
---
.../spark/sql/sedona_sql/io/stac/StacBatch.scala | 6 +++++-
.../sedona_sql/io/stac/StacPartitionReader.scala | 25 +++++++++++++++++++++-
2 files changed, 29 insertions(+), 2 deletions(-)
diff --git
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacBatch.scala
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacBatch.scala
index 1de8518de7..5994c463fd 100644
---
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacBatch.scala
+++
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacBatch.scala
@@ -48,7 +48,11 @@ case class StacBatch(
temporalFilter: Option[TemporalFilter])
extends Batch {
- private val defaultItemsLimitPerRequest =
opts.getOrElse("itemsLimitPerRequest", "10").toInt
+ private val defaultItemsLimitPerRequest: Int = {
+ val itemsLimitMax = opts.getOrElse("itemsLimitMax", "-1").toInt
+ val limitPerRequest = opts.getOrElse("itemsLimitPerRequest", "10").toInt
+ if (itemsLimitMax > 0 && limitPerRequest > itemsLimitMax) itemsLimitMax
else limitPerRequest
+ }
private val itemsLoadProcessReportThreshold =
opts.getOrElse("itemsLoadProcessReportThreshold", "1000000").toInt
private var itemMaxLeft: Int = -1
diff --git
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacPartitionReader.scala
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacPartitionReader.scala
index 4929087db5..a545eb232f 100644
---
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacPartitionReader.scala
+++
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacPartitionReader.scala
@@ -62,7 +62,7 @@ class StacPartitionReader(
val tempFile = File.createTempFile("stac_item_", ".json")
val writer = new PrintWriter(tempFile)
try {
- val fileContent = Source.fromURL(url).mkString
+ val fileContent = fetchContentWithRetry(url)
val rootNode = mapper.readTree(fileContent)
val nodeType = rootNode.get("type").asText()
@@ -150,6 +150,29 @@ class StacPartitionReader(
}
}
+ def fetchContentWithRetry(url: java.net.URL, maxRetries: Int = 3): String = {
+ var attempt = 0
+ var success = false
+ var fileContent: String = ""
+
+ while (attempt < maxRetries && !success) {
+ try {
+ fileContent = Source.fromURL(url).mkString
+ success = true
+ } catch {
+ case e: Exception =>
+ attempt += 1
+ if (attempt >= maxRetries) {
+ throw new RuntimeException(
+ s"Failed to fetch content from URL after $maxRetries attempts",
+ e)
+ }
+ }
+ }
+
+ fileContent
+ }
+
/**
* Create a PartitionedFile instance using reflection. The constructor
parameters differ between
* these versions, so we need to handle both cases. For Spark 3.4 and below,
the constructor has