This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new e30c8d2a27 [SEDONA-704] Fix STAC reader option issues and allow
disabling outdb raster generation (#1847)
e30c8d2a27 is described below
commit e30c8d2a275a6a056577f27b58f88b228e16e0a8
Author: Feng Zhang <[email protected]>
AuthorDate: Mon Mar 10 14:53:53 2025 -0700
[SEDONA-704] Fix STAC reader option issues and allow disabling outdb
raster generation (#1847)
---
.../spark/sql/sedona_sql/io/stac/StacBatch.scala | 6 +++++-
.../sedona_sql/io/stac/StacPartitionReader.scala | 25 +++++++++++++++++++++-
2 files changed, 29 insertions(+), 2 deletions(-)
diff --git
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacBatch.scala
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacBatch.scala
index 1de8518de7..5994c463fd 100644
---
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacBatch.scala
+++
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacBatch.scala
@@ -48,7 +48,11 @@ case class StacBatch(
temporalFilter: Option[TemporalFilter])
extends Batch {
- private val defaultItemsLimitPerRequest =
opts.getOrElse("itemsLimitPerRequest", "10").toInt
+ private val defaultItemsLimitPerRequest: Int = {
+ val itemsLimitMax = opts.getOrElse("itemsLimitMax", "-1").toInt
+ val limitPerRequest = opts.getOrElse("itemsLimitPerRequest", "10").toInt
+ if (itemsLimitMax > 0 && limitPerRequest > itemsLimitMax) itemsLimitMax
else limitPerRequest
+ }
private val itemsLoadProcessReportThreshold =
opts.getOrElse("itemsLoadProcessReportThreshold", "1000000").toInt
private var itemMaxLeft: Int = -1
diff --git
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacPartitionReader.scala
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacPartitionReader.scala
index 4929087db5..a545eb232f 100644
---
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacPartitionReader.scala
+++
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/io/stac/StacPartitionReader.scala
@@ -62,7 +62,7 @@ class StacPartitionReader(
val tempFile = File.createTempFile("stac_item_", ".json")
val writer = new PrintWriter(tempFile)
try {
- val fileContent = Source.fromURL(url).mkString
+ val fileContent = fetchContentWithRetry(url)
val rootNode = mapper.readTree(fileContent)
val nodeType = rootNode.get("type").asText()
@@ -150,6 +150,29 @@ class StacPartitionReader(
}
}
+ def fetchContentWithRetry(url: java.net.URL, maxRetries: Int = 3): String = {
+ var attempt = 0
+ var success = false
+ var fileContent: String = ""
+
+ while (attempt < maxRetries && !success) {
+ try {
+ fileContent = Source.fromURL(url).mkString
+ success = true
+ } catch {
+ case e: Exception =>
+ attempt += 1
+ if (attempt >= maxRetries) {
+ throw new RuntimeException(
+ s"Failed to fetch content from URL after $maxRetries attempts",
+ e)
+ }
+ }
+ }
+
+ fileContent
+ }
+
/**
* Create a PartitionedFile instance using reflection. The constructor
parameters differ between
* these versions, so we need to handle both cases. For Spark 3.4 and below,
the constructor has