This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 1ffcb4e31d GH-49559: [C++][Parquet] Fix uncontrolled recursion in 
WKBGeometryBounder::MergeGeometryInternal (#49558)
1ffcb4e31d is described below

commit 1ffcb4e31d6f54c6e0ef0fc85d3f9c9903753c63
Author: Gang Wu <[email protected]>
AuthorDate: Mon Mar 23 16:42:23 2026 +0800

    GH-49559: [C++][Parquet] Fix uncontrolled recursion in 
WKBGeometryBounder::MergeGeometryInternal (#49558)
    
    ### Rationale for this change
    
    Fix `MergeGeometryInternal` stack overflow from deeply nested WKB 
GeometryCollection inputs.
    
    ### What changes are included in this PR?
    
    Added a depth limit (128) to `MergeGeometryInternal` to prevent stack 
overflow when parsing deeply nested WKB GeometryCollection inputs.
    
    ### Are these changes tested?
    
    A unit test `TestWKBBounderErrorForDeepNesting` has been included to ensure 
proper exception throwing upon exceeding the limit.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #49559
    
    Lead-authored-by: Gang Wu <[email protected]>
    Co-authored-by: google-labs-jules[bot] 
<161369871+google-labs-jules[bot]@users.noreply.github.com>
    Co-authored-by: wgtmac <[email protected]>
    Signed-off-by: Gang Wu <[email protected]>
---
 cpp/src/parquet/geospatial/util_internal.cc      |  9 +++--
 cpp/src/parquet/geospatial/util_internal.h       |  2 +-
 cpp/src/parquet/geospatial/util_internal_test.cc | 43 ++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/cpp/src/parquet/geospatial/util_internal.cc 
b/cpp/src/parquet/geospatial/util_internal.cc
index d5c8d66288..f27ad7012a 100644
--- a/cpp/src/parquet/geospatial/util_internal.cc
+++ b/cpp/src/parquet/geospatial/util_internal.cc
@@ -160,7 +160,12 @@ void 
WKBGeometryBounder::MergeGeometry(::arrow::util::span<const uint8_t> bytes_
   }
 }
 
-void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer* src, bool 
record_wkb_type) {
+void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer* src, bool 
record_wkb_type,
+                                               int depth) {
+  if (depth > 128) {
+    throw ParquetException("WKB geometry has too many levels of nesting");
+  }
+
   uint8_t endian = src->ReadUInt8();
 #if ARROW_LITTLE_ENDIAN
   bool swap = endian != 0x01;
@@ -208,7 +213,7 @@ void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer* 
src, bool record_wkb_t
     case GeometryType::kGeometryCollection: {
       uint32_t n_parts = src->ReadUInt32(swap);
       for (uint32_t i = 0; i < n_parts; i++) {
-        MergeGeometryInternal(src, /*record_wkb_type*/ false);
+        MergeGeometryInternal(src, /*record_wkb_type*/ false, depth + 1);
       }
       break;
     }
diff --git a/cpp/src/parquet/geospatial/util_internal.h 
b/cpp/src/parquet/geospatial/util_internal.h
index 2de9e1d076..9af144d42b 100644
--- a/cpp/src/parquet/geospatial/util_internal.h
+++ b/cpp/src/parquet/geospatial/util_internal.h
@@ -216,7 +216,7 @@ class PARQUET_EXPORT WKBGeometryBounder {
   BoundingBox box_;
   std::unordered_set<int32_t> geospatial_types_;
 
-  void MergeGeometryInternal(WKBBuffer* src, bool record_wkb_type);
+  void MergeGeometryInternal(WKBBuffer* src, bool record_wkb_type, int depth = 
0);
 
   void MergeSequence(WKBBuffer* src, Dimensions dimensions, uint32_t n_coords, 
bool swap);
 };
diff --git a/cpp/src/parquet/geospatial/util_internal_test.cc 
b/cpp/src/parquet/geospatial/util_internal_test.cc
index 96f90b64b5..527bcc5050 100644
--- a/cpp/src/parquet/geospatial/util_internal_test.cc
+++ b/cpp/src/parquet/geospatial/util_internal_test.cc
@@ -545,4 +545,47 @@ INSTANTIATE_TEST_SUITE_P(
                       MakeWKBPointTestCase{{30, 10, 40, 300}, false, true},
                       MakeWKBPointTestCase{{30, 10, 40, 300}, true, true}));
 
+TEST(TestGeometryUtil, TestWKBBounderErrorForDeepNesting) {
+  // Construct a nested GeometryCollection with 200 levels
+  std::vector<uint8_t> nested_wkb;
+  int num_levels = 200;
+
+  for (int i = 0; i < num_levels; i++) {
+    nested_wkb.push_back(0x01);  // little endian
+    nested_wkb.push_back(0x07);  // geometry collection
+    nested_wkb.push_back(0x00);
+    nested_wkb.push_back(0x00);
+    nested_wkb.push_back(0x00);
+
+    nested_wkb.push_back(0x01);  // 1 part
+    nested_wkb.push_back(0x00);
+    nested_wkb.push_back(0x00);
+    nested_wkb.push_back(0x00);
+  }
+
+  // Final part is an empty geometry collection
+  nested_wkb.push_back(0x01);  // little endian
+  nested_wkb.push_back(0x07);  // geometry collection
+  nested_wkb.push_back(0x00);
+  nested_wkb.push_back(0x00);
+  nested_wkb.push_back(0x00);
+
+  nested_wkb.push_back(0x00);  // 0 parts
+  nested_wkb.push_back(0x00);
+  nested_wkb.push_back(0x00);
+  nested_wkb.push_back(0x00);
+
+  WKBGeometryBounder bounder;
+  EXPECT_THROW(
+      {
+        try {
+          bounder.MergeGeometry(nested_wkb);
+        } catch (const ParquetException& e) {
+          EXPECT_THAT(e.what(), ::testing::HasSubstr("too many levels of 
nesting"));
+          throw;
+        }
+      },
+      ParquetException);
+}
+
 }  // namespace parquet::geospatial

Reply via email to