This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 1ffcb4e31d GH-49559: [C++][Parquet] Fix uncontrolled recursion in
WKBGeometryBounder::MergeGeometryInternal (#49558)
1ffcb4e31d is described below
commit 1ffcb4e31d6f54c6e0ef0fc85d3f9c9903753c63
Author: Gang Wu <[email protected]>
AuthorDate: Mon Mar 23 16:42:23 2026 +0800
GH-49559: [C++][Parquet] Fix uncontrolled recursion in
WKBGeometryBounder::MergeGeometryInternal (#49558)
### Rationale for this change
Fix `MergeGeometryInternal` stack overflow from deeply nested WKB
GeometryCollection inputs.
### What changes are included in this PR?
Added a depth limit (128) to `MergeGeometryInternal` to prevent stack
overflow when parsing deeply nested WKB GeometryCollection inputs.
### Are these changes tested?
A unit test `TestWKBBounderErrorForDeepNesting` has been included to ensure
proper exception throwing upon exceeding the limit.
### Are there any user-facing changes?
No.
* GitHub Issue: #49559
Lead-authored-by: Gang Wu <[email protected]>
Co-authored-by: google-labs-jules[bot]
<161369871+google-labs-jules[bot]@users.noreply.github.com>
Co-authored-by: wgtmac <[email protected]>
Signed-off-by: Gang Wu <[email protected]>
---
cpp/src/parquet/geospatial/util_internal.cc | 9 +++--
cpp/src/parquet/geospatial/util_internal.h | 2 +-
cpp/src/parquet/geospatial/util_internal_test.cc | 43 ++++++++++++++++++++++++
3 files changed, 51 insertions(+), 3 deletions(-)
diff --git a/cpp/src/parquet/geospatial/util_internal.cc
b/cpp/src/parquet/geospatial/util_internal.cc
index d5c8d66288..f27ad7012a 100644
--- a/cpp/src/parquet/geospatial/util_internal.cc
+++ b/cpp/src/parquet/geospatial/util_internal.cc
@@ -160,7 +160,12 @@ void
WKBGeometryBounder::MergeGeometry(::arrow::util::span<const uint8_t> bytes_
}
}
-void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer* src, bool
record_wkb_type) {
+void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer* src, bool
record_wkb_type,
+ int depth) {
+ if (depth > 128) {
+ throw ParquetException("WKB geometry has too many levels of nesting");
+ }
+
uint8_t endian = src->ReadUInt8();
#if ARROW_LITTLE_ENDIAN
bool swap = endian != 0x01;
@@ -208,7 +213,7 @@ void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer*
src, bool record_wkb_t
case GeometryType::kGeometryCollection: {
uint32_t n_parts = src->ReadUInt32(swap);
for (uint32_t i = 0; i < n_parts; i++) {
- MergeGeometryInternal(src, /*record_wkb_type*/ false);
+ MergeGeometryInternal(src, /*record_wkb_type*/ false, depth + 1);
}
break;
}
diff --git a/cpp/src/parquet/geospatial/util_internal.h
b/cpp/src/parquet/geospatial/util_internal.h
index 2de9e1d076..9af144d42b 100644
--- a/cpp/src/parquet/geospatial/util_internal.h
+++ b/cpp/src/parquet/geospatial/util_internal.h
@@ -216,7 +216,7 @@ class PARQUET_EXPORT WKBGeometryBounder {
BoundingBox box_;
std::unordered_set<int32_t> geospatial_types_;
- void MergeGeometryInternal(WKBBuffer* src, bool record_wkb_type);
+ void MergeGeometryInternal(WKBBuffer* src, bool record_wkb_type, int depth =
0);
void MergeSequence(WKBBuffer* src, Dimensions dimensions, uint32_t n_coords,
bool swap);
};
diff --git a/cpp/src/parquet/geospatial/util_internal_test.cc
b/cpp/src/parquet/geospatial/util_internal_test.cc
index 96f90b64b5..527bcc5050 100644
--- a/cpp/src/parquet/geospatial/util_internal_test.cc
+++ b/cpp/src/parquet/geospatial/util_internal_test.cc
@@ -545,4 +545,47 @@ INSTANTIATE_TEST_SUITE_P(
MakeWKBPointTestCase{{30, 10, 40, 300}, false, true},
MakeWKBPointTestCase{{30, 10, 40, 300}, true, true}));
+TEST(TestGeometryUtil, TestWKBBounderErrorForDeepNesting) {
+ // Construct a nested GeometryCollection with 200 levels
+ std::vector<uint8_t> nested_wkb;
+ int num_levels = 200;
+
+ for (int i = 0; i < num_levels; i++) {
+ nested_wkb.push_back(0x01); // little endian
+ nested_wkb.push_back(0x07); // geometry collection
+ nested_wkb.push_back(0x00);
+ nested_wkb.push_back(0x00);
+ nested_wkb.push_back(0x00);
+
+ nested_wkb.push_back(0x01); // 1 part
+ nested_wkb.push_back(0x00);
+ nested_wkb.push_back(0x00);
+ nested_wkb.push_back(0x00);
+ }
+
+ // Final part is an empty geometry collection
+ nested_wkb.push_back(0x01); // little endian
+ nested_wkb.push_back(0x07); // geometry collection
+ nested_wkb.push_back(0x00);
+ nested_wkb.push_back(0x00);
+ nested_wkb.push_back(0x00);
+
+ nested_wkb.push_back(0x00); // 0 parts
+ nested_wkb.push_back(0x00);
+ nested_wkb.push_back(0x00);
+ nested_wkb.push_back(0x00);
+
+ WKBGeometryBounder bounder;
+ EXPECT_THROW(
+ {
+ try {
+ bounder.MergeGeometry(nested_wkb);
+ } catch (const ParquetException& e) {
+ EXPECT_THAT(e.what(), ::testing::HasSubstr("too many levels of
nesting"));
+ throw;
+ }
+ },
+ ParquetException);
+}
+
} // namespace parquet::geospatial