This is an automated email from the ASF dual-hosted git repository.
felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new c433396f2b [VL] Fix overflow of pageNumber in VeloxSortShuffleWriter
(#11101)
c433396f2b is described below
commit c433396f2b8c7d88f5977b273d68bcca6eecfc4e
Author: Zhen Li <[email protected]>
AuthorDate: Thu Nov 20 04:36:30 2025 +0800
[VL] Fix overflow of pageNumber in VeloxSortShuffleWriter (#11101)
Fix overflow of pageNumber in VeloxSortShuffleWriter. The page number must be
less than 8192 because it is stored in a 13-bit field of the compact row ID.
---
cpp/velox/shuffle/VeloxSortShuffleWriter.cc | 3 ++-
cpp/velox/shuffle/VeloxSortShuffleWriter.h | 4 +++-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/cpp/velox/shuffle/VeloxSortShuffleWriter.cc
b/cpp/velox/shuffle/VeloxSortShuffleWriter.cc
index 3bcdacf3e9..57166e7ab7 100644
--- a/cpp/velox/shuffle/VeloxSortShuffleWriter.cc
+++ b/cpp/velox/shuffle/VeloxSortShuffleWriter.cc
@@ -31,6 +31,7 @@ constexpr uint32_t kMaskLower27Bits = (1 << 27) - 1;
constexpr uint64_t kMaskLower40Bits = (1UL << 40) - 1;
constexpr uint32_t kPartitionIdStartByteIndex = 5;
constexpr uint32_t kPartitionIdEndByteIndex = 7;
+constexpr uint32_t kMaxPageNumber = (1 << 13) - 1; // 13-bit max = 8191
uint64_t toCompactRowId(uint32_t partitionId, uint32_t pageNumber, uint32_t
offsetInPage) {
// |63 partitionId(24) |39 inputIndex(13) |26 rowIndex(27) |
@@ -216,7 +217,7 @@ void VeloxSortShuffleWriter::insertRows(
}
arrow::Status VeloxSortShuffleWriter::maybeSpill(uint32_t nextRows) {
- if ((uint64_t)offset_ + nextRows > std::numeric_limits<uint32_t>::max()) {
+ if ((uint64_t)offset_ + nextRows > std::numeric_limits<uint32_t>::max() ||
pageNumber_ >= kMaxPageNumber) {
RETURN_NOT_OK(evictAllPartitions());
}
return arrow::Status::OK();
diff --git a/cpp/velox/shuffle/VeloxSortShuffleWriter.h
b/cpp/velox/shuffle/VeloxSortShuffleWriter.h
index 6fc08434bd..9ab842718b 100644
--- a/cpp/velox/shuffle/VeloxSortShuffleWriter.h
+++ b/cpp/velox/shuffle/VeloxSortShuffleWriter.h
@@ -106,7 +106,9 @@ class VeloxSortShuffleWriter final : public
VeloxShuffleWriter {
std::list<facebook::velox::BufferPtr> pages_;
std::vector<char*> pageAddresses_;
char* currentPage_;
+ // 13-bit: max 8192 pages
uint32_t pageNumber_;
+ // 27-bit: max 128MB page size
uint32_t pageCursor_;
// For debug.
uint32_t currenPageSize_;
@@ -116,7 +118,7 @@ class VeloxSortShuffleWriter final : public
VeloxShuffleWriter {
// Row ID -> Partition ID
// subscript: The index of row in the current input RowVector
- // value: Partition ID
+ // value: Partition ID (24-bit: max 16M partitions)
// Updated for each input RowVector.
std::vector<uint32_t> row2Partition_;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]