This is an automated email from the ASF dual-hosted git repository.

zhonghongsheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/shardingsphere.git


The following commit(s) were added to refs/heads/master by this push:
     new f81a1c9fa6c Support unique key first integer column exact or estimated splitting based on data sparseness (#37542)
f81a1c9fa6c is described below

commit f81a1c9fa6cb056b641d898eaf595a07c7355348
Author: Hongsheng Zhong <[email protected]>
AuthorDate: Fri Dec 26 20:14:42 2025 +0800

    Support unique key first integer column exact or estimated splitting based on data sparseness (#37542)
    
    * Add InventoryDataSparsenessCalculator
    
    * Update InventoryPositionCalculator to integrate data sparseness calculator
    
    * Update RELEASE-NOTES.md
---
 RELEASE-NOTES.md                                   |  1 +
 .../core/ingest/dumper/inventory/query/Range.java  |  5 ++
 .../InventoryDataSparsenessCalculator.java         | 60 ++++++++++++++++++++++
 .../position/InventoryPositionCalculator.java      | 16 ++++--
 .../InventoryDataSparsenessCalculatorTest.java     | 37 +++++++++++++
 5 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 9543c38c63c..3ff8a3bfa8a 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -68,6 +68,7 @@
 1. Pipeline: InventoryDumperContextSplitter supports multi-columns unique key first integer column splitting - [#36935](https://github.com/apache/shardingsphere/pull/36935)
 1. Pipeline: Support unique key first integer column exact splitting - [#37517](https://github.com/apache/shardingsphere/pull/37517)
 1. Pipeline: Improve InventoryPositionEstimatedCalculator: support possible null unique key value - [#37522](https://github.com/apache/shardingsphere/pull/37522)
+1. Pipeline: Support unique key first integer column exact or estimated splitting based on data sparseness - [#37542](https://github.com/apache/shardingsphere/pull/37542)
 1. Encrypt: Support handling show create view result decoration in encrypt - [#37299](https://github.com/apache/shardingsphere/pull/37299)
 1. JDBC: Enhance ResultSetUtils to support flexible string date/time conversions - [37424](https://github.com/apache/shardingsphere/pull/37424)
 
diff --git 
a/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/ingest/dumper/inventory/query/Range.java
 
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/ingest/dumper/inventory/query/Range.java
index 48c8274037c..031582935b1 100644
--- 
a/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/ingest/dumper/inventory/query/Range.java
+++ 
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/ingest/dumper/inventory/query/Range.java
@@ -59,4 +59,9 @@ public final class Range<T> {
     public static <T> Range<T> openClosed(final T lowerBound, final T 
upperBound) {
         return new Range<T>(lowerBound, false, upperBound);
     }
+    
+    @Override
+    public String toString() {
+        return (lowerInclusive ? "[" : "(") + lowerBound + ", " + upperBound + 
"]";
+    }
 }
diff --git 
a/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculator.java
 
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculator.java
new file mode 100644
index 00000000000..60d9c65a23c
--- /dev/null
+++ 
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculator.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package 
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator;
+
+import lombok.AccessLevel;
+import lombok.NoArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import 
org.apache.shardingsphere.data.pipeline.core.ingest.dumper.inventory.query.Range;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+
+/**
+ * Inventory data sparseness calculator.
+ */
+@NoArgsConstructor(access = AccessLevel.PRIVATE)
+@Slf4j
+public final class InventoryDataSparsenessCalculator {
+    
+    private static final long EXACT_SPLITTING_RECORDS_COUNT_THRESHOLD = 
1000000L;
+    
+    private static final BigDecimal MULTIPLE_THRESHOLD = new 
BigDecimal("1.50");
+    
+    /**
+     * Is integer unique key data sparse.
+     *
+     * @param tableRecordsCount table records count
+     * @param uniqueKeyValuesRange unique key values range
+     * @return true if sparse
+     */
+    public static boolean isIntegerUniqueKeyDataSparse(final long 
tableRecordsCount, final Range<Long> uniqueKeyValuesRange) {
+        boolean result = false;
+        Long lowerValue = uniqueKeyValuesRange.getLowerBound();
+        Long upperValue = uniqueKeyValuesRange.getUpperBound();
+        if (tableRecordsCount >= EXACT_SPLITTING_RECORDS_COUNT_THRESHOLD && 
null != lowerValue && null != upperValue) {
+            BigDecimal multiple = 
BigDecimal.valueOf(upperValue).subtract(BigDecimal.valueOf(lowerValue)).add(BigDecimal.ONE)
+                    .divide(BigDecimal.valueOf(tableRecordsCount), 2, 
RoundingMode.HALF_UP);
+            if (multiple.compareTo(MULTIPLE_THRESHOLD) >= 0) {
+                log.info("Table is sparse for integer unique key, table 
records count: {}, unique key values range: {}, multiple: {}", 
tableRecordsCount, uniqueKeyValuesRange, multiple);
+                result = true;
+            }
+        }
+        return result;
+    }
+}
diff --git 
a/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/position/InventoryPositionCalculator.java
 
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/position/InventoryPositionCalculator.java
index a296a55f2a0..42a9b781fd8 100644
--- 
a/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/position/InventoryPositionCalculator.java
+++ 
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/position/InventoryPositionCalculator.java
@@ -24,7 +24,10 @@ import 
org.apache.shardingsphere.data.pipeline.core.ingest.position.IngestPositi
 import 
org.apache.shardingsphere.data.pipeline.core.ingest.position.type.pk.type.StringPrimaryKeyIngestPosition;
 import 
org.apache.shardingsphere.data.pipeline.core.ingest.position.type.pk.type.UnsupportedKeyIngestPosition;
 import 
org.apache.shardingsphere.data.pipeline.core.metadata.model.PipelineColumnMetaData;
+import 
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator.InventoryDataSparsenessCalculator;
 import 
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator.position.estimated.InventoryPositionEstimatedCalculator;
+import 
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator.position.exact.IntegerPositionHandler;
+import 
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator.position.exact.InventoryPositionExactCalculator;
 import 
org.apache.shardingsphere.database.connector.core.metadata.database.metadata.option.datatype.DialectDataTypeOption;
 import 
org.apache.shardingsphere.database.connector.core.type.DatabaseTypeRegistry;
 import org.apache.shardingsphere.infra.metadata.database.schema.QualifiedTable;
@@ -57,13 +60,20 @@ public final class InventoryPositionCalculator {
         DialectDataTypeOption dataTypeOption = new 
DatabaseTypeRegistry(dataSource.getDatabaseType()).getDialectDatabaseMetaData().getDataTypeOption();
         int firstColumnDataType = uniqueKeyColumns.get(0).getDataType();
         if (dataTypeOption.isIntegerDataType(firstColumnDataType)) {
-            String uniqueKey = uniqueKeyColumns.get(0).getName();
-            Range<Long> uniqueKeyValuesRange = 
InventoryPositionEstimatedCalculator.getIntegerUniqueKeyValuesRange(qualifiedTable,
 uniqueKey, dataSource);
-            return 
InventoryPositionEstimatedCalculator.getIntegerPositions(tableRecordsCount, 
uniqueKeyValuesRange, shardingSize);
+            return getIntegerPositions();
         }
         if (1 == uniqueKeyColumns.size() && 
dataTypeOption.isStringDataType(firstColumnDataType)) {
             return Collections.singletonList(new 
StringPrimaryKeyIngestPosition(null, null));
         }
         return Collections.singletonList(new UnsupportedKeyIngestPosition());
     }
+    
+    private List<IngestPosition> getIntegerPositions() {
+        String uniqueKey = uniqueKeyColumns.get(0).getName();
+        Range<Long> uniqueKeyValuesRange = 
InventoryPositionEstimatedCalculator.getIntegerUniqueKeyValuesRange(qualifiedTable,
 uniqueKey, dataSource);
+        if 
(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(tableRecordsCount,
 uniqueKeyValuesRange)) {
+            return 
InventoryPositionExactCalculator.getPositions(qualifiedTable, uniqueKey, 
shardingSize, dataSource, new IntegerPositionHandler());
+        }
+        return 
InventoryPositionEstimatedCalculator.getIntegerPositions(tableRecordsCount, 
uniqueKeyValuesRange, shardingSize);
+    }
 }
diff --git 
a/kernel/data-pipeline/core/src/test/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculatorTest.java
 
b/kernel/data-pipeline/core/src/test/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculatorTest.java
new file mode 100644
index 00000000000..6979188719d
--- /dev/null
+++ 
b/kernel/data-pipeline/core/src/test/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculatorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package 
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator;
+
+import 
org.apache.shardingsphere.data.pipeline.core.ingest.dumper.inventory.query.Range;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class InventoryDataSparsenessCalculatorTest {
+    
+    @Test
+    void assertIsIntegerUniqueKeyDataSparse() {
+        
assertFalse(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1L, 
Range.closed(1L, 1000000L)));
+        
assertFalse(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1000000L,
 Range.closed(null, 1000000L)));
+        
assertFalse(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1000000L,
 Range.closed(1L, null)));
+        
assertFalse(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1000000L,
 Range.closed(1L, 1494000L)));
+        
assertTrue(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1000000L,
 Range.closed(1L, 1495000L)));
+        
assertTrue(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1000000L,
 Range.closed(1L, 1500000L)));
+    }
+}

Reply via email to