This is an automated email from the ASF dual-hosted git repository.
zhonghongsheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/shardingsphere.git
The following commit(s) were added to refs/heads/master by this push:
new f81a1c9fa6c Support unique key first integer column exact or estimated
splitting based on data sparseness (#37542)
f81a1c9fa6c is described below
commit f81a1c9fa6cb056b641d898eaf595a07c7355348
Author: Hongsheng Zhong <[email protected]>
AuthorDate: Fri Dec 26 20:14:42 2025 +0800
Support unique key first integer column exact or estimated splitting based
on data sparseness (#37542)
* Add InventoryDataSparsenessCalculator
* Update InventoryPositionCalculator to integrate data sparseness calculator
* Update RELEASE-NOTES.md
---
RELEASE-NOTES.md | 1 +
.../core/ingest/dumper/inventory/query/Range.java | 5 ++
.../InventoryDataSparsenessCalculator.java | 60 ++++++++++++++++++++++
.../position/InventoryPositionCalculator.java | 16 ++++--
.../InventoryDataSparsenessCalculatorTest.java | 37 +++++++++++++
5 files changed, 116 insertions(+), 3 deletions(-)
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 9543c38c63c..3ff8a3bfa8a 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -68,6 +68,7 @@
1. Pipeline: InventoryDumperContextSplitter supports multi-columns unique key
first integer column splitting -
[#36935](https://github.com/apache/shardingsphere/pull/36935)
1. Pipeline: Support unique key first integer column exact splitting -
[#37517](https://github.com/apache/shardingsphere/pull/37517)
1. Pipeline: Improve InventoryPositionEstimatedCalculator: support possible
null unique key value -
[#37522](https://github.com/apache/shardingsphere/pull/37522)
+1. Pipeline: Support unique key first integer column exact or estimated
splitting based on data sparseness -
[#37542](https://github.com/apache/shardingsphere/pull/37542)
1. Encrypt: Support handling show create view result decoration in encrypt -
[#37299](https://github.com/apache/shardingsphere/pull/37299)
1. JDBC: Enhance ResultSetUtils to support flexible string date/time
conversions - [37424](https://github.com/apache/shardingsphere/pull/37424)
diff --git
a/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/ingest/dumper/inventory/query/Range.java
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/ingest/dumper/inventory/query/Range.java
index 48c8274037c..031582935b1 100644
---
a/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/ingest/dumper/inventory/query/Range.java
+++
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/ingest/dumper/inventory/query/Range.java
@@ -59,4 +59,9 @@ public final class Range<T> {
public static <T> Range<T> openClosed(final T lowerBound, final T
upperBound) {
return new Range<T>(lowerBound, false, upperBound);
}
+
+ @Override
+ public String toString() { // Renders the range in interval notation, e.g. "[1, 10]" or "(1, 10]".
+ return (lowerInclusive ? "[" : "(") + lowerBound + ", " + upperBound +
"]"; // Only the opening bracket depends on lowerInclusive; the closing bracket is always "]".
+ }
}
diff --git
a/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculator.java
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculator.java
new file mode 100644
index 00000000000..60d9c65a23c
--- /dev/null
+++
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculator.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator;
+
+import lombok.AccessLevel;
+import lombok.NoArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import
org.apache.shardingsphere.data.pipeline.core.ingest.dumper.inventory.query.Range;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+
+/**
+ * Inventory data sparseness calculator.
+ *
+ * <p>Static helper that decides whether the values of an integer unique key are spread over a range
+ * that is noticeably wider than the number of records in the table.</p>
+ */
+@NoArgsConstructor(access = AccessLevel.PRIVATE)
+@Slf4j
+public final class InventoryDataSparsenessCalculator {
+
+ private static final long EXACT_SPLITTING_RECORDS_COUNT_THRESHOLD =
1000000L; // Tables with fewer records than this are never reported as sparse.
+
+ private static final BigDecimal MULTIPLE_THRESHOLD = new
BigDecimal("1.50"); // Minimum (range width / records count) ratio for the data to count as sparse.
+
+ /**
+ * Is integer unique key data sparse.
+ *
+ * <p>The data is reported as sparse only when all of the following hold: the table has at least
+ * 1,000,000 records, both bounds of the range are non-null, and
+ * {@code (upper - lower + 1) / tableRecordsCount}, rounded half-up to two decimal places,
+ * is greater than or equal to 1.50. A sparse result is also logged at info level.</p>
+ *
+ * @param tableRecordsCount table records count
+ * @param uniqueKeyValuesRange unique key values range, a null lower or upper bound yields false
+ * @return true if sparse
+ */
+ public static boolean isIntegerUniqueKeyDataSparse(final long
tableRecordsCount, final Range<Long> uniqueKeyValuesRange) {
+ boolean result = false;
+ Long lowerValue = uniqueKeyValuesRange.getLowerBound();
+ Long upperValue = uniqueKeyValuesRange.getUpperBound();
+ if (tableRecordsCount >= EXACT_SPLITTING_RECORDS_COUNT_THRESHOLD &&
null != lowerValue && null != upperValue) { // The threshold check also guarantees a non-zero divisor below.
+ BigDecimal multiple =
BigDecimal.valueOf(upperValue).subtract(BigDecimal.valueOf(lowerValue)).add(BigDecimal.ONE)
+ .divide(BigDecimal.valueOf(tableRecordsCount), 2,
RoundingMode.HALF_UP); // BigDecimal keeps upper - lower + 1 exact even when it would not fit in a long.
+ if (multiple.compareTo(MULTIPLE_THRESHOLD) >= 0) {
+ log.info("Table is sparse for integer unique key, table
records count: {}, unique key values range: {}, multiple: {}",
tableRecordsCount, uniqueKeyValuesRange, multiple);
+ result = true;
+ }
+ }
+ return result;
+ }
+}
diff --git
a/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/position/InventoryPositionCalculator.java
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/position/InventoryPositionCalculator.java
index a296a55f2a0..42a9b781fd8 100644
---
a/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/position/InventoryPositionCalculator.java
+++
b/kernel/data-pipeline/core/src/main/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/position/InventoryPositionCalculator.java
@@ -24,7 +24,10 @@ import
org.apache.shardingsphere.data.pipeline.core.ingest.position.IngestPositi
import
org.apache.shardingsphere.data.pipeline.core.ingest.position.type.pk.type.StringPrimaryKeyIngestPosition;
import
org.apache.shardingsphere.data.pipeline.core.ingest.position.type.pk.type.UnsupportedKeyIngestPosition;
import
org.apache.shardingsphere.data.pipeline.core.metadata.model.PipelineColumnMetaData;
+import
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator.InventoryDataSparsenessCalculator;
import
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator.position.estimated.InventoryPositionEstimatedCalculator;
+import
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator.position.exact.IntegerPositionHandler;
+import
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator.position.exact.InventoryPositionExactCalculator;
import
org.apache.shardingsphere.database.connector.core.metadata.database.metadata.option.datatype.DialectDataTypeOption;
import
org.apache.shardingsphere.database.connector.core.type.DatabaseTypeRegistry;
import org.apache.shardingsphere.infra.metadata.database.schema.QualifiedTable;
@@ -57,13 +60,20 @@ public final class InventoryPositionCalculator {
DialectDataTypeOption dataTypeOption = new
DatabaseTypeRegistry(dataSource.getDatabaseType()).getDialectDatabaseMetaData().getDataTypeOption();
int firstColumnDataType = uniqueKeyColumns.get(0).getDataType();
if (dataTypeOption.isIntegerDataType(firstColumnDataType)) {
- String uniqueKey = uniqueKeyColumns.get(0).getName();
- Range<Long> uniqueKeyValuesRange =
InventoryPositionEstimatedCalculator.getIntegerUniqueKeyValuesRange(qualifiedTable,
uniqueKey, dataSource);
- return
InventoryPositionEstimatedCalculator.getIntegerPositions(tableRecordsCount,
uniqueKeyValuesRange, shardingSize);
+ return getIntegerPositions();
}
if (1 == uniqueKeyColumns.size() &&
dataTypeOption.isStringDataType(firstColumnDataType)) {
return Collections.singletonList(new
StringPrimaryKeyIngestPosition(null, null));
}
return Collections.singletonList(new UnsupportedKeyIngestPosition());
}
+
+ private List<IngestPosition> getIntegerPositions() { // Called when the first unique key column has an integer data type.
+ String uniqueKey = uniqueKeyColumns.get(0).getName(); // Only the first unique key column drives the splitting.
+ Range<Long> uniqueKeyValuesRange =
InventoryPositionEstimatedCalculator.getIntegerUniqueKeyValuesRange(qualifiedTable,
uniqueKey, dataSource); // The range feeds both the sparseness check and the estimated calculator.
+ if
(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(tableRecordsCount,
uniqueKeyValuesRange)) { // NOTE(review): presumably sparse keys would make estimated ranges uneven - confirm.
+ return
InventoryPositionExactCalculator.getPositions(qualifiedTable, uniqueKey,
shardingSize, dataSource, new IntegerPositionHandler()); // Sparse data: use the exact calculator.
+ }
+ return
InventoryPositionEstimatedCalculator.getIntegerPositions(tableRecordsCount,
uniqueKeyValuesRange, shardingSize); // Otherwise keep the estimated calculator, as before this change.
+ }
}
diff --git
a/kernel/data-pipeline/core/src/test/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculatorTest.java
b/kernel/data-pipeline/core/src/test/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculatorTest.java
new file mode 100644
index 00000000000..6979188719d
--- /dev/null
+++
b/kernel/data-pipeline/core/src/test/java/org/apache/shardingsphere/data/pipeline/core/preparer/inventory/calculator/InventoryDataSparsenessCalculatorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package
org.apache.shardingsphere.data.pipeline.core.preparer.inventory.calculator;
+
+import
org.apache.shardingsphere.data.pipeline.core.ingest.dumper.inventory.query.Range;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class InventoryDataSparsenessCalculatorTest {
+
+ @Test
+ void assertIsIntegerUniqueKeyDataSparse() { // Covers the records-count guard, missing bounds, and rounding around the 1.50 ratio.
+
assertFalse(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1L,
Range.closed(1L, 1000000L))); // Records count is below the 1,000,000 threshold.
+
assertFalse(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1000000L,
Range.closed(null, 1000000L))); // A null bound means the ratio cannot be computed.
+
assertFalse(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1000000L,
Range.closed(1L, null))); // Same for the other bound being null.
+
assertFalse(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1000000L,
Range.closed(1L, 1494000L))); // Ratio 1.494 rounds to 1.49, below 1.50.
+
assertTrue(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1000000L,
Range.closed(1L, 1495000L))); // Ratio 1.495 rounds half-up to 1.50.
+
assertTrue(InventoryDataSparsenessCalculator.isIntegerUniqueKeyDataSparse(1000000L,
Range.closed(1L, 1500000L))); // Ratio is exactly 1.50.
+ }
+}