This is an automated email from the ASF dual-hosted git repository.
yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 27eabaace6 [#7185] Improvement(doris): Doris supports 'DISTRIBUTED BY
HASH(columnName) BUCKETS AUTO' (#7186)
27eabaace6 is described below
commit 27eabaace620851feda7c367e2b9a817a4cf4bd7
Author: Xiaojian Sun <[email protected]>
AuthorDate: Tue Jun 3 13:43:16 2025 +0800
[#7185] Improvement(doris): Doris supports 'DISTRIBUTED BY
HASH(columnName) BUCKETS AUTO' (#7186)
<!--
1. Title: [#<issue>] <type>(<scope>): <subject>
Examples:
- "[#123] feat(operator): support xxx"
- "[#233] fix: check null before access result in xxx"
- "[MINOR] refactor: fix typo in variable name"
- "[MINOR] docs: fix typo in README"
- "[#255] test: fix flaky test NameOfTheTest"
Reference: https://www.conventionalcommits.org/en/v1.0.0/
2. If the PR is unfinished, please mark this PR as draft.
-->
### What changes were proposed in this pull request?
Doris supports 'DISTRIBUTED BY HASH(dt_date) BUCKETS AUTO'
### Why are the changes needed?
Fix: [#7185](https://github.com/apache/gravitino/issues/7185)
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
org.apache.gravitino.catalog.doris.utils.TestDorisUtils#testDistributedInfoPattern
org.apache.gravitino.catalog.doris.integration.test.CatalogDorisIT#testAllDistributionWithAuto
---
.../expressions/distributions/Distributions.java | 17 ++++++++++++
.../doris/operation/DorisTableOperations.java | 2 +-
.../gravitino/catalog/doris/utils/DorisUtils.java | 27 ++++++++++++++----
.../doris/integration/test/CatalogDorisIT.java | 32 ++++++++++++++++++++++
.../catalog/doris/utils/TestDorisUtils.java | 14 ++++++++++
.../apache/gravitino/dto/rel/DistributionDTO.java | 3 +-
...partitioning-distribution-sort-order-indexes.md | 12 ++++++++
7 files changed, 99 insertions(+), 8 deletions(-)
diff --git
a/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Distributions.java
b/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Distributions.java
index 905e0c5550..c7aeb62189 100644
---
a/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Distributions.java
+++
b/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Distributions.java
@@ -26,6 +26,12 @@ import org.apache.gravitino.rel.expressions.NamedReference;
/** Helper methods to create distributions to pass into Apache Gravitino. */
public class Distributions {
+ /**
+ * AUTO indicates that the number of buckets is automatically determined by
the system (without
+ * the need for manual specification).
+ */
+ public static final int AUTO = -1;
+
/** NONE is used to indicate that there is no distribution. */
public static final Distribution NONE =
new DistributionImpl(Strategy.NONE, 0, Expression.EMPTY_EXPRESSION);
@@ -74,6 +80,17 @@ public class Distributions {
return new DistributionImpl(strategy, number, expressions);
}
+ /**
+ * Create a distribution by the given strategy; the number of buckets is set to {@link #AUTO}.
+ *
+ * @param strategy The strategy to use
+ * @param expressions The expressions to distribute by
+ * @return The created distribution
+ */
+ public static Distribution auto(Strategy strategy, Expression...
expressions) {
+ return new DistributionImpl(strategy, AUTO, expressions);
+ }
+
/**
* Create a distribution on columns. Like distribute by (a) or (a, b), for
complex like
* distributing by (func(a), b) or (func(a), func(b)), please use {@link
DistributionImpl.Builder}
diff --git
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java
index f4b99e9b01..c87face2cb 100644
---
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java
+++
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java
@@ -134,7 +134,7 @@ public class DorisTableOperations extends
JdbcTableOperations {
}
if (distribution.number() != 0) {
- sqlBuilder.append(" BUCKETS ").append(distribution.number());
+ sqlBuilder.append(" BUCKETS
").append(DorisUtils.toBucketNumberString(distribution.number()));
}
properties = appendNecessaryProperties(properties);
diff --git
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
index e0543b8eee..b5a4541b57 100644
---
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
+++
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
@@ -29,6 +29,7 @@ import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.gravitino.rel.expressions.NamedReference;
import org.apache.gravitino.rel.expressions.distributions.Distribution;
+import org.apache.gravitino.rel.expressions.distributions.Distributions;
import
org.apache.gravitino.rel.expressions.distributions.Distributions.DistributionImpl;
import org.apache.gravitino.rel.expressions.distributions.Strategy;
import org.apache.gravitino.rel.expressions.literals.Literal;
@@ -48,7 +49,7 @@ public final class DorisUtils {
private static final Pattern DISTRIBUTION_INFO_PATTERN =
Pattern.compile(
- "DISTRIBUTED
BY\\s+(HASH|RANDOM)\\s*(\\(([^)]+)\\))?\\s*(BUCKETS\\s+(\\d+))?");
+ "DISTRIBUTED
BY\\s+(HASH|RANDOM)\\s*(\\(([^)]+)\\))?\\s*(BUCKETS\\s+(\\d+|AUTO))?");
private static final String LIST_PARTITION = "LIST";
private static final String RANGE_PARTITION = "RANGE";
@@ -202,11 +203,8 @@ public final class DorisUtils {
.map(f -> f.substring(1, f.length() - 1))
.toArray(String[]::new);
- // Default bucket number is 1.
- int bucketNum = 1;
- if (matcher.find(5)) {
- bucketNum = Integer.valueOf(matcher.group(5));
- }
+ // Default bucket number is 1, auto is -1.
+ int bucketNum = extractBucketNum(matcher);
return new DistributionImpl.Builder()
.withStrategy(Strategy.getByName(distributionType))
@@ -220,4 +218,21 @@ public final class DorisUtils {
throw new RuntimeException("Failed to extract distribution info in sql:" +
createTableSql);
}
+
+ private static int extractBucketNum(Matcher matcher) {
+ int bucketNum = 1;
+ if (matcher.find(5)) {
+ String bucketValue = matcher.group(5);
+ // Use -1 to indicate auto bucket.
+ bucketNum =
+ bucketValue.trim().toUpperCase().equals("AUTO")
+ ? Distributions.AUTO
+ : Integer.valueOf(bucketValue);
+ }
+ return bucketNum;
+ }
+
+ public static String toBucketNumberString(int number) {
+ return number == Distributions.AUTO ? "AUTO" : String.valueOf(number);
+ }
}
diff --git
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
index 9d2c798ae7..11911bba47 100644
---
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
+++
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
@@ -61,6 +61,7 @@ import org.apache.gravitino.rel.expressions.Expression;
import org.apache.gravitino.rel.expressions.NamedReference;
import org.apache.gravitino.rel.expressions.distributions.Distribution;
import org.apache.gravitino.rel.expressions.distributions.Distributions;
+import org.apache.gravitino.rel.expressions.distributions.Strategy;
import org.apache.gravitino.rel.expressions.literals.Literal;
import org.apache.gravitino.rel.expressions.literals.Literals;
import org.apache.gravitino.rel.expressions.sorts.SortOrder;
@@ -1004,4 +1005,35 @@ public class CatalogDorisIT extends BaseIT {
tableCatalog.dropTable(tableIdentifier);
}
}
+
+ @Test
+ void testAllDistributionWithAuto() {
+ Distribution distribution =
+ Distributions.auto(Strategy.HASH,
NamedReference.field(DORIS_COL_NAME1));
+
+ String tableName =
GravitinoITUtils.genRandomName("test_distribution_table");
+ NameIdentifier tableIdentifier = NameIdentifier.of(schemaName, tableName);
+ Column[] columns = createColumns();
+ Index[] indexes = Indexes.EMPTY_INDEXES;
+ Map<String, String> properties = createTableProperties();
+ Transform[] partitioning = Transforms.EMPTY_TRANSFORM;
+ TableCatalog tableCatalog = catalog.asTableCatalog();
+ tableCatalog.createTable(
+ tableIdentifier,
+ columns,
+ table_comment,
+ properties,
+ partitioning,
+ distribution,
+ null,
+ indexes);
+ // load table
+ Table loadTable = tableCatalog.loadTable(tableIdentifier);
+
+ Assertions.assertEquals(distribution.strategy(),
loadTable.distribution().strategy());
+ Assertions.assertEquals(distribution.number(),
loadTable.distribution().number());
+ Assertions.assertArrayEquals(
+ distribution.expressions(), loadTable.distribution().expressions());
+ tableCatalog.dropTable(tableIdentifier);
+ }
}
diff --git
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
index 26d011af8e..a73f04ed6e 100644
---
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
+++
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
@@ -26,6 +26,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
+import org.apache.gravitino.rel.expressions.distributions.Distribution;
import org.apache.gravitino.rel.expressions.literals.Literal;
import org.apache.gravitino.rel.expressions.literals.Literals;
import org.apache.gravitino.rel.expressions.transforms.Transform;
@@ -171,4 +172,17 @@ public class TestDorisUtils {
partitionSqlFragment = DorisUtils.generatePartitionSqlFragment(partition);
assertEquals("PARTITION `p7` VALUES IN ((\"1\",\"2\"),(\"3\",\"4\"))",
partitionSqlFragment);
}
+
+ @Test
+ public void testDistributedInfoPattern() {
+ String createTableSql =
+ "CREATE TABLE `testTable` (\n`col1` date NOT NULL\n) ENGINE=OLAP\n
PARTITION BY RANGE(`col1`)\n()\n DISTRIBUTED BY HASH(`col1`) BUCKETS 2";
+ Distribution distribution =
DorisUtils.extractDistributionInfoFromSql(createTableSql);
+ assertEquals(distribution.number(), 2);
+
+ String createTableSqlWithAuto =
+ "CREATE TABLE `testTable` (\n`col1` date NOT NULL\n) ENGINE=OLAP\n
PARTITION BY RANGE(`col1`)\n()\n DISTRIBUTED BY HASH(`col1`) BUCKETS AUTO";
+ Distribution distribution2 =
DorisUtils.extractDistributionInfoFromSql(createTableSqlWithAuto);
+ assertEquals(distribution2.number(), -1);
+ }
}
diff --git
a/common/src/main/java/org/apache/gravitino/dto/rel/DistributionDTO.java
b/common/src/main/java/org/apache/gravitino/dto/rel/DistributionDTO.java
index 1a1eddef90..1d6887af09 100644
--- a/common/src/main/java/org/apache/gravitino/dto/rel/DistributionDTO.java
+++ b/common/src/main/java/org/apache/gravitino/dto/rel/DistributionDTO.java
@@ -163,7 +163,8 @@ public class DistributionDTO implements Distribution {
strategy = strategy == null ? Strategy.HASH : strategy;
Preconditions.checkState(args != null, "expressions cannot be null");
- Preconditions.checkState(number >= 0, "bucketNum must be greater than
0");
+ // The number of buckets must be greater than or equal to -1; -1 means AUTO.
+ Preconditions.checkState(number >= -1, "bucketNum must be greater than
-1");
return new DistributionDTO(strategy, number, args);
}
}
diff --git a/docs/table-partitioning-distribution-sort-order-indexes.md
b/docs/table-partitioning-distribution-sort-order-indexes.md
index 4ffc56f021..581d10fc3a 100644
--- a/docs/table-partitioning-distribution-sort-order-indexes.md
+++ b/docs/table-partitioning-distribution-sort-order-indexes.md
@@ -84,9 +84,21 @@ Distributions.of(Strategy.HASH, 4,
NamedReference.field("score"));
```
</TabItem>
+
+<TabItem value="java" label="Java">
+If you want the number of buckets to be determined automatically, use the following code; it sets the bucket number to -1.
+
+```java
+// Auto distribution with strategy and fields
+Distributions.auto(Strategy.HASH, NamedReference.field("score"));
+```
+</TabItem>
+
</Tabs>
+
+
## Sort ordering
To define a sorted order table, you should use the following three components
to construct a valid sorted order table.