This is an automated email from the ASF dual-hosted git repository.

yuqi4733 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
     new 27eabaace6 [#7185]  Improvement(doris): Doris supports 'DISTRIBUTED BY 
HASH(columnName) BUCKETS AUTO' (#7186)
27eabaace6 is described below

commit 27eabaace620851feda7c367e2b9a817a4cf4bd7
Author: Xiaojian Sun <[email protected]>
AuthorDate: Tue Jun 3 13:43:16 2025 +0800

    [#7185]  Improvement(doris): Doris supports 'DISTRIBUTED BY 
HASH(columnName) BUCKETS AUTO' (#7186)
    
    <!--
    1. Title: [#<issue>] <type>(<scope>): <subject>
       Examples:
         - "[#123] feat(operator): support xxx"
         - "[#233] fix: check null before access result in xxx"
         - "[MINOR] refactor: fix typo in variable name"
         - "[MINOR] docs: fix typo in README"
         - "[#255] test: fix flaky test NameOfTheTest"
       Reference: https://www.conventionalcommits.org/en/v1.0.0/
    2. If the PR is unfinished, please mark this PR as draft.
    -->
    
    ### What changes were proposed in this pull request?
    
    Doris supports 'DISTRIBUTED BY HASH(dt_date) BUCKETS AUTO'
    
    ### Why are the changes needed?
    
    Fix: [#7185](https://github.com/apache/gravitino/issues/7185)
    
    ### Does this PR introduce _any_ user-facing change?
    
    N/A
    
    ### How was this patch tested?
    
    
    
org.apache.gravitino.catalog.doris.utils.TestDorisUtils#testDistributedInfoPattern
    
    
org.apache.gravitino.catalog.doris.integration.test.CatalogDorisIT#testAllDistributionWithAuto
---
 .../expressions/distributions/Distributions.java   | 17 ++++++++++++
 .../doris/operation/DorisTableOperations.java      |  2 +-
 .../gravitino/catalog/doris/utils/DorisUtils.java  | 27 ++++++++++++++----
 .../doris/integration/test/CatalogDorisIT.java     | 32 ++++++++++++++++++++++
 .../catalog/doris/utils/TestDorisUtils.java        | 14 ++++++++++
 .../apache/gravitino/dto/rel/DistributionDTO.java  |  3 +-
 ...partitioning-distribution-sort-order-indexes.md | 12 ++++++++
 7 files changed, 99 insertions(+), 8 deletions(-)

diff --git 
a/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Distributions.java
 
b/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Distributions.java
index 905e0c5550..c7aeb62189 100644
--- 
a/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Distributions.java
+++ 
b/api/src/main/java/org/apache/gravitino/rel/expressions/distributions/Distributions.java
@@ -26,6 +26,12 @@ import org.apache.gravitino.rel.expressions.NamedReference;
 /** Helper methods to create distributions to pass into Apache Gravitino. */
 public class Distributions {
 
+  /**
+   * AUTO indicates that the number of buckets is automatically determined by 
the system (without
+   * the need for manual specification).
+   */
+  public static final int AUTO = -1;
+
   /** NONE is used to indicate that there is no distribution. */
   public static final Distribution NONE =
       new DistributionImpl(Strategy.NONE, 0, Expression.EMPTY_EXPRESSION);
@@ -74,6 +80,17 @@ public class Distributions {
     return new DistributionImpl(strategy, number, expressions);
   }
 
+  /**
+   * Create a distribution by the given strategy with the bucket number set to {@link #AUTO}.
+   *
+   * @param strategy The strategy to use
+   * @param expressions The expressions to distribute by
+   * @return The created distribution
+   */
+  public static Distribution auto(Strategy strategy, Expression... 
expressions) {
+    return new DistributionImpl(strategy, AUTO, expressions);
+  }
+
   /**
    * Create a distribution on columns. Like distribute by (a) or (a, b), for 
complex like
    * distributing by (func(a), b) or (func(a), func(b)), please use {@link 
DistributionImpl.Builder}
diff --git 
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java
 
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java
index f4b99e9b01..c87face2cb 100644
--- 
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java
+++ 
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/operation/DorisTableOperations.java
@@ -134,7 +134,7 @@ public class DorisTableOperations extends 
JdbcTableOperations {
     }
 
     if (distribution.number() != 0) {
-      sqlBuilder.append(" BUCKETS ").append(distribution.number());
+      sqlBuilder.append(" BUCKETS 
").append(DorisUtils.toBucketNumberString(distribution.number()));
     }
 
     properties = appendNecessaryProperties(properties);
diff --git 
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
 
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
index e0543b8eee..b5a4541b57 100644
--- 
a/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
+++ 
b/catalogs/catalog-jdbc-doris/src/main/java/org/apache/gravitino/catalog/doris/utils/DorisUtils.java
@@ -29,6 +29,7 @@ import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import org.apache.gravitino.rel.expressions.NamedReference;
 import org.apache.gravitino.rel.expressions.distributions.Distribution;
+import org.apache.gravitino.rel.expressions.distributions.Distributions;
 import 
org.apache.gravitino.rel.expressions.distributions.Distributions.DistributionImpl;
 import org.apache.gravitino.rel.expressions.distributions.Strategy;
 import org.apache.gravitino.rel.expressions.literals.Literal;
@@ -48,7 +49,7 @@ public final class DorisUtils {
 
   private static final Pattern DISTRIBUTION_INFO_PATTERN =
       Pattern.compile(
-          "DISTRIBUTED 
BY\\s+(HASH|RANDOM)\\s*(\\(([^)]+)\\))?\\s*(BUCKETS\\s+(\\d+))?");
+          "DISTRIBUTED 
BY\\s+(HASH|RANDOM)\\s*(\\(([^)]+)\\))?\\s*(BUCKETS\\s+(\\d+|AUTO))?");
 
   private static final String LIST_PARTITION = "LIST";
   private static final String RANGE_PARTITION = "RANGE";
@@ -202,11 +203,8 @@ public final class DorisUtils {
                   .map(f -> f.substring(1, f.length() - 1))
                   .toArray(String[]::new);
 
-      // Default bucket number is 1.
-      int bucketNum = 1;
-      if (matcher.find(5)) {
-        bucketNum = Integer.valueOf(matcher.group(5));
-      }
+      // Default bucket number is 1, auto is -1.
+      int bucketNum = extractBucketNum(matcher);
 
       return new DistributionImpl.Builder()
           .withStrategy(Strategy.getByName(distributionType))
@@ -220,4 +218,21 @@ public final class DorisUtils {
 
     throw new RuntimeException("Failed to extract distribution info in sql:" + 
createTableSql);
   }
+
+  private static int extractBucketNum(Matcher matcher) {
+    int bucketNum = 1;
+    if (matcher.find(5)) {
+      String bucketValue = matcher.group(5);
+      // Use -1 to indicate auto bucket.
+      bucketNum =
+          bucketValue.trim().toUpperCase().equals("AUTO")
+              ? Distributions.AUTO
+              : Integer.valueOf(bucketValue);
+    }
+    return bucketNum;
+  }
+
+  public static String toBucketNumberString(int number) {
+    return number == Distributions.AUTO ? "AUTO" : String.valueOf(number);
+  }
 }
diff --git 
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
 
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
index 9d2c798ae7..11911bba47 100644
--- 
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
+++ 
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/integration/test/CatalogDorisIT.java
@@ -61,6 +61,7 @@ import org.apache.gravitino.rel.expressions.Expression;
 import org.apache.gravitino.rel.expressions.NamedReference;
 import org.apache.gravitino.rel.expressions.distributions.Distribution;
 import org.apache.gravitino.rel.expressions.distributions.Distributions;
+import org.apache.gravitino.rel.expressions.distributions.Strategy;
 import org.apache.gravitino.rel.expressions.literals.Literal;
 import org.apache.gravitino.rel.expressions.literals.Literals;
 import org.apache.gravitino.rel.expressions.sorts.SortOrder;
@@ -1004,4 +1005,35 @@ public class CatalogDorisIT extends BaseIT {
       tableCatalog.dropTable(tableIdentifier);
     }
   }
+
+  @Test
+  void testAllDistributionWithAuto() {
+    Distribution distribution =
+        Distributions.auto(Strategy.HASH, 
NamedReference.field(DORIS_COL_NAME1));
+
+    String tableName = 
GravitinoITUtils.genRandomName("test_distribution_table");
+    NameIdentifier tableIdentifier = NameIdentifier.of(schemaName, tableName);
+    Column[] columns = createColumns();
+    Index[] indexes = Indexes.EMPTY_INDEXES;
+    Map<String, String> properties = createTableProperties();
+    Transform[] partitioning = Transforms.EMPTY_TRANSFORM;
+    TableCatalog tableCatalog = catalog.asTableCatalog();
+    tableCatalog.createTable(
+        tableIdentifier,
+        columns,
+        table_comment,
+        properties,
+        partitioning,
+        distribution,
+        null,
+        indexes);
+    // load table
+    Table loadTable = tableCatalog.loadTable(tableIdentifier);
+
+    Assertions.assertEquals(distribution.strategy(), 
loadTable.distribution().strategy());
+    Assertions.assertEquals(distribution.number(), 
loadTable.distribution().number());
+    Assertions.assertArrayEquals(
+        distribution.expressions(), loadTable.distribution().expressions());
+    tableCatalog.dropTable(tableIdentifier);
+  }
 }
diff --git 
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
 
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
index 26d011af8e..a73f04ed6e 100644
--- 
a/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
+++ 
b/catalogs/catalog-jdbc-doris/src/test/java/org/apache/gravitino/catalog/doris/utils/TestDorisUtils.java
@@ -26,6 +26,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Optional;
+import org.apache.gravitino.rel.expressions.distributions.Distribution;
 import org.apache.gravitino.rel.expressions.literals.Literal;
 import org.apache.gravitino.rel.expressions.literals.Literals;
 import org.apache.gravitino.rel.expressions.transforms.Transform;
@@ -171,4 +172,17 @@ public class TestDorisUtils {
     partitionSqlFragment = DorisUtils.generatePartitionSqlFragment(partition);
     assertEquals("PARTITION `p7` VALUES IN ((\"1\",\"2\"),(\"3\",\"4\"))", 
partitionSqlFragment);
   }
+
+  @Test
+  public void testDistributedInfoPattern() {
+    String createTableSql =
+        "CREATE TABLE `testTable` (\n`col1` date NOT NULL\n) ENGINE=OLAP\n 
PARTITION BY RANGE(`col1`)\n()\n DISTRIBUTED BY HASH(`col1`) BUCKETS 2";
+    Distribution distribution = 
DorisUtils.extractDistributionInfoFromSql(createTableSql);
+    assertEquals(distribution.number(), 2);
+
+    String createTableSqlWithAuto =
+        "CREATE TABLE `testTable` (\n`col1` date NOT NULL\n) ENGINE=OLAP\n 
PARTITION BY RANGE(`col1`)\n()\n DISTRIBUTED BY HASH(`col1`) BUCKETS AUTO";
+    Distribution distribution2 = 
DorisUtils.extractDistributionInfoFromSql(createTableSqlWithAuto);
+    assertEquals(distribution2.number(), -1);
+  }
 }
diff --git 
a/common/src/main/java/org/apache/gravitino/dto/rel/DistributionDTO.java 
b/common/src/main/java/org/apache/gravitino/dto/rel/DistributionDTO.java
index 1a1eddef90..1d6887af09 100644
--- a/common/src/main/java/org/apache/gravitino/dto/rel/DistributionDTO.java
+++ b/common/src/main/java/org/apache/gravitino/dto/rel/DistributionDTO.java
@@ -163,7 +163,8 @@ public class DistributionDTO implements Distribution {
       strategy = strategy == null ? Strategy.HASH : strategy;
 
       Preconditions.checkState(args != null, "expressions cannot be null");
-      Preconditions.checkState(number >= 0, "bucketNum must be greater than 
0");
+      // The number of buckets must be greater than or equal to -1; -1 means auto.
+      Preconditions.checkState(number >= -1, "bucketNum must be greater than 
-1");
       return new DistributionDTO(strategy, number, args);
     }
   }
diff --git a/docs/table-partitioning-distribution-sort-order-indexes.md 
b/docs/table-partitioning-distribution-sort-order-indexes.md
index 4ffc56f021..581d10fc3a 100644
--- a/docs/table-partitioning-distribution-sort-order-indexes.md
+++ b/docs/table-partitioning-distribution-sort-order-indexes.md
@@ -84,9 +84,21 @@ Distributions.of(Strategy.HASH, 4, 
NamedReference.field("score"));
 ```
 
 </TabItem>
+
+<TabItem value="java" label="Java">
+If you want the number of buckets to be determined automatically, use the following code; it sets the bucket number to -1.
+
+```java
+// Auto distribution with strategy and fields
+Distributions.auto(Strategy.HASH, NamedReference.field("score"));
+```
+</TabItem>
+
 </Tabs>
 
 
+
+
 ## Sort ordering
 
 To define a sorted order table, you should use the following three components 
to construct a valid sorted order table.

Reply via email to