This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6b50368d384 [fix](default value) enable HLL_EMPTY_DEFAULT_VALUE 
(#34447)
6b50368d384 is described below

commit 6b50368d38416f83a3feb0258ad39768ba09db7d
Author: yi wang <48236141+my-vegetable-has-explo...@users.noreply.github.com>
AuthorDate: Fri May 17 16:08:23 2024 +0800

    [fix](default value) enable HLL_EMPTY_DEFAULT_VALUE (#34447)
    
    The goal of this PR is to make it possible to use `HLL_EMPTY()` to fill a
missing HLL column. As shown in `test_default_hll.groovy`, if the HLL column
is not specified, HLL_EMPTY is used to fill it.
    
    
https://github.com/apache/doris/blob/f180d90bb8db1af8bee43e095de55cfe72e95ecc/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java#L362-L367
    
    The existing analysis already adds HLL_EMPTY_DEFAULT_VALUE as the default
value for an HLL column, but related problems occur if we don't pass
`hll_hash(x)` ... for this column.
    
    This PR adds `defaultValueExprDef` for HLL_EMPTY_DEFAULT_VALUE (which is
used in stream load mode) and makes a defaulted HLL column pass the analysis
for `NativeInsertStmt` and `Load`.
---
 .../java/org/apache/doris/analysis/ColumnDef.java  |  3 +-
 .../apache/doris/analysis/NativeInsertStmt.java    |  3 +-
 .../apache/doris/datasource/FileQueryScanNode.java |  2 +-
 .../org/apache/doris/datasource/FileScanNode.java  |  8 ++
 .../trees/plans/commands/info/DefaultValue.java    |  3 +-
 .../org/apache/doris/planner/FileLoadScanNode.java |  3 +-
 .../data/correctness_p0/test_default_hll.out       | 29 +++++++
 .../correctness_p0/test_default_hll_streamload.csv |  2 +
 .../suites/correctness_p0/test_default_hll.groovy  | 99 ++++++++++++++++++++++
 9 files changed, 147 insertions(+), 5 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
index 46a48339d54..efb109a9233 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
@@ -98,6 +98,7 @@ public class ColumnDef {
         // default "CURRENT_TIMESTAMP", only for DATETIME type
         public static String CURRENT_TIMESTAMP = "CURRENT_TIMESTAMP";
         public static String NOW = "now";
+        public static String HLL_EMPTY = "HLL_EMPTY";
         public static DefaultValue CURRENT_TIMESTAMP_DEFAULT_VALUE = new 
DefaultValue(true, CURRENT_TIMESTAMP, NOW);
         // no default value
         public static DefaultValue NOT_SET = new DefaultValue(false, null);
@@ -105,7 +106,7 @@ public class ColumnDef {
         public static DefaultValue NULL_DEFAULT_VALUE = new DefaultValue(true, 
null);
         public static String ZERO = new String(new byte[] {0});
         // default "value", "0" means empty hll
-        public static DefaultValue HLL_EMPTY_DEFAULT_VALUE = new 
DefaultValue(true, ZERO);
+        public static DefaultValue HLL_EMPTY_DEFAULT_VALUE = new 
DefaultValue(true, ZERO, HLL_EMPTY);
         // default "value", "0" means empty bitmap
         public static DefaultValue BITMAP_EMPTY_DEFAULT_VALUE = new 
DefaultValue(true, ZERO);
         // default "value", "[]" means empty array
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java
index ab1aaad679a..c4ef8f6597f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java
@@ -590,7 +590,8 @@ public class NativeInsertStmt extends InsertStmt {
             }
             // hll column must in mentionedColumns
             for (Column col : targetTable.getBaseSchema()) {
-                if (col.getType().isObjectStored() && 
!mentionedColumns.contains(col.getName())) {
+                if (col.getType().isObjectStored() && !col.hasDefaultValue()
+                        && !mentionedColumns.contains(col.getName())) {
                     throw new AnalysisException(
                             "object-stored column " + col.getName() + " must 
in insert into columns");
                 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
index ff25464b4c9..90c24989275 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java
@@ -173,7 +173,7 @@ public abstract class FileQueryScanNode extends 
FileScanNode {
             
slotInfo.setIsFileSlot(!partitionKeys.contains(slot.getColumn().getName()));
             params.addToRequiredSlots(slotInfo);
         }
-        setDefaultValueExprs(getTargetTable(), destSlotDescByName, params, 
false);
+        setDefaultValueExprs(getTargetTable(), destSlotDescByName, null, 
params, false);
         setColumnPositionMapping();
         // For query, set src tuple id to -1.
         params.setSrcTupleId(-1);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
index 44f07718485..92cdfbcfa1f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java
@@ -181,6 +181,7 @@ public abstract class FileScanNode extends ExternalScanNode 
{
 
     protected void setDefaultValueExprs(TableIf tbl,
                                         Map<String, SlotDescriptor> 
slotDescByName,
+                                        Map<String, Expr> exprByName,
                                         TFileScanRangeParams params,
                                         boolean useVarcharAsNull) throws 
UserException {
         Preconditions.checkNotNull(tbl);
@@ -208,6 +209,13 @@ public abstract class FileScanNode extends 
ExternalScanNode {
                     expr = null;
                 }
             }
+            // if there is already an expr , just skip it.
+            // eg:
+            // (a, b, c, c=hll_hash(c)) in stream load
+            // c will be filled with hll_hash(column c) , don't need to 
specify it.
+            if (exprByName != null && 
exprByName.containsKey(column.getName())) {
+                continue;
+            }
             SlotDescriptor slotDesc = slotDescByName.get(column.getName());
             // if slot desc is null, which mean it is an unrelated slot, just 
skip.
             // eg:
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DefaultValue.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DefaultValue.java
index 8e7228705ac..48a22daf7b7 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DefaultValue.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DefaultValue.java
@@ -27,6 +27,7 @@ public class DefaultValue {
     public static String CURRENT_DATE = "CURRENT_DATE";
     public static String CURRENT_TIMESTAMP = "CURRENT_TIMESTAMP";
     public static String NOW = "now";
+    public static String HLL_EMPTY = "HLL_EMPTY";
     public static DefaultValue CURRENT_DATE_DEFAULT_VALUE = new 
DefaultValue(CURRENT_DATE, CURRENT_DATE.toLowerCase());
     public static DefaultValue CURRENT_TIMESTAMP_DEFAULT_VALUE = new 
DefaultValue(CURRENT_TIMESTAMP, NOW);
     // default null
@@ -34,7 +35,7 @@ public class DefaultValue {
     public static String ZERO = new String(new byte[] {0});
     public static String ZERO_NUMBER = "0";
     // default "value", "0" means empty hll
-    public static DefaultValue HLL_EMPTY_DEFAULT_VALUE = new 
DefaultValue(ZERO);
+    public static DefaultValue HLL_EMPTY_DEFAULT_VALUE = new 
DefaultValue(ZERO, HLL_EMPTY);
     // default "value", "0" means empty bitmap
     public static DefaultValue BITMAP_EMPTY_DEFAULT_VALUE = new 
DefaultValue(ZERO);
     // default "value", "[]" means empty array
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java
index ca0324a51d0..9ecbe31673a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/FileLoadScanNode.java
@@ -131,7 +131,8 @@ public class FileLoadScanNode extends FileScanNode {
             // FIXME(cmy): we should support set different expr for different 
file group.
             
initAndSetPrecedingFilter(context.fileGroup.getPrecedingFilterExpr(), 
context.srcTupleDescriptor, analyzer);
             initAndSetWhereExpr(context.fileGroup.getWhereExpr(), 
context.destTupleDescriptor, analyzer);
-            setDefaultValueExprs(scanProvider.getTargetTable(), 
context.srcSlotDescByName, context.params, true);
+            setDefaultValueExprs(scanProvider.getTargetTable(), 
context.srcSlotDescByName,
+                    context.exprMap, context.params, true);
             this.contexts.add(context);
         }
     }
diff --git a/regression-test/data/correctness_p0/test_default_hll.out 
b/regression-test/data/correctness_p0/test_default_hll.out
new file mode 100644
index 00000000000..c2e8103ded6
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_default_hll.out
@@ -0,0 +1,29 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !insert_into1 --
+0
+0
+0
+0
+
+-- !stream_load_csv1 --
+0
+0
+0
+0
+1
+1
+
+-- !select_1 --
+0
+0
+0
+0
+
+-- !stream_load_csv1 --
+0
+0
+0
+0
+0
+0
+
diff --git 
a/regression-test/data/correctness_p0/test_default_hll_streamload.csv 
b/regression-test/data/correctness_p0/test_default_hll_streamload.csv
new file mode 100644
index 00000000000..f4ec2d7748a
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_default_hll_streamload.csv
@@ -0,0 +1,2 @@
+5,5
+6,6
\ No newline at end of file
diff --git a/regression-test/suites/correctness_p0/test_default_hll.groovy 
b/regression-test/suites/correctness_p0/test_default_hll.groovy
new file mode 100644
index 00000000000..b21869e30e3
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_default_hll.groovy
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_default_hll") {
+    def tableName = "test_default_hll"
+
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+        CREATE TABLE IF NOT EXISTS ${tableName}
+        (
+            k TINYINT,
+            v1 DECIMAL(10, 2) DEFAULT "0",
+            h1 hll NOT NULL COMMENT "hll column"
+        )
+        UNIQUE KEY(K)
+        DISTRIBUTED BY HASH(k)
+        PROPERTIES("replication_num" = "1");
+    """
+
+    // test insert into.
+    sql " insert into ${tableName} (k, v1, h1) values (1, 1, hll_empty()); "
+    sql " insert into ${tableName} (k, v1, h1) values (2, 2, hll_empty()); "
+    sql " insert into ${tableName} (k, v1, h1) values (3, 3, hll_empty()); "
+    sql " insert into ${tableName} (k, v1, h1) values (4, 4, hll_empty()); "
+    sql "sync"
+    qt_insert_into1 """ select HLL_CARDINALITY(h1) from ${tableName} order by 
k; """
+
+    // test csv stream load.
+    streamLoad {
+        table "${tableName}"
+
+        set 'column_separator', ','
+        set 'columns', 'k, v1, h1=hll_hash(k)'
+
+        file 'test_default_hll_streamload.csv'
+
+        time 10000 // limit inflight 10s
+    }
+
+    sql "sync"
+
+    qt_stream_load_csv1 """ select HLL_CARDINALITY(h1) from ${tableName} order 
by k; """
+
+    // test partial update
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+        CREATE TABLE IF NOT EXISTS ${tableName}
+        (
+            k TINYINT,
+            v1 DECIMAL(10, 2) DEFAULT "0",
+            h1 hll NOT NULL COMMENT "hll column"
+        )
+        UNIQUE KEY(K)
+        DISTRIBUTED BY HASH(k)
+        PROPERTIES("replication_num" = "1");
+    """
+
+    sql "set enable_unique_key_partial_update=true;" 
+    sql "set enable_insert_strict=false;"
+
+    sql " insert into ${tableName} (k, v1) values (1, 1); "
+    sql " insert into ${tableName} (k, v1) values (2, 2); "
+    sql " insert into ${tableName} (k, v1) values (3, 3); "
+    sql " insert into ${tableName} (k, v1) values (4, 4); "
+    sql "sync"
+
+    qt_select_1 "select HLL_CARDINALITY(h1) from ${tableName} order by k;"
+
+    streamLoad {
+        table "${tableName}"
+
+        set 'partial_columns', 'true'
+        set 'column_separator', ','
+        set 'columns', 'k, v1'
+
+        file 'test_default_hll_streamload.csv'
+
+        time 10000 // limit inflight 10s
+    }
+
+    sql "sync"
+
+    qt_stream_load_csv1 """ select HLL_CARDINALITY(h1) from ${tableName} order 
by k; """
+
+} 
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to