This is an automated email from the ASF dual-hosted git repository.

liaoxin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ecfa59152ad [test](doc) add test case for error data handling doc (#43468)
ecfa59152ad is described below

commit ecfa59152ad58afe208a365d4aa30a27f3e5edfd
Author: Xin Liao <liaoxin...@126.com>
AuthorDate: Fri Nov 8 20:37:21 2024 +0800

    [test](doc) add test case for error data handling doc (#43468)
---
 .../import/import-way/error-data-handling.md.out   |  24 ++
 .../import/import-way/error_data_handling.csv      |   3 +
 .../import/import-way/test_data_partial.csv        |   3 +
 .../import-way/error-data-handling.md.groovy       | 322 +++++++++++++++++++++
 4 files changed, 352 insertions(+)

diff --git a/regression-test/data/doc/data-operate/import/import-way/error-data-handling.md.out b/regression-test/data/doc/data-operate/import/import-way/error-data-handling.md.out
new file mode 100644
index 00000000000..4585bd5c583
--- /dev/null
+++ b/regression-test/data/doc/data-operate/import/import-way/error-data-handling.md.out
@@ -0,0 +1,24 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select --
+1      kevin   18      shenzhen        500     2023-07-03T12:00:01
+3      \N      \N      \N      23      2023-07-03T12:00:02
+18     \N      \N      \N      9999999 2023-07-03T12:00:03
+
+-- !select --
+3      Jack    \N      90
+4      Mike    3       \N
+5      Lucy    4       95
+5      Lucy    4       95
+
+-- !select --
+3      Jack    \N      90
+4      Mike    3       \N
+5      Lucy    4       95
+5      Lucy    4       95
+
+-- !select --
+3      Jack    \N      90
+4      Mike    3       \N
+5      Lucy    4       95
+5      Lucy    4       95
+
diff --git a/regression-test/data/doc/data-operate/import/import-way/error_data_handling.csv b/regression-test/data/doc/data-operate/import/import-way/error_data_handling.csv
new file mode 100644
index 00000000000..84811c9b36d
--- /dev/null
+++ b/regression-test/data/doc/data-operate/import/import-way/error_data_handling.csv
@@ -0,0 +1,3 @@
+3,Jack,abc,90
+4,Mike,3,abc
+5,Lucy,4,95
diff --git a/regression-test/data/doc/data-operate/import/import-way/test_data_partial.csv b/regression-test/data/doc/data-operate/import/import-way/test_data_partial.csv
new file mode 100644
index 00000000000..1e25af3968a
--- /dev/null
+++ b/regression-test/data/doc/data-operate/import/import-way/test_data_partial.csv
@@ -0,0 +1,3 @@
+1,500,2023-07-03 12:00:01
+3,23,2023-07-03 12:00:02
+18,9999999,2023-07-03 12:00:03
diff --git a/regression-test/suites/doc/data-operate/import/import-way/error-data-handling.md.groovy b/regression-test/suites/doc/data-operate/import/import-way/error-data-handling.md.groovy
new file mode 100644
index 00000000000..d8057f374af
--- /dev/null
+++ b/regression-test/suites/doc/data-operate/import/import-way/error-data-handling.md.groovy
@@ -0,0 +1,322 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_error_data_handling", "p0") {
+    def tableName = "test_error_data_handling"
+    // partial update strict mode
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+        CREATE TABLE ${tableName} (
+            id INT,
+            name VARCHAR(10),
+            age INT,
+            city VARCHAR(10),
+            balance DECIMAL(9, 0),
+            last_access_time DATETIME
+        ) 
+        UNIQUE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1")
+    """
+
+    sql """
+        INSERT INTO ${tableName} VALUES (1, 'kevin', 18, 'shenzhen', 400, '2023-07-01 12:00:00')
+    """
+
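+    // Partial-column load with strict_mode=true: ids 3 and 18 have no existing
+    // key in the table, so those rows are filtered; with the default
+    // max_filter_ratio of 0, the load is expected to fail.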
+    streamLoad {
+        table "${tableName}"
+        file "test_data_partial.csv"
+        set 'column_separator', ','
+        set 'columns', 'id,balance,last_access_time'
+        set 'partial_columns', 'true'
+        set 'strict_mode', 'true'
+        time 10000
+        check { result, exception, startTime, endTime ->
+            assertTrue(exception == null)
+            def json = parseJson(result)
+            assertEquals("Fail", json.Status)
+            assertTrue(json.Message.contains("[DATA_QUALITY_ERROR]too many filtered rows"))
+            assertEquals(3, json.NumberTotalRows)
+            assertEquals(0, json.NumberLoadedRows)
+            assertEquals(2, json.NumberFilteredRows)
+        }
+    }
+
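+    // Same load with strict_mode=false: rows with missing keys are inserted
+    // instead, with unspecified columns filled with NULL, so the load succeeds.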
+    streamLoad {
+        table "${tableName}"
+        file "test_data_partial.csv"
+        set 'column_separator', ','
+        set 'columns', 'id,balance,last_access_time'
+        set 'partial_columns', 'true'
+        set 'strict_mode', 'false'
+        time 10000
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("success", json.Status.toLowerCase())
+        }
+    }
+
+    qt_select "SELECT * FROM ${tableName} ORDER BY id"
+
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+        CREATE TABLE ${tableName} (
+            id TINYINT NULL,
+            name VARCHAR(10) NULL,
+            age DECIMAL(1,0) NULL,
+            score INT NULL
+        )
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1")
+    """
+
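+    // error_data_handling.csv contains values that cannot be converted ('abc'
+    // for the DECIMAL/INT columns); with strict_mode=false they load as NULL.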
+    streamLoad {
+        table "${tableName}"
+        file "error_data_handling.csv"
+        set 'column_separator', ','
+        set 'strict_mode', 'false'
+
+        time 10000 
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("success", json.Status.toLowerCase())
+        }
+    }
+
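+    // With strict_mode=true the two unconvertible rows are filtered, and the
+    // default max_filter_ratio of 0 causes the load to fail.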
+    streamLoad {
+        table "${tableName}"
+        file "error_data_handling.csv"
+        set 'column_separator', ','
+        set 'strict_mode', 'true'
+
+        time 10000 
+        check { result, exception, startTime, endTime ->
+            assertTrue(exception == null)
+            def json = parseJson(result)
+            assertEquals("Fail", json.Status)
+            assertTrue(json.Message.contains("[DATA_QUALITY_ERROR]too many filtered rows"))
+            assertEquals(3, json.NumberTotalRows)
+            assertEquals(0, json.NumberLoadedRows)
+            assertEquals(2, json.NumberFilteredRows)
+        }
+    }
+
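+    // Raising max_filter_ratio to 0.8 tolerates the 2 of 3 filtered rows
+    // (ratio ~0.67), so the strict-mode load succeeds.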
+    streamLoad {
+        table "${tableName}"
+        file "error_data_handling.csv"
+        set 'column_separator', ','
+        set 'strict_mode', 'true'
+        set 'max_filter_ratio', '0.8'
+
+        time 10000 
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("success", json.Status.toLowerCase())
+        }
+    }
+
+    qt_select "SELECT * FROM ${tableName} ORDER BY id"
+
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+        CREATE TABLE ${tableName} (
+            id TINYINT NULL,
+            name VARCHAR(10) NULL,
+            age DECIMAL(1,0) NULL,
+            score INT NULL
+        ) 
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1")
+    """
+
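+    // Repeat the same three strict-mode scenarios with broker load from S3.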
+    String ak = getS3AK()
+    String sk = getS3SK()
+    def s3BucketName = getS3BucketName()
+    def s3Endpoint = getS3Endpoint()
+    def s3Region = getS3Region()
+    def label = "test_error_data_handling" + UUID.randomUUID().toString().replace("-", "_")
+    sql """
+        LOAD LABEL ${label} (
+            DATA INFILE("s3://${s3BucketName}/regression/load/data/error_data_handling.csv")
+            INTO TABLE ${tableName}
+            COLUMNS TERMINATED BY ","
+        )
+        WITH S3 (
+            "AWS_ACCESS_KEY" = "$ak",
+            "AWS_SECRET_KEY" = "$sk",
+            "AWS_ENDPOINT" = "${s3Endpoint}",
+            "AWS_REGION" = "${s3Region}",
+            "provider" = "${getS3Provider()}"
+        )
+        properties(
+            "strict_mode" = "false"
+        )
+    """
+    def max_try_milli_secs = 1200000
+    while (max_try_milli_secs > 0) {
+        String[][] result = sql """ show load where label="${label}" order by createtime desc limit 1; """
+
+        if (result[0][2].equals("FINISHED")) {
+            logger.info("Load FINISHED ${label}: $result")
+            break
+        }
+        if (result[0][2].equals("CANCELLED")) {
+            assertTrue(false, "load failed: $result")
+            break
+        }
+        Thread.sleep(1000)
+        max_try_milli_secs -= 1000
+        if (max_try_milli_secs <= 0) {
+            assertTrue(false, "load Timeout: $label")
+        }
+    }
+
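+    // Second broker load with strict_mode=true: expected to be CANCELLED for
+    // data quality errors rather than FINISHED.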
+    label = "test_error_data_handling" + UUID.randomUUID().toString().replace("-", "_")
+    sql """
+        LOAD LABEL ${label} (
+            DATA INFILE("s3://${s3BucketName}/regression/load/data/error_data_handling.csv")
+            INTO TABLE ${tableName}
+            COLUMNS TERMINATED BY ","
+        )
+        WITH S3 (
+            "AWS_ACCESS_KEY" = "$ak",
+            "AWS_SECRET_KEY" = "$sk",
+            "AWS_ENDPOINT" = "${s3Endpoint}",
+            "AWS_REGION" = "${s3Region}",
+            "provider" = "${getS3Provider()}"
+        )
+        properties(
+            "strict_mode" = "true"
+        )
+    """
+    max_try_milli_secs = 1200000
+    while (max_try_milli_secs > 0) {
+        String[][] result = sql """ show load where label="${label}" order by createtime desc limit 1; """
+
+        if (result[0][2].equals("FINISHED")) {
+            logger.info("Load FINISHED ${label}: $result")
+            assertTrue(false, "load should be failed but was success: $result")
+            break
+        }
+        if (result[0][2].equals("CANCELLED")) {
+            logger.info("Load FINISHED ${label}: $result")
+            break
+        }
+        Thread.sleep(1000)
+        max_try_milli_secs -= 1000
+        if (max_try_milli_secs <= 0) {
+            assertTrue(false, "load Timeout: $label")
+        }
+    }
+
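+    // Third broker load with strict_mode=true and max_filter_ratio=0.8: the
+    // filtered rows are within the ratio, so the load should FINISH.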
+    label = "test_error_data_handling" + UUID.randomUUID().toString().replace("-", "_")
+    sql """
+        LOAD LABEL ${label} (
+            DATA INFILE("s3://${s3BucketName}/regression/load/data/error_data_handling.csv")
+            INTO TABLE ${tableName}
+            COLUMNS TERMINATED BY ","
+        )
+        WITH S3 (
+            "AWS_ACCESS_KEY" = "$ak",
+            "AWS_SECRET_KEY" = "$sk",
+            "AWS_ENDPOINT" = "${s3Endpoint}",
+            "AWS_REGION" = "${s3Region}",
+            "provider" = "${getS3Provider()}"
+        )
+        properties(
+            "max_filter_ratio" = "0.8",
+            "strict_mode" = "true"
+        )
+    """
+    max_try_milli_secs = 1200000
+    while (max_try_milli_secs > 0) {
+        String[][] result = sql """ show load where label="${label}" order by createtime desc limit 1; """
+
+        if (result[0][2].equals("FINISHED")) {
+            logger.info("Load FINISHED ${label}: $result")
+            break
+        }
+        if (result[0][2].equals("CANCELLED")) {
+            assertTrue(false, "load failed: $result")
+            break
+        }
+        Thread.sleep(1000)
+        max_try_milli_secs -= 1000
+        if (max_try_milli_secs <= 0) {
+            assertTrue(false, "load Timeout: $label")
+        }
+    }
+    qt_select "SELECT * FROM ${tableName} ORDER BY id"
+
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+        CREATE TABLE ${tableName} (
+            id TINYINT NULL,
+            name VARCHAR(10) NULL,
+            age DECIMAL(1,0) NULL,
+            score INT NULL
+        ) 
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1")
+    """
+    sql """
+        LOAD DATA LOCAL
+        INFILE '${context.dataPath}/error_data_handling.csv'
+        INTO TABLE ${tableName}
+        COLUMNS TERMINATED BY ","
+    """
+    test {
+        sql """
+            LOAD DATA LOCAL
+            INFILE '${context.dataPath}/error_data_handling.csv'
+            INTO TABLE ${tableName}
+            COLUMNS TERMINATED BY ","
+            PROPERTIES (
+                "strict_mode" = "true"
+            );
+        """
+        exception "too many filtered rows"
+    }
+
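+    // With max_filter_ratio raised to 0.8, the strict-mode load goes through.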
+    sql """
+        LOAD DATA LOCAL
+        INFILE '${context.dataPath}/error_data_handling.csv'
+        INTO TABLE ${tableName}
+        COLUMNS TERMINATED BY ","
+        PROPERTIES (
+            "max_filter_ratio" = "0.8",
+            "strict_mode" = "true"
+        );
+    """
+    qt_select "SELECT * FROM ${tableName} ORDER BY id"
+
+}
\ No newline at end of file

