This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 11374d67b2195d24778657b2b29cbbe1b2b2c395
Author: Zhengguo Yang <yangz...@gmail.com>
AuthorDate: Fri Mar 25 09:28:41 2022 +0800

    [api-change] add soft limit of String type length (#8567)
    
    1. add a config string_type_soft_limit to soft limit max length of string 
type
    2. disable using String type in Key column, partition column and
       distribution column
    3. remove String type alias BLOB for futrue use
---
 be/src/common/config.h                             |  7 ++++
 be/src/exec/tablet_sink.cpp                        | 43 ++++++++++++++--------
 be/src/olap/delete_handler.cpp                     |  4 +-
 be/src/olap/olap_define.h                          |  3 --
 be/src/olap/row_block2.cpp                         |  8 ++--
 be/src/olap/types.h                                |  4 +-
 be/src/olap/wrapper_field.cpp                      |  5 ++-
 be/src/runtime/types.h                             |  3 +-
 be/src/vec/sink/vtablet_sink.cpp                   | 29 +++++++--------
 docs/en/administrator-guide/config/be_config.md    |  5 +++
 .../sql-statements/Data Types/STRING.md            |  2 +-
 docs/zh-CN/administrator-guide/config/be_config.md |  5 +++
 .../sql-statements/Data Types/STRING.md            |  2 +-
 fe/fe-core/src/main/cup/sql_parser.cup             |  2 -
 .../java/org/apache/doris/analysis/ColumnDef.java  |  5 ++-
 .../org/apache/doris/analysis/CreateTableStmt.java | 12 ++++--
 .../apache/doris/analysis/DistributionDesc.java    |  2 +-
 .../doris/analysis/HashDistributionDesc.java       | 18 ++++++++-
 .../org/apache/doris/analysis/PartitionDesc.java   |  5 +++
 .../doris/analysis/RandomDistributionDesc.java     |  2 +-
 .../java/org/apache/doris/catalog/ScalarType.java  |  1 -
 .../java/org/apache/doris/planner/PlannerTest.java | 15 ++++++++
 22 files changed, 123 insertions(+), 59 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index 5cb5a17..9c255c2 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -704,6 +704,13 @@ CONF_String(function_service_protocol, "h2:grpc");
 // use which load balancer to select server to connect
 CONF_String(rpc_load_balancer, "rr");
 
+// a soft limit of string type length, the hard limit is 2GB - 4, but if too 
long will cause very low performance,
+// so we set a soft limit, default is 1MB
+CONF_mInt32(string_type_length_soft_limit_bytes, "1048576");
+
+CONF_Validator(string_type_length_soft_limit_bytes,
+               [](const int config) -> bool { return config > 0 && config <= 
2147483643; });
+
 } // namespace config
 
 } // namespace doris
diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp
index 0a56369..2dcd9d4 100644
--- a/be/src/exec/tablet_sink.cpp
+++ b/be/src/exec/tablet_sink.cpp
@@ -191,7 +191,8 @@ Status NodeChannel::open_wait() {
             return;
         }
         // If rpc failed, mark all tablets on this node channel as failed
-        _index_channel->mark_as_failed(this->node_id(), this->host(), 
_add_batch_closure->cntl.ErrorText(), -1);
+        _index_channel->mark_as_failed(this->node_id(), this->host(),
+                                       _add_batch_closure->cntl.ErrorText(), 
-1);
         Status st = _index_channel->check_intolerable_failure();
         if (!st.ok()) {
             _cancel_with_msg(fmt::format("{}, err: {}", channel_info(), 
st.get_error_msg()));
@@ -214,7 +215,8 @@ Status NodeChannel::open_wait() {
         if (status.ok()) {
             // if has error tablet, handle them first
             for (auto& error : result.tablet_errors()) {
-                _index_channel->mark_as_failed(this->node_id(), this->host(), 
error.msg(), error.tablet_id());
+                _index_channel->mark_as_failed(this->node_id(), this->host(), 
error.msg(),
+                                               error.tablet_id());
             }
 
             Status st = _index_channel->check_intolerable_failure();
@@ -387,7 +389,7 @@ Status NodeChannel::close_wait(RuntimeState* state) {
     while (!_add_batches_finished && !_cancelled) {
         SleepFor(MonoDelta::FromMilliseconds(1));
     }
-    _close_time_ms = UnixMillis() - _close_time_ms; 
+    _close_time_ms = UnixMillis() - _close_time_ms;
 
     if (_add_batches_finished) {
         {
@@ -676,7 +678,8 @@ OlapTableSink::~OlapTableSink() {
     // OlapTableSink::_mem_tracker and its parents.
     // But their destructions are after OlapTableSink's.
     for (auto index_channel : _channels) {
-        index_channel->for_each_node_channel([](const 
std::shared_ptr<NodeChannel>& ch) { ch->clear_all_batches(); });
+        index_channel->for_each_node_channel(
+                [](const std::shared_ptr<NodeChannel>& ch) { 
ch->clear_all_batches(); });
     }
 }
 
@@ -838,11 +841,13 @@ Status OlapTableSink::open(RuntimeState* state) {
     RETURN_IF_ERROR(Expr::open(_output_expr_ctxs, state));
 
     for (auto index_channel : _channels) {
-        index_channel->for_each_node_channel([](const 
std::shared_ptr<NodeChannel>& ch) { ch->open(); });
+        index_channel->for_each_node_channel(
+                [](const std::shared_ptr<NodeChannel>& ch) { ch->open(); });
     }
 
     for (auto index_channel : _channels) {
-        index_channel->for_each_node_channel([&index_channel](const 
std::shared_ptr<NodeChannel>& ch) {
+        index_channel->for_each_node_channel([&index_channel](
+                                                     const 
std::shared_ptr<NodeChannel>& ch) {
             auto st = ch->open_wait();
             if (!st.ok()) {
                 // The open() phase is mainly to generate DeltaWriter 
instances on the nodes corresponding to each node channel.
@@ -932,13 +937,13 @@ Status OlapTableSink::send(RuntimeState* state, RowBatch* 
input_batch) {
         uint32_t tablet_index = 0;
         if (findTabletMode != FindTabletMode::FIND_TABLET_EVERY_ROW) {
             if (_partition_to_tablet_map.find(partition->id) == 
_partition_to_tablet_map.end()) {
-                tablet_index = _partition->find_tablet(tuple,*partition);
+                tablet_index = _partition->find_tablet(tuple, *partition);
                 _partition_to_tablet_map.emplace(partition->id, tablet_index);
             } else {
                 tablet_index = _partition_to_tablet_map[partition->id];
             }
         } else {
-            tablet_index = _partition->find_tablet(tuple,*partition);
+            tablet_index = _partition->find_tablet(tuple, *partition);
         }
         _partition_ids.emplace(partition->id);
         for (int j = 0; j < partition->indexes.size(); ++j) {
@@ -976,7 +981,8 @@ Status OlapTableSink::close(RuntimeState* state, Status 
close_status) {
         {
             SCOPED_TIMER(_close_timer);
             for (auto index_channel : _channels) {
-                index_channel->for_each_node_channel([](const 
std::shared_ptr<NodeChannel>& ch) { ch->mark_close(); });
+                index_channel->for_each_node_channel(
+                        [](const std::shared_ptr<NodeChannel>& ch) { 
ch->mark_close(); });
                 num_node_channels += index_channel->num_node_channels();
             }
 
@@ -989,7 +995,8 @@ Status OlapTableSink::close(RuntimeState* state, Status 
close_status) {
                          &total_add_batch_num](const 
std::shared_ptr<NodeChannel>& ch) {
                             auto s = ch->close_wait(state);
                             if (!s.ok()) {
-                                index_channel->mark_as_failed(ch->node_id(), 
ch->host(), s.get_error_msg(), -1);
+                                index_channel->mark_as_failed(ch->node_id(), 
ch->host(),
+                                                              
s.get_error_msg(), -1);
                                 LOG(WARNING)
                                         << ch->channel_info()
                                         << ", close channel failed, err: " << 
s.get_error_msg();
@@ -1039,7 +1046,8 @@ Status OlapTableSink::close(RuntimeState* state, Status 
close_status) {
         // print log of add batch time of all node, for tracing load 
performance easily
         std::stringstream ss;
         ss << "finished to close olap table sink. load_id=" << 
print_id(_load_id)
-           << ", txn_id=" << _txn_id << ", node add batch time(ms)/wait 
execution time(ms)/close time(ms)/num: ";
+           << ", txn_id=" << _txn_id
+           << ", node add batch time(ms)/wait execution time(ms)/close 
time(ms)/num: ";
         for (auto const& pair : node_add_batch_counter_map) {
             ss << "{" << pair.first << ":(" << 
(pair.second.add_batch_execution_time_us / 1000)
                << ")(" << (pair.second.add_batch_wait_execution_time_us / 
1000) << ")("
@@ -1048,8 +1056,9 @@ Status OlapTableSink::close(RuntimeState* state, Status 
close_status) {
         LOG(INFO) << ss.str();
     } else {
         for (auto channel : _channels) {
-            channel->for_each_node_channel(
-                    [&status](const std::shared_ptr<NodeChannel>& ch) { 
ch->cancel(status.get_error_msg()); });
+            channel->for_each_node_channel([&status](const 
std::shared_ptr<NodeChannel>& ch) {
+                ch->cancel(status.get_error_msg());
+            });
         }
     }
 
@@ -1181,13 +1190,14 @@ Status OlapTableSink::_validate_data(RuntimeState* 
state, RowBatch* batch, Bitma
             }
             case TYPE_STRING: {
                 StringValue* str_val = (StringValue*)slot;
-                if (str_val->len > OLAP_STRING_MAX_LENGTH) {
+                if (str_val->len > 
config::string_type_length_soft_limit_bytes) {
                     fmt::format_to(error_msg, "{}",
                                    "the length of input is too long than 
schema. ");
                     fmt::format_to(error_msg, "column_name: {}; ", 
desc->col_name());
                     fmt::format_to(error_msg, "first 128 bytes of input str: 
[{}] ",
                                    std::string(str_val->ptr, 128));
-                    fmt::format_to(error_msg, "schema length: {}; ", 
OLAP_STRING_MAX_LENGTH);
+                    fmt::format_to(error_msg, "schema length: {}; ",
+                                   
config::string_type_length_soft_limit_bytes);
                     fmt::format_to(error_msg, "actual length: {}; ", 
str_val->len);
                     row_valid = false;
                     continue;
@@ -1258,7 +1268,8 @@ void OlapTableSink::_send_batch_process() {
 
         if (running_channels_num == 0) {
             LOG(INFO) << "all node channels are stopped(maybe 
finished/offending/cancelled), "
-                         "sender thread exit. " << print_id(_load_id);
+                         "sender thread exit. "
+                      << print_id(_load_id);
             return;
         }
     } while (!_stop_background_threads_latch.wait_for(
diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index 6dd655f..2234044 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -143,7 +143,7 @@ bool DeleteConditionHandler::is_condition_value_valid(const 
TabletColumn& column
     case OLAP_FIELD_TYPE_VARCHAR:
         return value_str.size() <= column.length();
     case OLAP_FIELD_TYPE_STRING:
-        return value_str.size() <= OLAP_STRING_MAX_LENGTH;
+        return value_str.size() <= config::string_type_length_soft_limit_bytes;
     case OLAP_FIELD_TYPE_DATE:
     case OLAP_FIELD_TYPE_DATETIME:
         return valid_datetime(value_str);
@@ -303,7 +303,7 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema,
 }
 
 bool DeleteHandler::is_filter_data(const int64_t data_version, const 
RowCursor& row) const {
-    // According to semantics, the delete condition stored in _del_conds 
should be an OR relationship, 
+    // According to semantics, the delete condition stored in _del_conds 
should be an OR relationship,
     // so as long as the data matches one of the _del_conds, it will return 
true.
     for (const auto& del_cond : _del_conds) {
         if (data_version <= del_cond.filter_version &&
diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h
index b16c614..6af5024 100644
--- a/be/src/olap/olap_define.h
+++ b/be/src/olap/olap_define.h
@@ -56,9 +56,6 @@ static const uint16_t OLAP_VARCHAR_MAX_LENGTH = 65535;
 // the max length supported for string type 2GB
 static const uint32_t OLAP_STRING_MAX_LENGTH = 2147483647;
 
-// the max length supported for vec string type 1MB
-static constexpr size_t MAX_SIZE_OF_VEC_STRING = 1024 * 1024;
-
 // the max length supported for array
 static const uint16_t OLAP_ARRAY_MAX_LENGTH = 65535;
 
diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 5d43c94..8a99ca4 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -17,6 +17,7 @@
 
 #include "olap/row_block2.h"
 
+#include <algorithm>
 #include <sstream>
 #include <utility>
 
@@ -192,15 +193,16 @@ Status RowBlockV2::_copy_data_to_column(int cid, 
doris::vectorized::MutableColum
     }
     case OLAP_FIELD_TYPE_STRING: {
         auto column_string = assert_cast<vectorized::ColumnString*>(column);
-
+        size_t limit = config::string_type_length_soft_limit_bytes;
         for (uint16_t j = 0; j < _selected_size; ++j) {
             if (!nullable_mark_array[j]) {
                 uint16_t row_idx = _selection_vector[j];
                 auto slice = reinterpret_cast<const 
Slice*>(column_block(cid).cell_ptr(row_idx));
-                if (LIKELY(slice->size <= MAX_SIZE_OF_VEC_STRING)) {
+                if (LIKELY(slice->size <= limit)) {
                     column_string->insert_data(slice->data, slice->size);
                 } else {
-                    return Status::NotSupported("Not support string len over 
than 1MB in vec engine.");
+                    return Status::NotSupported(fmt::format(
+                            "Not support string len over than {} in vec 
engine.", limit));
                 }
             } else {
                 column_string->insert_default();
diff --git a/be/src/olap/types.h b/be/src/olap/types.h
index e734c7d..00c45d7 100644
--- a/be/src/olap/types.h
+++ b/be/src/olap/types.h
@@ -1117,9 +1117,9 @@ template <>
 struct FieldTypeTraits<OLAP_FIELD_TYPE_STRING> : public 
FieldTypeTraits<OLAP_FIELD_TYPE_CHAR> {
     static OLAPStatus from_string(void* buf, const std::string& scan_key) {
         size_t value_len = scan_key.length();
-        if (value_len > OLAP_STRING_MAX_LENGTH) {
+        if (value_len > config::string_type_length_soft_limit_bytes) {
             LOG(WARNING) << "the len of value string is too long, len=" << 
value_len
-                         << ", max_len=" << OLAP_STRING_MAX_LENGTH;
+                         << ", max_len=" << 
config::string_type_length_soft_limit_bytes;
             return OLAP_ERR_INPUT_PARAMETER_ERROR;
         }
 
diff --git a/be/src/olap/wrapper_field.cpp b/be/src/olap/wrapper_field.cpp
index be908a2..887b93b 100644
--- a/be/src/olap/wrapper_field.cpp
+++ b/be/src/olap/wrapper_field.cpp
@@ -28,8 +28,9 @@ WrapperField* WrapperField::create(const TabletColumn& 
column, uint32_t len) {
             (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == 
OLAP_FIELD_TYPE_VARCHAR ||
              column.type() == OLAP_FIELD_TYPE_HLL || column.type() == 
OLAP_FIELD_TYPE_OBJECT ||
              column.type() == OLAP_FIELD_TYPE_STRING);
-    size_t max_length = column.type() == OLAP_FIELD_TYPE_STRING ? 
OLAP_STRING_MAX_LENGTH
-                                                                : 
OLAP_VARCHAR_MAX_LENGTH;
+    size_t max_length = column.type() == OLAP_FIELD_TYPE_STRING
+                                ? config::string_type_length_soft_limit_bytes
+                                : OLAP_VARCHAR_MAX_LENGTH;
     if (is_string_type && len > max_length) {
         OLAP_LOG_WARNING("length of string parameter is too long[len=%lu, 
max_len=%lu].", len,
                          max_length);
diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h
index 9235e6c..4742565 100644
--- a/be/src/runtime/types.h
+++ b/be/src/runtime/types.h
@@ -45,7 +45,6 @@ struct TypeDescriptor {
     /// Only set if type == TYPE_CHAR or type == TYPE_VARCHAR
     int len;
     static const int MAX_VARCHAR_LENGTH = OLAP_VARCHAR_MAX_LENGTH;
-    static const int MAX_STRING_LENGTH = OLAP_STRING_MAX_LENGTH;
     static const int MAX_CHAR_LENGTH = 255;
     static const int MAX_CHAR_INLINE_LENGTH = 128;
 
@@ -99,7 +98,7 @@ struct TypeDescriptor {
     static TypeDescriptor create_string_type() {
         TypeDescriptor ret;
         ret.type = TYPE_STRING;
-        ret.len = MAX_STRING_LENGTH;
+        ret.len = config::string_type_length_soft_limit_bytes;
         return ret;
     }
 
diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index ea93c09..e9768c9 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -115,10 +115,11 @@ Status VOlapTableSink::send(RuntimeState* state, 
vectorized::Block* input_block)
         if (!_vpartition->find_partition(&block_row, &partition)) {
             RETURN_IF_ERROR(state->append_error_msg_to_file([]() -> 
std::string { return ""; },
                     [&]() -> std::string {
-                    fmt::memory_buffer buf;
-                    fmt::format_to(buf, "no partition for this tuple. 
tuple=[]");
-                    return fmt::to_string(buf);
-                    }, &stop_processing));
+                        fmt::memory_buffer buf;
+                        fmt::format_to(buf, "no partition for this tuple. 
tuple=[]");
+                        return fmt::to_string(buf);
+                    },
+                    &stop_processing));
             _number_filtered_rows++;
             if (stop_processing) {
                 return Status::EndOfFile("Encountered unqualified data, stop 
processing");
@@ -162,9 +163,11 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, 
vectorized::Block* bl
     const auto num_rows = block->rows();
     fmt::memory_buffer error_msg;
     auto set_invalid_and_append_error_msg = [&](int row) {
-         filter_bitmap->Set(row, true);
-         return state->append_error_msg_to_file([]() -> std::string { return 
""; },
-                 [&error_msg]() -> std::string { return 
fmt::to_string(error_msg); }, stop_processing);
+        filter_bitmap->Set(row, true);
+        return state->append_error_msg_to_file(
+                []() -> std::string { return ""; },
+                [&error_msg]() -> std::string { return 
fmt::to_string(error_msg); },
+                stop_processing);
     };
 
     for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) {
@@ -199,12 +202,8 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, 
vectorized::Block* bl
                 const auto column_string =
                         assert_cast<const 
vectorized::ColumnString*>(real_column_ptr.get());
 
-                size_t limit = MAX_SIZE_OF_VEC_STRING;
-                if (desc->type().type != TYPE_STRING) {
-                    DCHECK(desc->type().len >= 0);
-                    limit = std::min(limit, (size_t)desc->type().len);
-                }
-
+                size_t limit =
+                        std::min(config::string_type_length_soft_limit_bytes, 
desc->type().len);
                 for (int j = 0; j < num_rows; ++j) {
                     if (!filter_bitmap->Get(j)) {
                         auto str_val = column_string->get_data_at(j);
@@ -218,14 +217,14 @@ Status VOlapTableSink::_validate_data(RuntimeState* 
state, vectorized::Block* bl
                             fmt::format_to(error_msg, "input str: [{}] ", 
str_val.to_prefix(10));
                             fmt::format_to(error_msg, "schema length: {}; ", 
desc->type().len);
                             fmt::format_to(error_msg, "actual length: {}; ", 
str_val.size);
-                        } else if (str_val.size > MAX_SIZE_OF_VEC_STRING) {
+                        } else if (str_val.size > limit) {
                             fmt::format_to(
                                     error_msg, "{}",
                                     "the length of input string is too long 
than vec schema. ");
                             fmt::format_to(error_msg, "column_name: {}; ", 
desc->col_name());
                             fmt::format_to(error_msg, "input str: [{}] ", 
str_val.to_prefix(10));
                             fmt::format_to(error_msg, "schema length: {}; ", 
desc->type().len);
-                            fmt::format_to(error_msg, "limit length: {}; ", 
MAX_SIZE_OF_VEC_STRING);
+                            fmt::format_to(error_msg, "limit length: {}; ", 
limit);
                             fmt::format_to(error_msg, "actual length: {}; ", 
str_val.size);
                         }
 
diff --git a/docs/en/administrator-guide/config/be_config.md 
b/docs/en/administrator-guide/config/be_config.md
index c917492..0f8bfaa 100644
--- a/docs/en/administrator-guide/config/be_config.md
+++ b/docs/en/administrator-guide/config/be_config.md
@@ -1499,3 +1499,8 @@ Translated with www.DeepL.com/Translator (free version)
 * Type: int32
 * Description: The maximum amount of data read by each OlapScanner.
 * Default: 1024
+
+### `string_type_length_soft_limit_bytes`
+* Type: int32
+* Description: A soft limit of string type length.
+* Description: 1048576
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Types/STRING.md 
b/docs/en/sql-reference/sql-statements/Data Types/STRING.md
index 654a7b9..9a01939 100644
--- a/docs/en/sql-reference/sql-statements/Data Types/STRING.md 
+++ b/docs/en/sql-reference/sql-statements/Data Types/STRING.md 
@@ -27,7 +27,7 @@ under the License.
 # STRING
 ## Description
 STRING (M)
-A variable length string, max legnth is 2147483643(2GB - 4).
+A variable length string, max legnth is 2147483643(2GB - 4). The length of the 
String type is also limited by the configuration 
`string_type_length_soft_limit_bytes` of be, the actual maximum length that can 
be stored take the minimum value of both, the String type can only be used in 
the value column, not in the key column and the partition and bucket columns
 
 Note: Variable length strings are stored in UTF-8 encoding, so usually English 
characters occupies 1 byte, and Chinese characters occupies 3 bytes.
 
diff --git a/docs/zh-CN/administrator-guide/config/be_config.md 
b/docs/zh-CN/administrator-guide/config/be_config.md
index 65277f2..2b86af5 100644
--- a/docs/zh-CN/administrator-guide/config/be_config.md
+++ b/docs/zh-CN/administrator-guide/config/be_config.md
@@ -1516,3 +1516,8 @@ webserver默认工作线程数
 * 类型: int32
 * 描述: 每个OlapScanner 读取的最大数据量
 * 默认值: 1024
+
+### `string_type_length_soft_limit_bytes`
+* 类型: int32
+* 描述: String 类型最大长度的软限,单位是字节
+* 默认值: 1048576
diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/STRING.md 
b/docs/zh-CN/sql-reference/sql-statements/Data Types/STRING.md
index 7b80e79..88fb608 100644
--- a/docs/zh-CN/sql-reference/sql-statements/Data Types/STRING.md      
+++ b/docs/zh-CN/sql-reference/sql-statements/Data Types/STRING.md      
@@ -27,7 +27,7 @@ under the License.
 # STRING
 ## description
     STRING
-    变长字符串,最大支持2147483643 字节(2GB-4)。用法类似VARCHAR。
+    变长字符串,最大支持2147483643 字节(2GB-4)。String类型的长度还受 be 配置  
`string_type_soft_limit`, 实际能存储的最大长度 取两者最小值,String类型只能用在value 列,不能用在 key 列和分区 
分桶列
     
     注意:变长字符串是以UTF-8编码存储的,因此通常英文字符占1个字节,中文字符占3个字节。
 
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup 
b/fe/fe-core/src/main/cup/sql_parser.cup
index da8d815..f42e864 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -4569,8 +4569,6 @@ type ::=
   {: RESULT = ScalarType.createStringType(); :}
   | KW_TEXT
   {: RESULT = ScalarType.createStringType(); :}
-  | KW_BLOB
-  {: RESULT = ScalarType.createStringType(); :}
   | KW_VARCHAR LPAREN INTEGER_LITERAL:len RPAREN
   {: ScalarType type = ScalarType.createVarcharType(len.intValue());
      type.setAssignedStrLenInColDefinition();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
index bd32419..cf61e09 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
@@ -215,7 +215,10 @@ public class ColumnDef {
                 throw new AnalysisException("Array type column default value 
only support null");
             }
         }
-
+        if (isKey() && type.getPrimitiveType() == PrimitiveType.STRING) {
+            throw new AnalysisException("String Type should not be used in key 
column[" + getName()
+                    + "].");
+        }
         if (type.getPrimitiveType() == PrimitiveType.MAP) {
             if (defaultValue.isSet && defaultValue != 
DefaultValue.NULL_DEFAULT_VALUE) {
                 throw new AnalysisException("Map type column default value 
just support null");
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
index ed2b689..47a34b9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
@@ -296,7 +296,8 @@ public class CreateTableStmt extends DdlStmt {
                 }
                 if (hasAggregate) {
                     for (ColumnDef columnDef : columnDefs) {
-                        if (columnDef.getAggregateType() == null) {
+                        if (columnDef.getAggregateType() == null
+                                && 
!columnDef.getType().isScalarType(PrimitiveType.STRING)) {
                             keysColumnNames.add(columnDef.getName());
                         }
                     }
@@ -315,6 +316,9 @@ public class CreateTableStmt extends DdlStmt {
                         if (columnDef.getType().isFloatingPointType()) {
                             break;
                         }
+                        if (columnDef.getType().getPrimitiveType() == 
PrimitiveType.STRING) {
+                            break;
+                        }
                         if (columnDef.getType().getPrimitiveType() == 
PrimitiveType.VARCHAR) {
                             keysColumnNames.add(columnDef.getName());
                             break;
@@ -324,8 +328,8 @@ public class CreateTableStmt extends DdlStmt {
                     // The OLAP table must has at least one short key and the 
float and double should not be short key.
                     // So the float and double could not be the first column 
in OLAP table.
                     if (keysColumnNames.isEmpty()) {
-                        throw new AnalysisException("The olap table first 
column could not be float or double,"
-                                + " use decimal instead.");
+                        throw new AnalysisException("The olap table first 
column could not be float, double, string"
+                                + " use decimal or varchar instead.");
                     }
                     keysDesc = new KeysDesc(KeysType.DUP_KEYS, 
keysColumnNames);
                 }
@@ -422,7 +426,7 @@ public class CreateTableStmt extends DdlStmt {
             if (distributionDesc == null) {
                 throw new AnalysisException("Create olap table should contain 
distribution desc");
             }
-            distributionDesc.analyze(columnSet);
+            distributionDesc.analyze(columnSet, columnDefs);
         } else if (engineName.equalsIgnoreCase("elasticsearch")) {
             EsUtil.analyzePartitionAndDistributionDesc(partitionDesc, 
distributionDesc);
         } else {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DistributionDesc.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DistributionDesc.java
index c349185..5d5b132 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DistributionDesc.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DistributionDesc.java
@@ -40,7 +40,7 @@ public class DistributionDesc implements Writable {
 
     }
 
-    public void analyze(Set<String> colSet) throws AnalysisException {
+    public void analyze(Set<String> colSet, List<ColumnDef> columnDefs) throws 
AnalysisException {
         throw new NotImplementedException();
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/HashDistributionDesc.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/HashDistributionDesc.java
index 2379d3d..d02faa4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/HashDistributionDesc.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/HashDistributionDesc.java
@@ -22,11 +22,13 @@ import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.DistributionInfo;
 import org.apache.doris.catalog.DistributionInfo.DistributionInfoType;
 import org.apache.doris.catalog.HashDistributionInfo;
+import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.DdlException;
 import org.apache.doris.common.io.Text;
 
 import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -58,17 +60,29 @@ public class HashDistributionDesc extends DistributionDesc {
     }
 
     @Override
-    public void analyze(Set<String> cols) throws AnalysisException {
+    public void analyze(Set<String> colSet, List<ColumnDef> columnDefs) throws 
AnalysisException {
         if (numBucket <= 0) {
             throw new AnalysisException("Number of hash distribution should be 
larger than zero.");
         }
         if (distributionColumnNames == null || distributionColumnNames.size() 
== 0) {
             throw new AnalysisException("Number of hash column should be 
larger than zero.");
         }
+        Set<String> distColSet = 
Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER);
         for (String columnName : distributionColumnNames) {
-            if (!cols.contains(columnName)) {
+            if (!colSet.contains(columnName)) {
                 throw new AnalysisException("Distribution column(" + 
columnName + ") doesn't exist.");
             }
+            if (!distColSet.add(columnName)) {
+                throw new AnalysisException("Duplicated distribution column " 
+ columnName);
+            }
+            for (ColumnDef columnDef : columnDefs) {
+                if (columnDef.getName().equals(columnName)) {
+                    if 
(columnDef.getType().isScalarType(PrimitiveType.STRING)) {
+                        throw new AnalysisException("String Type should not be 
used in distribution column["
+                                + columnDef.getName() + "].");
+                    }
+                }
+            }
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java
index 7566cdd..83c592c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionDesc.java
@@ -21,6 +21,7 @@ import org.apache.doris.catalog.AggregateType;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.PartitionInfo;
 import org.apache.doris.catalog.PartitionType;
+import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.DdlException;
 import org.apache.doris.qe.ConnectContext;
@@ -78,6 +79,10 @@ public class PartitionDesc {
                     if (columnDef.getType().isFloatingPointType()) {
                         throw new AnalysisException("Floating point type 
column can not be partition column");
                     }
+                    if 
(columnDef.getType().isScalarType(PrimitiveType.STRING)) {
+                        throw new AnalysisException("String Type should not be 
used in partition column["
+                                + columnDef.getName() + "].");
+                    }
                     if 
(!ConnectContext.get().getSessionVariable().isAllowPartitionColumnNullable()
                             && columnDef.isAllowNull()) {
                         throw new AnalysisException("The partition column must 
be NOT NULL");
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/RandomDistributionDesc.java
 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/RandomDistributionDesc.java
index 947cebb..d32af89 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/RandomDistributionDesc.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/RandomDistributionDesc.java
@@ -42,7 +42,7 @@ public class RandomDistributionDesc extends DistributionDesc {
     }
 
     @Override
-    public void analyze(Set<String> colSet) throws AnalysisException {
+    public void analyze(Set<String> colSet, List<ColumnDef> columnDefs) throws 
AnalysisException {
         if (numBucket <= 0) {
             throw new AnalysisException("Number of random distribution should 
be larger than zero.");
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java
index 9484d7a..714e067 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java
@@ -196,7 +196,6 @@ public class ScalarType extends Type {
                 return createVarcharType();
             case "STRING":
             case "TEXT":
-            case "BLOB":
                 return createStringType();
             case "HLL":
                 return createHllType();
diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java
index ff29dc2..8548ba6 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java
@@ -22,6 +22,7 @@ import org.apache.doris.analysis.CreateTableStmt;
 import org.apache.doris.analysis.ExplainOptions;
 import org.apache.doris.analysis.Expr;
 import org.apache.doris.catalog.Catalog;
+import org.apache.doris.common.AnalysisException;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.StmtExecutor;
 import org.apache.doris.utframe.UtFrameUtils;
@@ -31,7 +32,9 @@ import org.apache.commons.lang3.StringUtils;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 
 import java.io.File;
 import java.util.List;
@@ -41,6 +44,9 @@ public class PlannerTest {
     private static String runningDir = "fe/mocked/DemoTest/" + 
UUID.randomUUID().toString() + "/";
     private static ConnectContext ctx;
 
+    @Rule
+    public ExpectedException expectedEx = ExpectedException.none();
+
     @After
     public void tearDown() throws Exception {
         FileUtils.deleteDirectory(new File(runningDir));
@@ -434,4 +440,13 @@ public class PlannerTest {
         compare.accept("select * from db1.tbl2 where k1 > 2.1", "select * from 
db1.tbl2 where k1 > 2");
     }
 
+    @Test
+    public void testStringType() throws Exception {
+        String createTbl1 = "create table db1.tbl1(k1 string, k2 varchar(32), 
k3 varchar(32), k4 int) "
+                + "AGGREGATE KEY(k1, k2,k3,k4) distributed by hash(k1) buckets 
3 properties('replication_num' = '1')";
+        expectedEx.expect(AnalysisException.class);
+        expectedEx.expectMessage("String Type should not be used in key 
column[k1].");
+        UtFrameUtils.parseAndAnalyzeStmt(createTbl1, ctx);
+    }
+
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to