This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-c108335-hive-sql
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-c108335-hive-sql by
this push:
new 37f916607de [Fix](Serde) Support hive compatible output format #49036
37f916607de is described below
commit 37f916607de97456d1593819f2d10350e2864613
Author: BePPPower <[email protected]>
AuthorDate: Wed Mar 12 15:33:12 2025 +0800
[Fix](Serde) Support hive compatible output format #49036
---
be/src/vec/data_types/serde/data_type_array_serde.cpp | 3 ++-
be/src/vec/data_types/serde/data_type_map_serde.cpp | 3 ++-
be/src/vec/data_types/serde/data_type_number_serde.cpp | 9 ++++++++-
be/src/vec/data_types/serde/data_type_serde.h | 15 +++++++++++++++
be/src/vec/data_types/serde/data_type_struct_serde.cpp | 3 ++-
be/src/vec/sink/vmysql_result_writer.cpp | 16 ++++++++++++++++
.../java/org/apache/doris/nereids/NereidsPlanner.java | 1 +
.../main/java/org/apache/doris/qe/SessionVariable.java | 8 ++++++--
gensrc/thrift/PaloInternalService.thrift | 3 ++-
9 files changed, 54 insertions(+), 7 deletions(-)
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp
b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index d654e3ae22d..ea748cdd6c4 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -331,7 +331,8 @@ Status DataTypeArraySerDe::_write_column_to_mysql(const
IColumn& column,
const auto end_arr_element = offsets[row_idx_of_col_arr];
for (auto j = begin_arr_element; j < end_arr_element; ++j) {
if (j != begin_arr_element) {
- if (0 != result.push_string(", ", 2)) {
+ if (0 != result.push_string(options.mysql_collection_delim.c_str(),
+
options.mysql_collection_delim.size())) {
return Status::InternalError("pack mysql buffer failed.");
}
}
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp
b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index 7a4921623f3..d2c311b70d9 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -419,7 +419,8 @@ Status DataTypeMapSerDe::_write_column_to_mysql(const
IColumn& column,
const auto& offsets = map_column.get_offsets();
for (auto j = offsets[col_index - 1]; j < offsets[col_index]; ++j) {
if (j != offsets[col_index - 1]) {
- if (0 != result.push_string(", ", 2)) {
+ if (0 != result.push_string(options.mysql_collection_delim.c_str(),
+
options.mysql_collection_delim.size())) {
return Status::InternalError("pack mysql buffer failed.");
}
}
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index d58439c9dd8..69e3271715f 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -277,8 +277,15 @@ Status
DataTypeNumberSerDe<T>::_write_column_to_mysql(const IColumn& column,
int buf_ret = 0;
auto& data = assert_cast<const ColumnType&>(column).get_data();
const auto col_index = index_check_const(row_idx, col_const);
- if constexpr (std::is_same_v<T, Int8> || std::is_same_v<T, UInt8>) {
+ if constexpr (std::is_same_v<T, Int8>) {
buf_ret = result.push_tinyint(data[col_index]);
+ } else if constexpr (std::is_same_v<T, UInt8>) {
+ if (options.is_bool_value_num) {
+ buf_ret = result.push_tinyint(data[col_index]);
+ } else {
+ std::string bool_value = data[col_index] ? "true" : "false";
+ result.push_string(bool_value.c_str(), bool_value.size());
+ }
} else if constexpr (std::is_same_v<T, Int16> || std::is_same_v<T,
UInt16>) {
buf_ret = result.push_smallint(data[col_index]);
} else if constexpr (std::is_same_v<T, Int32> || std::is_same_v<T,
UInt32>) {
diff --git a/be/src/vec/data_types/serde/data_type_serde.h
b/be/src/vec/data_types/serde/data_type_serde.h
index f54c4604499..a53c3dd5136 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -180,6 +180,21 @@ public:
const char* nested_string_wrapper;
int wrapper_len;
+ /**
+ * mysql_collection_delim is the delimiter written between the elements of a
+ * collection (array, map, struct) when the value is written to mysql.
+ */
+ std::string mysql_collection_delim = ", ";
+
+ /**
+ * is_bool_value_num controls how a bool value inside a collection (array,
+ * map, struct) is displayed.
+ * e.g. for an array holding the single value true:
+ * if set to true, it is displayed as a number:
+ * [1]
+ * if set to false, it is displayed as text:
+ * [true]
+ */
+ bool is_bool_value_num = true;
+
[[nodiscard]] char get_collection_delimiter(
int hive_text_complex_type_delimiter_level) const {
CHECK(0 <= hive_text_complex_type_delimiter_level &&
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index ead4d0b2088..8cbfb3d360c 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -349,7 +349,8 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const
IColumn& column,
bool begin = true;
for (size_t j = 0; j < elem_serdes_ptrs.size(); ++j) {
if (!begin) {
- if (0 != result.push_string(", ", 2)) {
+ if (0 != result.push_string(options.mysql_collection_delim.c_str(),
+
options.mysql_collection_delim.size())) {
return Status::InternalError("pack mysql buffer failed.");
}
}
diff --git a/be/src/vec/sink/vmysql_result_writer.cpp
b/be/src/vec/sink/vmysql_result_writer.cpp
index ac04c6367cb..dabf79960a8 100644
--- a/be/src/vec/sink/vmysql_result_writer.cpp
+++ b/be/src/vec/sink/vmysql_result_writer.cpp
@@ -163,6 +163,8 @@ Status VMysqlResultWriter<is_binary_format>::_set_options(
_options.map_key_delim = ':';
_options.null_format = "null";
_options.null_len = 4;
+ _options.mysql_collection_delim = ", ";
+ _options.is_bool_value_num = true;
break;
case TSerdeDialect::PRESTO:
// eg:
@@ -173,6 +175,20 @@ Status VMysqlResultWriter<is_binary_format>::_set_options(
_options.map_key_delim = '=';
_options.null_format = "NULL";
_options.null_len = 4;
+ _options.mysql_collection_delim = ", ";
+ _options.is_bool_value_num = true;
+ break;
+ case TSerdeDialect::HIVE:
+ // eg:
+ // array: ["abc","def","",null]
+ // map: {"k1":null,"k2":"v3"}
+ _options.nested_string_wrapper = "\"";
+ _options.wrapper_len = 1;
+ _options.map_key_delim = ':';
+ _options.null_format = "null";
+ _options.null_len = 4;
+ _options.mysql_collection_delim = ",";
+ _options.is_bool_value_num = false;
break;
default:
return Status::InternalError("unknown serde dialect: {}",
serde_dialect);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index 56adf5f2f82..5f6b74a597b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -810,6 +810,7 @@ public class NereidsPlanner extends Planner {
statementContext.setFormatOptions(FormatOptions.getForPresto());
break;
case "doris":
+ case "hive":
statementContext.setFormatOptions(FormatOptions.getDefault());
break;
default:
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 1a502151278..04279c665d7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -4511,8 +4511,10 @@ public class SessionVariable implements Serializable,
Writable {
throw new UnsupportedOperationException("serdeDialect value is
empty");
}
- if (!serdeDialect.equalsIgnoreCase("doris") &&
!serdeDialect.equalsIgnoreCase("presto")
- && !serdeDialect.equalsIgnoreCase("trino")) {
+ if (!serdeDialect.equalsIgnoreCase("doris")
+ && !serdeDialect.equalsIgnoreCase("presto")
+ && !serdeDialect.equalsIgnoreCase("trino")
+ && !serdeDialect.equalsIgnoreCase("hive")) {
LOG.warn("serdeDialect value is invalid, the invalid value is {}",
serdeDialect);
throw new UnsupportedOperationException(
"sqlDialect value is invalid, the invalid value is " +
serdeDialect);
@@ -4685,6 +4687,8 @@ public class SessionVariable implements Serializable,
Writable {
case "presto":
case "trino":
return TSerdeDialect.PRESTO;
+ case "hive":
+ return TSerdeDialect.HIVE;
default:
throw new IllegalArgumentException("Unknown serde dialect: " +
serdeDialect);
}
diff --git a/gensrc/thrift/PaloInternalService.thrift
b/gensrc/thrift/PaloInternalService.thrift
index d2805ba83cb..0bf401122a1 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -82,7 +82,8 @@ struct TResourceLimit {
enum TSerdeDialect {
DORIS = 0,
- PRESTO = 1
+ PRESTO = 1,
+ HIVE = 2
}
// Query options that correspond to PaloService.PaloQueryOptions,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]