This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 9bf7a0e9559dab7e55e07721b8d548bd1810f987 Author: TengJianPing <18241664+jackte...@users.noreply.github.com> AuthorDate: Sat Sep 2 00:58:48 2023 +0800 [function](bitmap) support bitmap_to_base64 and bitmap_from_base64 (#23759) --- be/src/util/bitmap_value.h | 23 ++- be/src/vec/functions/function_bitmap.cpp | 106 +++++++++++++ be/test/vec/function/function_bitmap_test.cpp | 171 +++++++++++++++++++++ be/test/vec/function/function_test_util.h | 15 ++ .../doris/catalog/BuiltinScalarFunctions.java | 4 + .../functions/scalar/BitmapFromBase64.java | 71 +++++++++ .../functions/scalar/BitmapToBase64.java | 69 +++++++++ .../expressions/visitor/ScalarFunctionVisitor.java | 10 ++ gensrc/script/doris_builtins_functions.py | 3 + .../bitmap_functions/test_bitmap_function.out | 117 +++++++++----- .../bitmap_functions/test_bitmap_function.groovy | 78 ++++++++++ 11 files changed, 627 insertions(+), 40 deletions(-) diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 32486008c7..02a5595440 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -1229,7 +1229,7 @@ public: return; } - if (bits.size() == 1 && !config::enable_set_in_bitmap_value) { + if (bits.size() == 1) { _type = SINGLE; _sv = bits[0]; return; @@ -1247,6 +1247,27 @@ public: } } + BitmapTypeCode::type get_type_code() const { + switch (_type) { + case EMPTY: + return BitmapTypeCode::EMPTY; + case SINGLE: + if (_sv <= std::numeric_limits<uint32_t>::max()) { + return BitmapTypeCode::SINGLE32; + } else { + return BitmapTypeCode::SINGLE64; + } + case SET: + return BitmapTypeCode::SET; + case BITMAP: + if (_bitmap->is32BitsEnough()) { + return BitmapTypeCode::BITMAP32; + } else { + return BitmapTypeCode::BITMAP64; + } + } + } + template <typename T> void add_many(const T* values, const size_t count) { switch (_type) { diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 578ce0e34d..8c7d81cd9b 100644 --- 
a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -40,6 +40,7 @@ #include "util/hash_util.hpp" #include "util/murmur_hash3.h" #include "util/string_parser.hpp" +#include "util/url_coding.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" #include "vec/columns/column_array.h" @@ -250,6 +251,58 @@ struct BitmapFromString { } }; +struct NameBitmapFromBase64 { + static constexpr auto name = "bitmap_from_base64"; +}; +struct BitmapFromBase64 { + using ArgumentType = DataTypeString; + + static constexpr auto name = "bitmap_from_base64"; + + static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, + std::vector<BitmapValue>& res, NullMap& null_map, + size_t input_rows_count) { + res.reserve(input_rows_count); + if (offsets.size() == 0 && input_rows_count == 1) { + // For NULL constant + res.emplace_back(); + null_map[0] = 1; + return Status::OK(); + } + std::string decode_buff; + int last_decode_buff_len = 0; + int curr_decode_buff_len = 0; + for (size_t i = 0; i < input_rows_count; ++i) { + const char* src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); + int64_t src_size = offsets[i] - offsets[i - 1]; + if (0 != src_size % 4) { + // return Status::InvalidArgument( + // fmt::format("invalid base64: {}", std::string(src_str, src_size))); + res.emplace_back(); + null_map[i] = 1; + continue; + } + curr_decode_buff_len = src_size + 3; + if (curr_decode_buff_len > last_decode_buff_len) { + decode_buff.resize(curr_decode_buff_len); + last_decode_buff_len = curr_decode_buff_len; + } + int outlen = base64_decode(src_str, src_size, decode_buff.data()); + if (outlen < 0) { + res.emplace_back(); + null_map[i] = 1; + } else { + BitmapValue bitmap_val; + if (!bitmap_val.deserialize(decode_buff.data())) { + return Status::RuntimeError( + fmt::format("bitmap_from_base64 decode failed: base64: {}", std::string(src_str, src_size))); // src_str is bounded by src_size, not NUL-terminated + } + res.emplace_back(std::move(bitmap_val)); + } 
+ } + return Status::OK(); + } +}; struct BitmapFromArray { using ArgumentType = DataTypeArray; static constexpr auto name = "bitmap_from_array"; @@ -887,6 +940,55 @@ struct BitmapToString { } }; +struct NameBitmapToBase64 { + static constexpr auto name = "bitmap_to_base64"; +}; + +struct BitmapToBase64 { + using ReturnType = DataTypeString; + static constexpr auto TYPE_INDEX = TypeIndex::BitMap; + using Type = DataTypeBitMap::FieldType; + using ReturnColumnType = ColumnString; + using Chars = ColumnString::Chars; + using Offsets = ColumnString::Offsets; + + static Status vector(const std::vector<BitmapValue>& data, Chars& chars, Offsets& offsets) { + size_t size = data.size(); + offsets.resize(size); + size_t output_char_size = 0; + for (size_t i = 0; i < size; ++i) { + BitmapValue& bitmap_val = const_cast<BitmapValue&>(data[i]); + auto ser_size = bitmap_val.getSizeInBytes(); + output_char_size += (int)(4.0 * ceil((double)ser_size / 3.0)); // 4 chars per 3 bytes + } + ColumnString::check_chars_length(output_char_size, size); + chars.resize(output_char_size); + auto chars_data = chars.data(); + + size_t cur_ser_size = 0; + size_t last_ser_size = 0; + std::string ser_buff; + size_t encoded_offset = 0; + for (size_t i = 0; i < size; ++i) { + BitmapValue& bitmap_val = const_cast<BitmapValue&>(data[i]); + cur_ser_size = bitmap_val.getSizeInBytes(); + if (cur_ser_size > last_ser_size) { + last_ser_size = cur_ser_size; + ser_buff.resize(cur_ser_size); + } + bitmap_val.write_to(ser_buff.data()); + + int outlen = base64_encode((const unsigned char*)ser_buff.data(), cur_ser_size, + chars_data + encoded_offset); + DCHECK(outlen > 0); + + encoded_offset += (int)(4.0 * ceil((double)cur_ser_size / 3.0)); + offsets[i] = encoded_offset; + } + return Status::OK(); + } +}; + struct SubBitmap { static constexpr auto name = "sub_bitmap"; using TData1 = std::vector<BitmapValue>; @@ -1117,6 +1219,8 @@ using FunctionBitmapMin = FunctionBitmapSingle<FunctionBitmapMinImpl>; using FunctionBitmapMax = 
FunctionBitmapSingle<FunctionBitmapMaxImpl>; using FunctionBitmapToString = FunctionUnaryToType<BitmapToString, NameBitmapToString>; +using FunctionBitmapToBase64 = FunctionUnaryToType<BitmapToBase64, NameBitmapToBase64>; +using FunctionBitmapFromBase64 = FunctionBitmapAlwaysNull<BitmapFromBase64>; using FunctionBitmapNot = FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapNot, NameBitmapNot>; using FunctionBitmapAndNot = @@ -1137,6 +1241,8 @@ void register_function_bitmap(SimpleFunctionFactory& factory) { factory.register_function<FunctionToBitmap>(); factory.register_function<FunctionToBitmapWithCheck>(); factory.register_function<FunctionBitmapFromString>(); + factory.register_function<FunctionBitmapToBase64>(); + factory.register_function<FunctionBitmapFromBase64>(); factory.register_function<FunctionBitmapFromArray>(); factory.register_function<FunctionBitmapHash>(); factory.register_function<FunctionBitmapHash64>(); diff --git a/be/test/vec/function/function_bitmap_test.cpp b/be/test/vec/function/function_bitmap_test.cpp index 689199e5c5..34d74a549d 100644 --- a/be/test/vec/function/function_bitmap_test.cpp +++ b/be/test/vec/function/function_bitmap_test.cpp @@ -14,12 +14,16 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
+#include <gtest/gtest.h> #include <stdint.h> +#include <cstdint> #include <limits> +#include <numeric> #include <string> #include <vector> +#include "common/config.h" #include "common/status.h" #include "function_test_util.h" #include "gtest/gtest_pred_impl.h" @@ -27,6 +31,7 @@ #include "testutil/any_type.h" #include "util/bitmap_value.h" #include "vec/core/types.h" +#include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" @@ -77,6 +82,172 @@ TEST(function_bitmap_test, function_bitmap_to_string_test) { check_function<DataTypeString, true>(func_name, input_types, data_set); } +namespace doris { +namespace config { +DECLARE_Bool(enable_set_in_bitmap_value); +} +} // namespace doris +TEST(function_bitmap_test, function_bitmap_to_base64) { + config::Register::Field field("bool", "enable_set_in_bitmap_value", + &config::enable_set_in_bitmap_value, "false", false); + config::Register::_s_field_map->insert( + std::make_pair(std::string("enable_set_in_bitmap_value"), field)); + + std::string func_name = "bitmap_to_base64"; + InputTypeSet input_types = {TypeIndex::BitMap}; + + EXPECT_TRUE(config::set_config("enable_set_in_bitmap_value", "false", false, true).ok()); + std::vector<uint64_t> bits32 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32}; // SET_TYPE_THRESHOLD + 1 + std::vector<uint64_t> bits64 { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, (uint64_t)4294967296}; // SET_TYPE_THRESHOLD + 1 + + BitmapValue bitmap32_1(1); // single + BitmapValue bitmap32_2({1, 9999999}); // bitmap + BitmapValue bitmap32_3(bits32); // bitmap + + BitmapValue bitmap64_1((uint64_t)4294967296); // single + BitmapValue bitmap64_2({1, (uint64_t)4294967296}); // bitmap + BitmapValue bitmap64_3(bits64); // bitmap + + 
BitmapValue empty_bitmap; + + EXPECT_EQ(bitmap32_1.get_type_code(), BitmapTypeCode::SINGLE32); + EXPECT_EQ(bitmap32_2.get_type_code(), BitmapTypeCode::BITMAP32); + EXPECT_EQ(bitmap32_3.get_type_code(), BitmapTypeCode::BITMAP32); + + EXPECT_EQ(bitmap64_1.get_type_code(), BitmapTypeCode::SINGLE64); + EXPECT_EQ(bitmap64_2.get_type_code(), BitmapTypeCode::BITMAP64); + EXPECT_EQ(bitmap64_3.get_type_code(), BitmapTypeCode::BITMAP64); + + DataSet data_set = { + {{&bitmap32_1}, std::string("AQEAAAA=")}, + {{&bitmap32_2}, std::string("AjowAAACAAAAAAAAAJgAAAAYAAAAGgAAAAEAf5Y=")}, + {{&bitmap32_3}, std::string("AjswAAABAAAgAAEAAAAgAA==")}, + {{&bitmap64_1}, std::string("AwAAAAABAAAA")}, + {{&bitmap64_2}, + std::string("BAIAAAAAOjAAAAEAAAAAAAAAEAAAAAEAAQAAADowAAABAAAAAAAAABAAAAAAAA==")}, + {{&bitmap64_3}, + std::string("BAIAAAAAOzAAAAEAAB8AAQAAAB8AAQAAADowAAABAAAAAAAAABAAAAAAAA==")}, + {{&empty_bitmap}, std::string("AA==")}, + {{Null()}, Null()}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set); + + EXPECT_TRUE(config::set_config("enable_set_in_bitmap_value", "true", false, true).ok()); + bitmap32_1 = BitmapValue(1); // single + bitmap32_2 = BitmapValue({1, 9999999}); // set + bitmap32_3 = BitmapValue(bits32); // bitmap + + bitmap64_1 = BitmapValue((uint64_t)4294967296); // single + bitmap64_2 = BitmapValue({1, (uint64_t)4294967296}); // set + bitmap64_3 = BitmapValue(bits64); // bitmap + + EXPECT_EQ(bitmap32_1.get_type_code(), BitmapTypeCode::SINGLE32); + EXPECT_EQ(bitmap32_2.get_type_code(), BitmapTypeCode::SET); + EXPECT_EQ(bitmap32_3.get_type_code(), BitmapTypeCode::BITMAP32); + + EXPECT_EQ(bitmap64_1.get_type_code(), BitmapTypeCode::SINGLE64); + EXPECT_EQ(bitmap64_2.get_type_code(), BitmapTypeCode::SET); + EXPECT_EQ(bitmap64_3.get_type_code(), BitmapTypeCode::BITMAP64); + + DataSet data_set2 = { + {{&bitmap32_1}, std::string("AQEAAAA=")}, + {{&bitmap32_2}, std::string("BQIBAAAAAAAAAH+WmAAAAAAA")}, + {{&bitmap32_3}, 
std::string("AjswAAABAAAgAAEAAAAgAA==")}, + {{&bitmap64_1}, std::string("AwAAAAABAAAA")}, + {{&bitmap64_2}, std::string("BQIAAAAAAQAAAAEAAAAAAAAA")}, + {{&bitmap64_3}, + std::string("BAIAAAAAOzAAAAEAAB8AAQAAAB8AAQAAADowAAABAAAAAAAAABAAAAAAAA==")}, + {{&empty_bitmap}, std::string("AA==")}, + {{Null()}, Null()}}; + + check_function<DataTypeString, true>(func_name, input_types, data_set2); +} + +TEST(function_bitmap_test, function_bitmap_from_base64) { + config::Register::Field field("bool", "enable_set_in_bitmap_value", + &config::enable_set_in_bitmap_value, "false", false); + config::Register::_s_field_map->insert( + std::make_pair(std::string("enable_set_in_bitmap_value"), field)); + + std::string func_name = "bitmap_from_base64"; + InputTypeSet input_types = {TypeIndex::String}; + + EXPECT_TRUE(config::set_config("enable_set_in_bitmap_value", "false", false, true).ok()); + std::string bitmap32_base64_1("AQEAAAA="); + std::string bitmap32_base64_2("AjowAAACAAAAAAAAAJgAAAAYAAAAGgAAAAEAf5Y="); + std::string bitmap32_base64_3("AjswAAABAAAgAAEAAAAgAA=="); + + std::string bitmap64_base64_1("AwAAAAABAAAA"); + std::string bitmap64_base64_2( + "BAIAAAAAOjAAAAEAAAAAAAAAEAAAAAEAAQAAADowAAABAAAAAAAAABAAAAAAAA=="); + std::string bitmap64_base64_3("BAIAAAAAOzAAAAEAAB8AAQAAAB8AAQAAADowAAABAAAAAAAAABAAAAAAAA=="); + std::string base64_empty("AA=="); + + std::vector<uint64_t> bits32 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32}; // SET_TYPE_THRESHOLD + 1 + std::vector<uint64_t> bits64 { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, (uint64_t)4294967296}; // SET_TYPE_THRESHOLD + 1 + + BitmapValue bitmap32_1(1); // single + BitmapValue bitmap32_2({1, 9999999}); // bitmap + BitmapValue bitmap32_3(bits32); // bitmap + + BitmapValue bitmap64_1((uint64_t)4294967296); // single + BitmapValue bitmap64_2({1, (uint64_t)4294967296}); // 
bitmap + BitmapValue bitmap64_3(bits64); // bitmap + + BitmapValue empty_bitmap; + DataSet data_set = {{{bitmap32_base64_1}, bitmap32_1}, {{bitmap32_base64_2}, bitmap32_2}, + {{bitmap32_base64_3}, bitmap32_3}, {{bitmap64_base64_1}, bitmap64_1}, + {{bitmap64_base64_2}, bitmap64_2}, {{bitmap64_base64_3}, bitmap64_3}, + {{base64_empty}, empty_bitmap}, {{Null()}, Null()}}; + + check_function<DataTypeBitMap, true>(func_name, input_types, data_set); + + EXPECT_TRUE(config::set_config("enable_set_in_bitmap_value", "true", false, true).ok()); + bitmap32_base64_1 = ("AQEAAAA="); + bitmap32_base64_2 = ("BQIBAAAAAAAAAH"); + bitmap32_base64_3 = ("AjswAAABAAAgAAEAAAAgAA=="); + + bitmap64_base64_1 = ("AwAAAAABAAAA"); + bitmap64_base64_2 = ("BQIAAAAAAQAAAAEAAAAAAAAA"); + bitmap64_base64_3 = ("BAIAAAAAOzAAAAEAAB8AAQAAAB8AAQAAADowAAABAAAAAAAAABAAAAAAAA=="); + + check_function<DataTypeBitMap, true>(func_name, input_types, data_set); + + /* sr + mysql [(none)]>select bitmap_to_base64(bitmap_from_string("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32")); + +----------------------------------------------------------------------------------------------------------------------------------+ + | bitmap_to_base64(bitmap_from_string('0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32')) | + +----------------------------------------------------------------------------------------------------------------------------------+ + | AjowAAABAAAAAAAgABAAAAAAAAEAAgADAAQABQAGAAcACAAJAAoACwAMAA0ADgAPABAAEQASABMAFAAVABYAFwAYABkAGgAbABwAHQAeAB8AIAA= | + +----------------------------------------------------------------------------------------------------------------------------------+ + + mysql [(none)]>select bitmap_to_base64(bitmap_from_string("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296")); + 
+--------------------------------------------------------------------------------------------------------------------------------------------------+ + | bitmap_to_base64(bitmap_from_string('0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296')) | + +--------------------------------------------------------------------------------------------------------------------------------------------------+ + | BAIAAAAAOjAAAAEAAAAAAB8AEAAAAAAAAQACAAMABAAFAAYABwAIAAkACgALAAwADQAOAA8AEAARABIAEwAUABUAFgAXABgAGQAaABsAHAAdAB4AHwABAAAAOjAAAAEAAAAAAAAAEAAAAAAA | + +--------------------------------------------------------------------------------------------------------------------------------------------------+ + */ + bitmap32_base64_3 = + ("AjowAAABAAAAAAAgABAAAAAAAAEAAgADAAQABQAGAAcACAAJAAoACwAMAA0ADgAPABAAEQASABMAFAAVABYAF" + "wAYABkAGgAbABwAHQAeAB8AIAA="); + bitmap64_base64_3 = + ("BAIAAAAAOjAAAAEAAAAAAB8AEAAAAAAAAQACAAMABAAFAAYABwAIAAkACgALAAwADQAOAA8AEAARABIAEwAUA" + "BUAFgAXABgAGQAaABsAHAAdAB4AHwABAAAAOjAAAAEAAAAAAAAAEAAAAAAA"); + data_set = {{{bitmap32_base64_3}, bitmap32_3}, {{bitmap64_base64_3}, bitmap64_3}}; + check_function<DataTypeBitMap, true>(func_name, input_types, data_set); +} + TEST(function_bitmap_test, function_bitmap_and_count) { std::string func_name = "bitmap_and_count"; InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap}; diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index c4e3f4379d..fc9fb8a60d 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -33,9 +33,12 @@ #include "testutil/any_type.h" #include "testutil/function_utils.h" #include "udf/udf.h" +#include "util/bitmap_value.h" #include "util/jsonb_utils.h" #include "vec/columns/column.h" +#include "vec/columns/column_complex.h" #include "vec/columns/column_const.h" +#include "vec/columns/column_nullable.h" #include "vec/common/string_ref.h" 
#include "vec/core/block.h" #include "vec/core/column_numbers.h" @@ -43,6 +46,7 @@ #include "vec/core/field.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/functions/simple_function_factory.h" @@ -313,6 +317,17 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty if constexpr (std::is_same_v<ReturnType, DataTypeDecimal<Decimal128>>) { const auto& column_data = field.get<DecimalField<Decimal128>>().get_value(); EXPECT_EQ(expect_data.value, column_data.value) << " at row " << i; + } else if constexpr (std::is_same_v<ReturnType, DataTypeBitMap>) { + const ColumnBitmap* bitmap_col = nullptr; + if constexpr (nullable) { + auto nullable_column = assert_cast<const ColumnNullable*>(column.get()); + bitmap_col = assert_cast<const ColumnBitmap*>( + nullable_column->get_nested_column_ptr().get()); + } else { + bitmap_col = assert_cast<const ColumnBitmap*>(column.get()); + } + EXPECT_EQ(expect_data.to_string(), bitmap_col->get_element(i).to_string()) + << " at row " << i; } else if constexpr (std::is_same_v<ReturnType, DataTypeFloat32> || std::is_same_v<ReturnType, DataTypeFloat64> || std::is_same_v<ReturnType, DataTypeTime>) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index 9e2733f08f..610f6fe830 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -61,6 +61,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapContain import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapCount; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapEmpty; import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapFromArray; +import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapFromBase64; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapFromString; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapHasAll; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapHasAny; @@ -74,6 +75,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapOrCount import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapSubsetInRange; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapSubsetLimit; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapToArray; +import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapToBase64; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapToString; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapXor; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapXorCount; @@ -398,6 +400,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(BitmapCount.class, "bitmap_count"), scalar(BitmapEmpty.class, "bitmap_empty"), scalar(BitmapFromArray.class, "bitmap_from_array"), + scalar(BitmapFromBase64.class, "bitmap_from_base64"), scalar(BitmapFromString.class, "bitmap_from_string"), scalar(BitmapHasAll.class, "bitmap_has_all"), scalar(BitmapHasAny.class, "bitmap_has_any"), @@ -411,6 +414,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(BitmapSubsetInRange.class, "bitmap_subset_in_range"), scalar(BitmapSubsetLimit.class, "bitmap_subset_limit"), scalar(BitmapToArray.class, "bitmap_to_array"), + scalar(BitmapToBase64.class, "bitmap_to_base64"), scalar(BitmapToString.class, "bitmap_to_string"), scalar(BitmapXor.class, "bitmap_xor"), scalar(BitmapXorCount.class, "bitmap_xor_count"), diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitmapFromBase64.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitmapFromBase64.java new file mode 100644 index 0000000000..0994a120ec --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitmapFromBase64.java @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.BitmapType; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarcharType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'bitmap_from_base64'. This class is generated by GenerateFunction. + */ +public class BitmapFromBase64 extends ScalarFunction + implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable { + + public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( + FunctionSignature.ret(BitmapType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(BitmapType.INSTANCE).args(StringType.INSTANCE) + ); + + /** + * constructor with 1 argument. + */ + public BitmapFromBase64(Expression arg) { + super("bitmap_from_base64", arg); + } + + /** + * withChildren. 
+ */ + @Override + public BitmapFromBase64 withChildren(List<Expression> children) { + Preconditions.checkArgument(children.size() == 1); + return new BitmapFromBase64(children.get(0)); + } + + @Override + public List<FunctionSignature> getSignatures() { + return SIGNATURES; + } + + @Override + public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) { + return visitor.visitBitmapFromBase64(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitmapToBase64.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitmapToBase64.java new file mode 100644 index 0000000000..444a908c50 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitmapToBase64.java @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; +import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.BitmapType; +import org.apache.doris.nereids.types.StringType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'bitmap_to_base64'. This class is generated by GenerateFunction. + */ +public class BitmapToBase64 extends ScalarFunction + implements UnaryExpression, ExplicitlyCastableSignature, PropagateNullable { + + public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( + FunctionSignature.ret(StringType.INSTANCE).args(BitmapType.INSTANCE) + ); + + /** + * constructor with 1 argument. + */ + public BitmapToBase64(Expression arg) { + super("bitmap_to_base64", arg); + } + + /** + * withChildren. 
+ */ + @Override + public BitmapToBase64 withChildren(List<Expression> children) { + Preconditions.checkArgument(children.size() == 1); + return new BitmapToBase64(children.get(0)); + } + + @Override + public List<FunctionSignature> getSignatures() { + return SIGNATURES; + } + + @Override + public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) { + return visitor.visitBitmapToBase64(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java index 8855791793..99b198e74b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java @@ -65,6 +65,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapContain import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapCount; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapEmpty; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapFromArray; +import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapFromBase64; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapFromString; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapHasAll; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapHasAny; @@ -78,6 +79,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapOrCount import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapSubsetInRange; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapSubsetLimit; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapToArray; +import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapToBase64; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapToString; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapXor; import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapXorCount; @@ -531,6 +533,14 @@ public interface ScalarFunctionVisitor<R, C> { return visitScalarFunction(bitmapFromString, context); } + default R visitBitmapFromBase64(BitmapFromBase64 bitmapFromBase64, C context) { + return visitScalarFunction(bitmapFromBase64, context); + } + + default R visitBitmapToBase64(BitmapToBase64 bitmapToBase64, C context) { + return visitScalarFunction(bitmapToBase64, context); + } + default R visitBitmapHasAll(BitmapHasAll bitmapHasAll, C context) { return visitScalarFunction(bitmapHasAll, context); } diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 4ace08d9eb..0422303e68 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1788,6 +1788,9 @@ visible_functions = { [['bitmap_to_string'], 'STRING', ['BITMAP'], ''], [['bitmap_from_string'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NULLABLE'], [['bitmap_from_string'], 'BITMAP', ['STRING'], 'ALWAYS_NULLABLE'], + [['bitmap_to_base64'], 'STRING', ['BITMAP'], ''], + [['bitmap_from_base64'], 'BITMAP', ['VARCHAR'], 'ALWAYS_NULLABLE'], + [['bitmap_from_base64'], 'BITMAP', ['STRING'], 'ALWAYS_NULLABLE'], [['bitmap_from_array'], 'BITMAP', ['ARRAY_TINYINT'], 'ALWAYS_NULLABLE'], [['bitmap_from_array'], 'BITMAP', ['ARRAY_SMALLINT'], 'ALWAYS_NULLABLE'], [['bitmap_from_array'], 'BITMAP', ['ARRAY_INT'], 'ALWAYS_NULLABLE'], diff --git a/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out b/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out index 4eeae0ccd9..2706af1a6e 100644 --- 
a/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out +++ b/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out @@ -421,27 +421,6 @@ true -- !sql -- 3 --- !sql -- -\N - --- !sql -- -0 - --- !sql -- -0 - --- !sql_orthogonal_bitmap_intersect_count2 -- -0 - --- !sql_orthogonal_bitmap_intersect_count3_1 -- -2 - --- !sql_orthogonal_bitmap_intersect_count3_2 -- -2 - --- !sql_orthogonal_bitmap_intersect_count4 -- -1 - -- !sql_orthogonal_bitmap_union_count2 -- 0 @@ -495,12 +474,6 @@ true -- !sql_bitmap_intersect_check0 -- 1 --- !sql_bitmap_intersect_check1 -- -1 - --- !sql_bitmap_intersect_check2 -- -1 - -- !sql_bitmap_intersect_nereids0 -- 1 1 @@ -513,21 +486,87 @@ true -- !sql_bitmap_intersect_no_nereids1 -- 1 1 --- !sql_orthogonal_bitmap_intersect_nereids1 -- -1 1 +-- !sql_bitmap_base64_nereids0 -- +\N --- !sql_orthogonal_bitmap_intersect_not_nereids1 -- -1 1 +-- !sql_bitmap_base64_nereids1 -- --- !sql_orthogonal_bitmap_intersect_count_nereids0 -- -1 1 --- !sql_orthogonal_bitmap_intersect_count_nereids1 -- -1 1 +-- !sql_bitmap_base64_nereids2 -- +\N --- !sql_orthogonal_bitmap_intersect_count_not_nereids0 -- -1 1 +-- !sql_bitmap_base64_nereids3 -- +1 --- !sql_orthogonal_bitmap_intersect_count_not_nereids1 -- -1 1 +-- !sql_bitmap_base64_nereids4 -- +0,1,2,3 + +-- !sql_bitmap_base64_nereids5 -- +0,1,2,3,4294967296 + +-- !sql_bitmap_base64_nereids6 -- +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32 + +-- !sql_bitmap_base64_nereids7 -- +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296 + +-- !sql_bitmap_base64_nereids8 -- +1 + +-- !sql_bitmap_base64_nereids9 -- + +0 +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32 +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296 +0,1,2,3,4294967296 +1,9999999 + +-- !sql_bitmap_base64_0 -- +\N + +-- 
!sql_bitmap_base64_1 -- + + +-- !sql_bitmap_base64_2 -- +\N + +-- !sql_bitmap_base64_3 -- +1 + +-- !sql_bitmap_base64_4 -- +0,1,2,3 + +-- !sql_bitmap_base64_5 -- +0,1,2,3,4294967296 + +-- !sql_bitmap_base64_6 -- +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32 + +-- !sql_bitmap_base64_7 -- +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296 + +-- !sql_bitmap_base64_8 -- +1 + +-- !sql_bitmap_base64_9 -- + +0 +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32 +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296 +0,1,2,3,4294967296 +1,9999999 + +-- !sql_bitmap_base64_not_null0 -- +0 +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32 +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296 +0,1,2,3,4294967296 +1,9999999 + +-- !sql_bitmap_base64_not_null1 -- +0 +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32 +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296 +0,1,2,3,4294967296 +1,9999999 diff --git a/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy b/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy index daeb5baf21..4d3a831255 100644 --- a/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy @@ -832,4 +832,82 @@ suite("test_bitmap_function") { // select count(distinct tag) as count1, // orthogonal_bitmap_intersect_count(id_bitmap, tag, 0) as count2_bitmap from test_orthog_bitmap_intersect; // """ + + sql """ set experimental_enable_nereids_planner=true; """ + qt_sql_bitmap_base64_nereids0 """ select 
bitmap_to_string(bitmap_from_base64(bitmap_to_base64(null))); """ + qt_sql_bitmap_base64_nereids1 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("")))); """ + qt_sql_bitmap_base64_nereids2 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string(" ")))); """ + qt_sql_bitmap_base64_nereids3 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("1")))); """ + qt_sql_bitmap_base64_nereids4 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("0, 1, 2, 3")))); """ + qt_sql_bitmap_base64_nereids5 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("0, 1, 2, 3, 4294967296")))); """ + qt_sql_bitmap_base64_nereids6 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32")))) """ + qt_sql_bitmap_base64_nereids7 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296")))) """ + qt_sql_bitmap_base64_nereids8 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(to_bitmap(1)))); """ + + sql """ DROP TABLE IF EXISTS test_bitmap_base64 """ + sql """ + CREATE TABLE test_bitmap_base64 ( + dt INT(11) NULL, + id bitmap BITMAP_UNION NULL + ) ENGINE=OLAP + AGGREGATE KEY(dt) + DISTRIBUTED BY HASH(dt) BUCKETS 2 + properties ( + "replication_num" = "1" + ); + """ + sql """ + INSERT INTO + test_bitmap_base64 + VALUES + (0, to_bitmap(null)), + (1, bitmap_from_string("0")), + (2, bitmap_from_string("1,9999999")), + (3, bitmap_from_string("0, 1, 2, 3, 4294967296")), + (4, bitmap_from_string("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32")), + (5, 
bitmap_from_string("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296")) + ; + """ + qt_sql_bitmap_base64_nereids9 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(id))) s from test_bitmap_base64 order by s; """ + + sql """ set experimental_enable_nereids_planner=false; """ + qt_sql_bitmap_base64_0 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(null))); """ + qt_sql_bitmap_base64_1 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("")))); """ + qt_sql_bitmap_base64_2 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string(" ")))); """ + qt_sql_bitmap_base64_3 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("1")))); """ + qt_sql_bitmap_base64_4 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("0, 1, 2, 3")))); """ + qt_sql_bitmap_base64_5 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("0, 1, 2, 3, 4294967296")))); """ + qt_sql_bitmap_base64_6 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32")))) """ + qt_sql_bitmap_base64_7 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(bitmap_from_string("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296")))) """ + qt_sql_bitmap_base64_8 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(to_bitmap(1)))); """ + + qt_sql_bitmap_base64_9 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(id))) s from test_bitmap_base64 order by s; """ + + sql """ set experimental_enable_nereids_planner=true; """ + sql """ DROP TABLE IF EXISTS test_bitmap_base64_not_null """ + sql """ + CREATE TABLE test_bitmap_base64_not_null ( + dt INT(11) NULL, + id bitmap BITMAP_UNION NOT NULL + ) ENGINE=OLAP + 
AGGREGATE KEY(dt) + DISTRIBUTED BY HASH(dt) BUCKETS 2 + properties ( + "replication_num" = "1" + ); + """ + sql """ + INSERT INTO + test_bitmap_base64_not_null + VALUES + (1, bitmap_from_string("0")), + (2, bitmap_from_string("1,9999999")), + (3, bitmap_from_string("0, 1, 2, 3, 4294967296")), + (4, bitmap_from_string("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32")), + (5, bitmap_from_string("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,4294967296")) + ; + """ + qt_sql_bitmap_base64_not_null0 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(id))) s from test_bitmap_base64_not_null order by s; """ + sql """ set experimental_enable_nereids_planner=false; """ + qt_sql_bitmap_base64_not_null1 """ select bitmap_to_string(bitmap_from_base64(bitmap_to_base64(id))) s from test_bitmap_base64_not_null order by s; """ } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org