This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new abf753c1ca7 [enhancement]Optimize GeoFunctions for const columns (#34396) abf753c1ca7 is described below commit abf753c1ca7e654a63fb12f7bc9d148fc52b726a Author: koarz <66543806+ko...@users.noreply.github.com> AuthorDate: Thu May 16 14:37:47 2024 +0800 [enhancement]Optimize GeoFunctions for const columns (#34396) --- be/src/vec/functions/functions_geo.cpp | 236 ++++++++++++++++++++++------- be/test/vec/function/function_geo_test.cpp | 151 ++++++++++++------ 2 files changed, 280 insertions(+), 107 deletions(-) diff --git a/be/src/vec/functions/functions_geo.cpp b/be/src/vec/functions/functions_geo.cpp index ac6969c582d..172f000928f 100644 --- a/be/src/vec/functions/functions_geo.cpp +++ b/be/src/vec/functions/functions_geo.cpp @@ -44,33 +44,70 @@ struct StPoint { DCHECK_EQ(arguments.size(), 2); auto return_type = block.get_data_type(result); - auto column_x = - block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - auto column_y = - block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); + const auto& [left_column, left_const] = + unpack_if_const(block.get_by_position(arguments[0]).column); + const auto& [right_column, right_const] = + unpack_if_const(block.get_by_position(arguments[1]).column); - const auto size = column_x->size(); + const auto size = std::max(left_column->size(), right_column->size()); MutableColumnPtr res = return_type->create_column(); GeoPoint point; std::string buf; - for (int row = 0; row < size; ++row) { - auto cur_res = point.from_coord(column_x->operator[](row).get<Float64>(), - column_y->operator[](row).get<Float64>()); - if (cur_res != GEO_PARSE_OK) { - res->insert_data(nullptr, 0); - continue; - } - - buf.clear(); - point.encode_to(&buf); - res->insert_data(buf.data(), buf.size()); + if (left_const) { + const_vector(left_column, right_column, res, size, point, buf); + } else if (right_const) { + vector_const(left_column, right_column, res, size, point, buf); + } else { + vector_vector(left_column, right_column, res, size, point, buf); } block.replace_by_position(result, std::move(res)); return Status::OK(); } + + static void loop_do(GeoParseStatus& cur_res, MutableColumnPtr& res, GeoPoint& point, + std::string& buf) { + if (cur_res != GEO_PARSE_OK) { + res->insert_data(nullptr, 0); + return; + } + + buf.clear(); + point.encode_to(&buf); + res->insert_data(buf.data(), buf.size()); + } + + static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column, + MutableColumnPtr& res, const size_t size, GeoPoint& point, + std::string& buf) { + double x = left_column->operator[](0).get<Float64>(); + for (int row = 0; row < size; ++row) { + auto cur_res = point.from_coord(x, right_column->operator[](row).get<Float64>()); + loop_do(cur_res, res, point, buf); + } + } + + static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column, + MutableColumnPtr& res, const size_t size, GeoPoint& point, + std::string& buf) { + double y = right_column->operator[](0).get<Float64>(); + for (int row = 0; row < size; ++row) { + auto cur_res = point.from_coord(right_column->operator[](row).get<Float64>(), y); + loop_do(cur_res, res, point, buf); + } + } + + static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column, + MutableColumnPtr& res, const size_t size, GeoPoint& point, + std::string& buf) { + for (int row = 0; row < size; ++row) { + auto cur_res = point.from_coord(left_column->operator[](row).get<Float64>(), + right_column->operator[](row).get<Float64>()); + loop_do(cur_res, res, point, buf); + } + } }; struct StAsTextName { @@ -304,37 +341,78 @@ struct StAzimuth { auto return_type = block.get_data_type(result); MutableColumnPtr res = return_type->create_column(); - auto p1 = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - auto p2 = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); - const auto size = p1->size(); + const auto& [left_column, left_const] = + unpack_if_const(block.get_by_position(arguments[0]).column); + const auto& [right_column, right_const] = + unpack_if_const(block.get_by_position(arguments[1]).column); + + const auto size = std::max(left_column->size(), right_column->size()); GeoPoint point1; GeoPoint point2; + if (left_const) { + const_vector(left_column, right_column, res, size, point1, point2); + } else if (right_const) { + vector_const(left_column, right_column, res, size, point1, point2); + } else { + vector_vector(left_column, right_column, res, size, point1, point2); + } + block.replace_by_position(result, std::move(res)); + return Status::OK(); + } + + static void loop_do(bool& pt1, bool& pt2, GeoPoint& point1, GeoPoint& point2, + MutableColumnPtr& res) { + if (!(pt1 && pt2)) { + res->insert_default(); + return; + } + + double angle = 0; + if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) { + res->insert_default(); + return; + } + res->insert_data(const_cast<const char*>((char*)&angle), 0); + } + static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column, + MutableColumnPtr& res, size_t size, GeoPoint& point1, + GeoPoint& point2) { + auto shape_value1 = left_column->get_data_at(0); + auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size); for (int row = 0; row < size; ++row) { - auto shape_value1 = p1->get_data_at(row); + auto shape_value2 = right_column->get_data_at(row); + auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size); + + loop_do(pt1, pt2, point1, point2, res); + } + } + + static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column, + MutableColumnPtr& res, size_t size, GeoPoint& point1, + GeoPoint& point2) { + auto shape_value2 = right_column->get_data_at(0); + auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size); + for (int row = 0; row < size; ++row) { + auto shape_value1 = left_column->get_data_at(row); auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size); - if (!pt1) { - res->insert_default(); - continue; - } - auto shape_value2 = p2->get_data_at(row); + loop_do(pt1, pt2, point1, point2, res); + } + } + + static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column, + MutableColumnPtr& res, size_t size, GeoPoint& point1, + GeoPoint& point2) { + for (int row = 0; row < size; ++row) { + auto shape_value1 = left_column->get_data_at(row); + auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size); + auto shape_value2 = right_column->get_data_at(row); auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size); - if (!pt2) { - res->insert_default(); - continue; - } - double angle = 0; - if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) { - res->insert_default(); - continue; - } - res->insert_data(const_cast<const char*>((char*)&angle), 0); + loop_do(pt1, pt2, point1, point2, res); } - block.replace_by_position(result, std::move(res)); - return Status::OK(); } }; @@ -463,34 +541,76 @@ struct StContains { size_t result) { DCHECK_EQ(arguments.size(), 2); auto return_type = block.get_data_type(result); - auto shape1 = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); - auto shape2 = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); + const auto& [left_column, left_const] = + unpack_if_const(block.get_by_position(arguments[0]).column); + const auto& [right_column, right_const] = + unpack_if_const(block.get_by_position(arguments[1]).column); + + const auto size = std::max(left_column->size(), right_column->size()); - const auto size = shape1->size(); MutableColumnPtr res = return_type->create_column(); + if (left_const) { + const_vector(left_column, right_column, res, size); + } else if (right_const) { + vector_const(left_column, right_column, res, size); + } else { + vector_vector(left_column, right_column, res, size); + } + block.replace_by_position(result, std::move(res)); + return Status::OK(); + } + + static void loop_do(StringRef& lhs_value, StringRef& rhs_value, + std::vector<std::shared_ptr<GeoShape>>& shapes, int& i, + MutableColumnPtr& res) { + StringRef* strs[2] = {&lhs_value, &rhs_value}; + for (i = 0; i < 2; ++i) { + shapes[i] = + std::shared_ptr<GeoShape>(GeoShape::from_encoded(strs[i]->data, strs[i]->size)); + if (shapes[i] == nullptr) { + res->insert_default(); + break; + } + } + + if (i == 2) { + auto contains_value = shapes[0]->contains(shapes[1].get()); + res->insert_data(const_cast<const char*>((char*)&contains_value), 0); + } + } + + static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column, + MutableColumnPtr& res, const size_t size) { int i; + auto lhs_value = left_column->get_data_at(0); std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr}; for (int row = 0; row < size; ++row) { - auto lhs_value = shape1->get_data_at(row); - auto rhs_value = shape2->get_data_at(row); - StringRef* strs[2] = {&lhs_value, &rhs_value}; - for (i = 0; i < 2; ++i) { - shapes[i] = std::shared_ptr<GeoShape>( - GeoShape::from_encoded(strs[i]->data, strs[i]->size)); - if (shapes[i] == nullptr) { - res->insert_default(); - break; - } - } + auto rhs_value = right_column->get_data_at(row); + loop_do(lhs_value, rhs_value, shapes, i, res); + } + } - if (i == 2) { - auto contains_value = shapes[0]->contains(shapes[1].get()); - res->insert_data(const_cast<const char*>((char*)&contains_value), 0); - } + static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column, + MutableColumnPtr& res, const size_t size) { + int i; + auto rhs_value = right_column->get_data_at(0); + std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr}; + for (int row = 0; row < size; ++row) { + auto lhs_value = left_column->get_data_at(row); + loop_do(lhs_value, rhs_value, shapes, i, res); + } + } + + static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column, + MutableColumnPtr& res, const size_t size) { + int i; + std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr}; + for (int row = 0; row < size; ++row) { + auto lhs_value = left_column->get_data_at(row); + auto rhs_value = right_column->get_data_at(row); + loop_do(lhs_value, rhs_value, shapes, i, res); } - block.replace_by_position(result, std::move(res)); - return Status::OK(); } static Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) { @@ -638,7 +758,7 @@ struct StAsBinary { auto return_type = block.get_data_type(result); MutableColumnPtr res = return_type->create_column(); - auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + auto col = block.get_by_position(arguments[0]).column; const auto size = col->size(); std::unique_ptr<GeoShape> shape; diff --git a/be/test/vec/function/function_geo_test.cpp b/be/test/vec/function/function_geo_test.cpp index c4b699e8816..0860eae773b 100644 --- a/be/test/vec/function/function_geo_test.cpp +++ b/be/test/vec/function/function_geo_test.cpp @@ -40,20 +40,38 @@ using namespace ut_type; TEST(VGeoFunctionsTest, function_geo_st_point_test) { std::string func_name = "st_point"; + + GeoPoint point; + auto cur_res = point.from_coord(24.7, 56.7); + EXPECT_TRUE(cur_res == GEO_PARSE_OK); + std::string buf; + point.encode_to(&buf); + + DataSet data_set = {{{(double)24.7, (double)56.7}, buf}, + {{Null(), (double)5}, Null()}, + {{(double)5, Null()}, Null()}}; { InputTypeSet input_types = {TypeIndex::Float64, TypeIndex::Float64}; - GeoPoint point; - auto cur_res = point.from_coord(24.7, 56.7); - EXPECT_TRUE(cur_res == GEO_PARSE_OK); - std::string buf; - point.encode_to(&buf); + static_cast<void>(check_function<DataTypeString, true>(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {Consted {TypeIndex::Float64}, TypeIndex::Float64}; - DataSet data_set = {{{(double)24.7, (double)56.7}, buf}, - {{Null(), (double)5}, Null()}, - {{(double)5, Null()}, Null()}}; + for (const auto& line : data_set) { + DataSet const_dataset = {line}; + static_cast<void>( + check_function<DataTypeString, true>(func_name, input_types, const_dataset)); + } + } + { + InputTypeSet input_types = {TypeIndex::Float64, Consted {TypeIndex::Float64}}; - static_cast<void>(check_function<DataTypeString, true>(func_name, input_types, data_set)); + for (const auto& line : data_set) { + DataSet const_dataset = {line}; + static_cast<void>( + check_function<DataTypeString, true>(func_name, input_types, const_dataset)); + } } } @@ -199,61 +217,96 @@ TEST(VGeoFunctionsTest, function_geo_st_angle) { TEST(VGeoFunctionsTest, function_geo_st_azimuth) { std::string func_name = "st_azimuth"; + GeoPoint point1; + auto cur_res1 = point1.from_coord(0, 0); + EXPECT_TRUE(cur_res1 == GEO_PARSE_OK); + GeoPoint point2; + auto cur_res2 = point2.from_coord(1, 0); + EXPECT_TRUE(cur_res2 == GEO_PARSE_OK); + + std::string buf1; + point1.encode_to(&buf1); + std::string buf2; + point2.encode_to(&buf2); + + DataSet data_set = {{{buf1, buf2}, (double)1.5707963267948966}, + {{buf1, Null()}, Null()}, + {{Null(), buf2}, Null()}}; { InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; - GeoPoint point1; - auto cur_res1 = point1.from_coord(0, 0); - EXPECT_TRUE(cur_res1 == GEO_PARSE_OK); - GeoPoint point2; - auto cur_res2 = point2.from_coord(1, 0); - EXPECT_TRUE(cur_res2 == GEO_PARSE_OK); - - std::string buf1; - point1.encode_to(&buf1); - std::string buf2; - point2.encode_to(&buf2); + static_cast<void>(check_function<DataTypeFloat64, true>(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::String, Consted {TypeIndex::String}}; - DataSet data_set = {{{buf1, buf2}, (double)1.5707963267948966}, - {{buf1, Null()}, Null()}, - {{Null(), buf2}, Null()}}; + for (const auto& line : data_set) { + DataSet const_dataset = {line}; + static_cast<void>( + check_function<DataTypeFloat64, true>(func_name, input_types, const_dataset)); + } + } + { + InputTypeSet input_types = {Consted {TypeIndex::String}, TypeIndex::String}; - static_cast<void>(check_function<DataTypeFloat64, true>(func_name, input_types, data_set)); + for (const auto& line : data_set) { + DataSet const_dataset = {line}; + static_cast<void>( + check_function<DataTypeFloat64, true>(func_name, input_types, const_dataset)); + } } } TEST(VGeoFunctionsTest, function_geo_st_contains) { std::string func_name = "st_contains"; + + std::string buf1; + std::string buf2; + std::string buf3; + GeoParseStatus status; + + std::string shape1 = std::string("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"); + std::unique_ptr<GeoShape> shape(GeoShape::from_wkt(shape1.data(), shape1.size(), &status)); + EXPECT_TRUE(status == GEO_PARSE_OK); + EXPECT_TRUE(shape != nullptr); + shape->encode_to(&buf1); + + GeoPoint point1; + status = point1.from_coord(5, 5); + EXPECT_TRUE(status == GEO_PARSE_OK); + point1.encode_to(&buf2); + + GeoPoint point2; + status = point2.from_coord(50, 50); + EXPECT_TRUE(status == GEO_PARSE_OK); + point2.encode_to(&buf3); + + DataSet data_set = {{{buf1, buf2}, (uint8_t)1}, + {{buf1, buf3}, (uint8_t)0}, + {{buf1, Null()}, Null()}, + {{Null(), buf3}, Null()}}; { InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; - std::string buf1; - std::string buf2; - std::string buf3; - GeoParseStatus status; - - std::string shape1 = std::string("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"); - std::unique_ptr<GeoShape> shape(GeoShape::from_wkt(shape1.data(), shape1.size(), &status)); - EXPECT_TRUE(status == GEO_PARSE_OK); - EXPECT_TRUE(shape != nullptr); - shape->encode_to(&buf1); - - GeoPoint point1; - status = point1.from_coord(5, 5); - EXPECT_TRUE(status == GEO_PARSE_OK); - point1.encode_to(&buf2); - - GeoPoint point2; - status = point2.from_coord(50, 50); - EXPECT_TRUE(status == GEO_PARSE_OK); - point2.encode_to(&buf3); + static_cast<void>(check_function<DataTypeUInt8, true>(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {Consted {TypeIndex::String}, TypeIndex::String}; - DataSet data_set = {{{buf1, buf2}, (uint8_t)1}, - {{buf1, buf3}, (uint8_t)0}, - {{buf1, Null()}, Null()}, - {{Null(), buf3}, Null()}}; + for (const auto& line : data_set) { + DataSet const_dataset = {line}; + static_cast<void>( + check_function<DataTypeUInt8, true>(func_name, input_types, const_dataset)); + } + } + { + InputTypeSet input_types = {TypeIndex::String, Consted {TypeIndex::String}}; - static_cast<void>(check_function<DataTypeUInt8, true>(func_name, input_types, data_set)); + for (const auto& line : data_set) { + DataSet const_dataset = {line}; + static_cast<void>( + check_function<DataTypeUInt8, true>(func_name, input_types, const_dataset)); + } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org