This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new abf753c1ca7 [enhancement]Optimize GeoFunctions for const columns (#34396)
abf753c1ca7 is described below

commit abf753c1ca7e654a63fb12f7bc9d148fc52b726a
Author: koarz <66543806+ko...@users.noreply.github.com>
AuthorDate: Thu May 16 14:37:47 2024 +0800

    [enhancement]Optimize GeoFunctions for const columns (#34396)
---
 be/src/vec/functions/functions_geo.cpp     | 236 ++++++++++++++++++++++-------
 be/test/vec/function/function_geo_test.cpp | 151 ++++++++++++------
 2 files changed, 280 insertions(+), 107 deletions(-)

diff --git a/be/src/vec/functions/functions_geo.cpp b/be/src/vec/functions/functions_geo.cpp
index ac6969c582d..172f000928f 100644
--- a/be/src/vec/functions/functions_geo.cpp
+++ b/be/src/vec/functions/functions_geo.cpp
@@ -44,33 +44,70 @@ struct StPoint {
         DCHECK_EQ(arguments.size(), 2);
         auto return_type = block.get_data_type(result);
 
-        auto column_x =
-                
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-        auto column_y =
-                
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+        const auto& [left_column, left_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+        const auto& [right_column, right_const] =
+                unpack_if_const(block.get_by_position(arguments[1]).column);
 
-        const auto size = column_x->size();
+        const auto size = std::max(left_column->size(), right_column->size());
 
         MutableColumnPtr res = return_type->create_column();
 
         GeoPoint point;
         std::string buf;
-        for (int row = 0; row < size; ++row) {
-            auto cur_res = 
point.from_coord(column_x->operator[](row).get<Float64>(),
-                                            
column_y->operator[](row).get<Float64>());
-            if (cur_res != GEO_PARSE_OK) {
-                res->insert_data(nullptr, 0);
-                continue;
-            }
-
-            buf.clear();
-            point.encode_to(&buf);
-            res->insert_data(buf.data(), buf.size());
+        if (left_const) {
+            const_vector(left_column, right_column, res, size, point, buf);
+        } else if (right_const) {
+            vector_const(left_column, right_column, res, size, point, buf);
+        } else {
+            vector_vector(left_column, right_column, res, size, point, buf);
         }
 
         block.replace_by_position(result, std::move(res));
         return Status::OK();
     }
+
+    static void loop_do(GeoParseStatus& cur_res, MutableColumnPtr& res, 
GeoPoint& point,
+                        std::string& buf) {
+        if (cur_res != GEO_PARSE_OK) {
+            res->insert_data(nullptr, 0);
+            return;
+        }
+
+        buf.clear();
+        point.encode_to(&buf);
+        res->insert_data(buf.data(), buf.size());
+    }
+
+    static void const_vector(const ColumnPtr& left_column, const ColumnPtr& 
right_column,
+                             MutableColumnPtr& res, const size_t size, 
GeoPoint& point,
+                             std::string& buf) {
+        double x = left_column->operator[](0).get<Float64>();
+        for (int row = 0; row < size; ++row) {
+            auto cur_res = point.from_coord(x, 
right_column->operator[](row).get<Float64>());
+            loop_do(cur_res, res, point, buf);
+        }
+    }
+
+    static void vector_const(const ColumnPtr& left_column, const ColumnPtr& 
right_column,
+                             MutableColumnPtr& res, const size_t size, 
GeoPoint& point,
+                             std::string& buf) {
+        double y = right_column->operator[](0).get<Float64>();
+        for (int row = 0; row < size; ++row) {
+            auto cur_res = 
point.from_coord(right_column->operator[](row).get<Float64>(), y);
+            loop_do(cur_res, res, point, buf);
+        }
+    }
+
+    static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& 
right_column,
+                              MutableColumnPtr& res, const size_t size, 
GeoPoint& point,
+                              std::string& buf) {
+        for (int row = 0; row < size; ++row) {
+            auto cur_res = 
point.from_coord(left_column->operator[](row).get<Float64>(),
+                                            
right_column->operator[](row).get<Float64>());
+            loop_do(cur_res, res, point, buf);
+        }
+    }
 };
 
 struct StAsTextName {
@@ -304,37 +341,78 @@ struct StAzimuth {
         auto return_type = block.get_data_type(result);
         MutableColumnPtr res = return_type->create_column();
 
-        auto p1 = 
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-        auto p2 = 
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
-        const auto size = p1->size();
+        const auto& [left_column, left_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+        const auto& [right_column, right_const] =
+                unpack_if_const(block.get_by_position(arguments[1]).column);
+
+        const auto size = std::max(left_column->size(), right_column->size());
 
         GeoPoint point1;
         GeoPoint point2;
+        if (left_const) {
+            const_vector(left_column, right_column, res, size, point1, point2);
+        } else if (right_const) {
+            vector_const(left_column, right_column, res, size, point1, point2);
+        } else {
+            vector_vector(left_column, right_column, res, size, point1, 
point2);
+        }
+        block.replace_by_position(result, std::move(res));
+        return Status::OK();
+    }
+
+    static void loop_do(bool& pt1, bool& pt2, GeoPoint& point1, GeoPoint& 
point2,
+                        MutableColumnPtr& res) {
+        if (!(pt1 && pt2)) {
+            res->insert_default();
+            return;
+        }
+
+        double angle = 0;
+        if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) {
+            res->insert_default();
+            return;
+        }
+        res->insert_data(const_cast<const char*>((char*)&angle), 0);
+    }
 
+    static void const_vector(const ColumnPtr& left_column, const ColumnPtr& 
right_column,
+                             MutableColumnPtr& res, size_t size, GeoPoint& 
point1,
+                             GeoPoint& point2) {
+        auto shape_value1 = left_column->get_data_at(0);
+        auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size);
         for (int row = 0; row < size; ++row) {
-            auto shape_value1 = p1->get_data_at(row);
+            auto shape_value2 = right_column->get_data_at(row);
+            auto pt2 = point2.decode_from(shape_value2.data, 
shape_value2.size);
+
+            loop_do(pt1, pt2, point1, point2, res);
+        }
+    }
+
+    static void vector_const(const ColumnPtr& left_column, const ColumnPtr& 
right_column,
+                             MutableColumnPtr& res, size_t size, GeoPoint& 
point1,
+                             GeoPoint& point2) {
+        auto shape_value2 = right_column->get_data_at(0);
+        auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size);
+        for (int row = 0; row < size; ++row) {
+            auto shape_value1 = left_column->get_data_at(row);
             auto pt1 = point1.decode_from(shape_value1.data, 
shape_value1.size);
-            if (!pt1) {
-                res->insert_default();
-                continue;
-            }
 
-            auto shape_value2 = p2->get_data_at(row);
+            loop_do(pt1, pt2, point1, point2, res);
+        }
+    }
+
+    static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& 
right_column,
+                              MutableColumnPtr& res, size_t size, GeoPoint& 
point1,
+                              GeoPoint& point2) {
+        for (int row = 0; row < size; ++row) {
+            auto shape_value1 = left_column->get_data_at(row);
+            auto pt1 = point1.decode_from(shape_value1.data, 
shape_value1.size);
+            auto shape_value2 = right_column->get_data_at(row);
             auto pt2 = point2.decode_from(shape_value2.data, 
shape_value2.size);
-            if (!pt2) {
-                res->insert_default();
-                continue;
-            }
 
-            double angle = 0;
-            if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) {
-                res->insert_default();
-                continue;
-            }
-            res->insert_data(const_cast<const char*>((char*)&angle), 0);
+            loop_do(pt1, pt2, point1, point2, res);
         }
-        block.replace_by_position(result, std::move(res));
-        return Status::OK();
     }
 };
 
@@ -463,34 +541,76 @@ struct StContains {
                           size_t result) {
         DCHECK_EQ(arguments.size(), 2);
         auto return_type = block.get_data_type(result);
-        auto shape1 = 
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-        auto shape2 = 
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+        const auto& [left_column, left_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+        const auto& [right_column, right_const] =
+                unpack_if_const(block.get_by_position(arguments[1]).column);
+
+        const auto size = std::max(left_column->size(), right_column->size());
 
-        const auto size = shape1->size();
         MutableColumnPtr res = return_type->create_column();
 
+        if (left_const) {
+            const_vector(left_column, right_column, res, size);
+        } else if (right_const) {
+            vector_const(left_column, right_column, res, size);
+        } else {
+            vector_vector(left_column, right_column, res, size);
+        }
+        block.replace_by_position(result, std::move(res));
+        return Status::OK();
+    }
+
+    static void loop_do(StringRef& lhs_value, StringRef& rhs_value,
+                        std::vector<std::shared_ptr<GeoShape>>& shapes, int& i,
+                        MutableColumnPtr& res) {
+        StringRef* strs[2] = {&lhs_value, &rhs_value};
+        for (i = 0; i < 2; ++i) {
+            shapes[i] =
+                    
std::shared_ptr<GeoShape>(GeoShape::from_encoded(strs[i]->data, strs[i]->size));
+            if (shapes[i] == nullptr) {
+                res->insert_default();
+                break;
+            }
+        }
+
+        if (i == 2) {
+            auto contains_value = shapes[0]->contains(shapes[1].get());
+            res->insert_data(const_cast<const char*>((char*)&contains_value), 
0);
+        }
+    }
+
+    static void const_vector(const ColumnPtr& left_column, const ColumnPtr& 
right_column,
+                             MutableColumnPtr& res, const size_t size) {
         int i;
+        auto lhs_value = left_column->get_data_at(0);
         std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
         for (int row = 0; row < size; ++row) {
-            auto lhs_value = shape1->get_data_at(row);
-            auto rhs_value = shape2->get_data_at(row);
-            StringRef* strs[2] = {&lhs_value, &rhs_value};
-            for (i = 0; i < 2; ++i) {
-                shapes[i] = std::shared_ptr<GeoShape>(
-                        GeoShape::from_encoded(strs[i]->data, strs[i]->size));
-                if (shapes[i] == nullptr) {
-                    res->insert_default();
-                    break;
-                }
-            }
+            auto rhs_value = right_column->get_data_at(row);
+            loop_do(lhs_value, rhs_value, shapes, i, res);
+        }
+    }
 
-            if (i == 2) {
-                auto contains_value = shapes[0]->contains(shapes[1].get());
-                res->insert_data(const_cast<const 
char*>((char*)&contains_value), 0);
-            }
+    static void vector_const(const ColumnPtr& left_column, const ColumnPtr& 
right_column,
+                             MutableColumnPtr& res, const size_t size) {
+        int i;
+        auto rhs_value = right_column->get_data_at(0);
+        std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
+        for (int row = 0; row < size; ++row) {
+            auto lhs_value = left_column->get_data_at(row);
+            loop_do(lhs_value, rhs_value, shapes, i, res);
+        }
+    }
+
+    static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& 
right_column,
+                              MutableColumnPtr& res, const size_t size) {
+        int i;
+        std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
+        for (int row = 0; row < size; ++row) {
+            auto lhs_value = left_column->get_data_at(row);
+            auto rhs_value = right_column->get_data_at(row);
+            loop_do(lhs_value, rhs_value, shapes, i, res);
         }
-        block.replace_by_position(result, std::move(res));
-        return Status::OK();
     }
 
     static Status open(FunctionContext* context, 
FunctionContext::FunctionStateScope scope) {
@@ -638,7 +758,7 @@ struct StAsBinary {
         auto return_type = block.get_data_type(result);
         MutableColumnPtr res = return_type->create_column();
 
-        auto col = 
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        auto col = block.get_by_position(arguments[0]).column;
         const auto size = col->size();
 
         std::unique_ptr<GeoShape> shape;
diff --git a/be/test/vec/function/function_geo_test.cpp b/be/test/vec/function/function_geo_test.cpp
index c4b699e8816..0860eae773b 100644
--- a/be/test/vec/function/function_geo_test.cpp
+++ b/be/test/vec/function/function_geo_test.cpp
@@ -40,20 +40,38 @@ using namespace ut_type;
 
 TEST(VGeoFunctionsTest, function_geo_st_point_test) {
     std::string func_name = "st_point";
+
+    GeoPoint point;
+    auto cur_res = point.from_coord(24.7, 56.7);
+    EXPECT_TRUE(cur_res == GEO_PARSE_OK);
+    std::string buf;
+    point.encode_to(&buf);
+
+    DataSet data_set = {{{(double)24.7, (double)56.7}, buf},
+                        {{Null(), (double)5}, Null()},
+                        {{(double)5, Null()}, Null()}};
     {
         InputTypeSet input_types = {TypeIndex::Float64, TypeIndex::Float64};
 
-        GeoPoint point;
-        auto cur_res = point.from_coord(24.7, 56.7);
-        EXPECT_TRUE(cur_res == GEO_PARSE_OK);
-        std::string buf;
-        point.encode_to(&buf);
+        static_cast<void>(check_function<DataTypeString, true>(func_name, 
input_types, data_set));
+    }
+    {
+        InputTypeSet input_types = {Consted {TypeIndex::Float64}, 
TypeIndex::Float64};
 
-        DataSet data_set = {{{(double)24.7, (double)56.7}, buf},
-                            {{Null(), (double)5}, Null()},
-                            {{(double)5, Null()}, Null()}};
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeString, true>(func_name, 
input_types, const_dataset));
+        }
+    }
+    {
+        InputTypeSet input_types = {TypeIndex::Float64, Consted 
{TypeIndex::Float64}};
 
-        static_cast<void>(check_function<DataTypeString, true>(func_name, 
input_types, data_set));
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeString, true>(func_name, 
input_types, const_dataset));
+        }
     }
 }
 
@@ -199,61 +217,96 @@ TEST(VGeoFunctionsTest, function_geo_st_angle) {
 
 TEST(VGeoFunctionsTest, function_geo_st_azimuth) {
     std::string func_name = "st_azimuth";
+    GeoPoint point1;
+    auto cur_res1 = point1.from_coord(0, 0);
+    EXPECT_TRUE(cur_res1 == GEO_PARSE_OK);
+    GeoPoint point2;
+    auto cur_res2 = point2.from_coord(1, 0);
+    EXPECT_TRUE(cur_res2 == GEO_PARSE_OK);
+
+    std::string buf1;
+    point1.encode_to(&buf1);
+    std::string buf2;
+    point2.encode_to(&buf2);
+
+    DataSet data_set = {{{buf1, buf2}, (double)1.5707963267948966},
+                        {{buf1, Null()}, Null()},
+                        {{Null(), buf2}, Null()}};
     {
         InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
 
-        GeoPoint point1;
-        auto cur_res1 = point1.from_coord(0, 0);
-        EXPECT_TRUE(cur_res1 == GEO_PARSE_OK);
-        GeoPoint point2;
-        auto cur_res2 = point2.from_coord(1, 0);
-        EXPECT_TRUE(cur_res2 == GEO_PARSE_OK);
-
-        std::string buf1;
-        point1.encode_to(&buf1);
-        std::string buf2;
-        point2.encode_to(&buf2);
+        static_cast<void>(check_function<DataTypeFloat64, true>(func_name, 
input_types, data_set));
+    }
+    {
+        InputTypeSet input_types = {TypeIndex::String, Consted 
{TypeIndex::String}};
 
-        DataSet data_set = {{{buf1, buf2}, (double)1.5707963267948966},
-                            {{buf1, Null()}, Null()},
-                            {{Null(), buf2}, Null()}};
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeFloat64, true>(func_name, 
input_types, const_dataset));
+        }
+    }
+    {
+        InputTypeSet input_types = {Consted {TypeIndex::String}, 
TypeIndex::String};
 
-        static_cast<void>(check_function<DataTypeFloat64, true>(func_name, 
input_types, data_set));
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeFloat64, true>(func_name, 
input_types, const_dataset));
+        }
     }
 }
 
 TEST(VGeoFunctionsTest, function_geo_st_contains) {
     std::string func_name = "st_contains";
+
+    std::string buf1;
+    std::string buf2;
+    std::string buf3;
+    GeoParseStatus status;
+
+    std::string shape1 = std::string("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 
0))");
+    std::unique_ptr<GeoShape> shape(GeoShape::from_wkt(shape1.data(), 
shape1.size(), &status));
+    EXPECT_TRUE(status == GEO_PARSE_OK);
+    EXPECT_TRUE(shape != nullptr);
+    shape->encode_to(&buf1);
+
+    GeoPoint point1;
+    status = point1.from_coord(5, 5);
+    EXPECT_TRUE(status == GEO_PARSE_OK);
+    point1.encode_to(&buf2);
+
+    GeoPoint point2;
+    status = point2.from_coord(50, 50);
+    EXPECT_TRUE(status == GEO_PARSE_OK);
+    point2.encode_to(&buf3);
+
+    DataSet data_set = {{{buf1, buf2}, (uint8_t)1},
+                        {{buf1, buf3}, (uint8_t)0},
+                        {{buf1, Null()}, Null()},
+                        {{Null(), buf3}, Null()}};
     {
         InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
 
-        std::string buf1;
-        std::string buf2;
-        std::string buf3;
-        GeoParseStatus status;
-
-        std::string shape1 = std::string("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 
0))");
-        std::unique_ptr<GeoShape> shape(GeoShape::from_wkt(shape1.data(), 
shape1.size(), &status));
-        EXPECT_TRUE(status == GEO_PARSE_OK);
-        EXPECT_TRUE(shape != nullptr);
-        shape->encode_to(&buf1);
-
-        GeoPoint point1;
-        status = point1.from_coord(5, 5);
-        EXPECT_TRUE(status == GEO_PARSE_OK);
-        point1.encode_to(&buf2);
-
-        GeoPoint point2;
-        status = point2.from_coord(50, 50);
-        EXPECT_TRUE(status == GEO_PARSE_OK);
-        point2.encode_to(&buf3);
+        static_cast<void>(check_function<DataTypeUInt8, true>(func_name, 
input_types, data_set));
+    }
+    {
+        InputTypeSet input_types = {Consted {TypeIndex::String}, 
TypeIndex::String};
 
-        DataSet data_set = {{{buf1, buf2}, (uint8_t)1},
-                            {{buf1, buf3}, (uint8_t)0},
-                            {{buf1, Null()}, Null()},
-                            {{Null(), buf3}, Null()}};
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeUInt8, true>(func_name, 
input_types, const_dataset));
+        }
+    }
+    {
+        InputTypeSet input_types = {TypeIndex::String, Consted 
{TypeIndex::String}};
 
-        static_cast<void>(check_function<DataTypeUInt8, true>(func_name, 
input_types, data_set));
+        for (const auto& line : data_set) {
+            DataSet const_dataset = {line};
+            static_cast<void>(
+                    check_function<DataTypeUInt8, true>(func_name, 
input_types, const_dataset));
+        }
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to