This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 3315c163833d0fc38d5218916bb80ab2cc3fabf4
Author: zclllyybb <[email protected]>
AuthorDate: Thu Feb 1 19:07:54 2024 +0800

    [enhance](function) refactor from_format_str and support more format 
(#30452)
---
 be/src/vec/functions/function.cpp                  |  11 +-
 be/src/vec/functions/function.h                    |  16 +-
 be/src/vec/functions/function_bitmap.cpp           |   2 +-
 be/src/vec/functions/nullif.cpp                    |   2 +-
 be/src/vec/runtime/vdatetime_value.cpp             | 278 +++++++++---------
 be/src/vec/runtime/vdatetime_value.h               |   2 +-
 be/test/vec/function/function_array_index_test.cpp |  14 +-
 be/test/vec/function/function_test_util.h          |  19 +-
 .../date-time-functions/date-format.md             |  10 +-
 .../date-time-functions/date-format.md             |   4 +-
 .../org/apache/doris/analysis/DateLiteral.java     | 311 +++++++++++----------
 .../data/correctness/test_str_to_date.out          |  18 +-
 .../data/datatype_p0/date/test_invalid_date.out    |   7 -
 .../suites/correctness/test_str_to_date.groovy     |  14 +-
 .../datatype_p0/date/test_invalid_date.groovy      |  37 ---
 15 files changed, 374 insertions(+), 371 deletions(-)

diff --git a/be/src/vec/functions/function.cpp 
b/be/src/vec/functions/function.cpp
index 8df03496c11..e0f785b0ef6 100644
--- a/be/src/vec/functions/function.cpp
+++ b/be/src/vec/functions/function.cpp
@@ -94,13 +94,13 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const 
Block& block, const Colum
     return ColumnNullable::create(src_not_nullable, result_null_map_column);
 }
 
-bool get_null_presence(const Block& block, const ColumnNumbers& args) {
+bool have_null_column(const Block& block, const ColumnNumbers& args) {
     return std::ranges::any_of(args, [&block](const auto& elem) {
         return block.get_by_position(elem).type->is_nullable();
     });
 }
 
-bool get_null_presence(const ColumnsWithTypeAndName& args) {
+bool have_null_column(const ColumnsWithTypeAndName& args) {
     return std::ranges::any_of(args, [](const auto& elem) { return 
elem.type->is_nullable(); });
 }
 
@@ -202,20 +202,21 @@ Status 
PreparedFunctionImpl::default_implementation_for_nulls(
         return Status::OK();
     }
 
-    if (get_null_presence(block, args)) {
+    if (have_null_column(block, args)) {
         bool need_to_default = need_replace_null_data_to_default();
         if (context) {
             need_to_default &= context->check_overflow_for_decimal();
         }
+        // extract nested column from nulls
         ColumnNumbers new_args;
         for (auto arg : args) {
             new_args.push_back(block.columns());
             
block.insert(block.get_by_position(arg).get_nested(need_to_default));
             
DCHECK(!block.get_by_position(new_args.back()).column->is_nullable());
         }
-
         RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, 
block, new_args, result,
                                                                 block.rows(), 
dry_run));
+        // after run with nested, wrap them in null.
         block.get_by_position(result).column = wrap_in_nullable(
                 block.get_by_position(result).column, block, args, result, 
input_rows_count);
 
@@ -267,7 +268,7 @@ DataTypePtr 
FunctionBuilderImpl::get_return_type_without_low_cardinality(
     check_number_of_arguments(arguments.size());
 
     if (!arguments.empty() && use_default_implementation_for_nulls()) {
-        if (get_null_presence(arguments)) {
+        if (have_null_column(arguments)) {
             ColumnNumbers numbers(arguments.size());
             std::iota(numbers.begin(), numbers.end(), 0);
             auto [nested_block, _] =
diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h
index 73f49a237ae..df5d8d60942 100644
--- a/be/src/vec/functions/function.h
+++ b/be/src/vec/functions/function.h
@@ -65,8 +65,8 @@ concept HasGetVariadicArgumentTypesImpl = requires(T t) {
     { t.get_variadic_argument_types_impl() } -> std::same_as<DataTypes>;
 };
 
-bool get_null_presence(const Block& block, const ColumnNumbers& args);
-bool get_null_presence(const ColumnsWithTypeAndName& args);
+bool have_null_column(const Block& block, const ColumnNumbers& args);
+bool have_null_column(const ColumnsWithTypeAndName& args);
 
 /// The simplest executable object.
 /// Motivation:
@@ -288,9 +288,11 @@ public:
 
     bool is_variadic() const override { return false; }
 
-    /// Default implementation. Will check only in non-variadic case.
+    // Default implementation. Will check only in non-variadic case.
     void check_number_of_arguments(size_t number_of_arguments) const override;
-
+    // the return type should be same with what FE plans.
+    // it returns: `get_return_type_impl` if 
`use_default_implementation_for_nulls` = false
+    //  `get_return_type_impl` wrapped in NULL if 
`use_default_implementation_for_nulls` = true and input has NULL
     DataTypePtr get_return_type(const ColumnsWithTypeAndName& arguments) const;
 
     DataTypes get_variadic_argument_types() const override {
@@ -300,7 +302,9 @@ public:
     ColumnNumbers get_arguments_that_are_always_constant() const override { 
return {}; }
 
 protected:
-    /// Get the result type by argument type. If the function does not apply 
to these arguments, throw an exception.
+    // Get the result type by argument type. If the function does not apply to 
these arguments, throw an exception.
+    // the get_return_type_impl and its overrides should only return the 
nested type if `use_default_implementation_for_nulls` is true.
+    // whether to wrap in nullable type will be automatically decided.
     virtual DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& 
arguments) const {
         DataTypes data_types(arguments.size());
         for (size_t i = 0; i < arguments.size(); ++i) data_types[i] = 
arguments[i].type;
@@ -317,7 +321,7 @@ protected:
       *  if some of arguments are Nullable(Nothing) then don't call 
get_return_type(), call build_impl() with return_type = Nullable(Nothing),
       *  if some of arguments are Nullable, then:
       *   - Nullable types are substituted with nested types for 
get_return_type() function
-      *   - wrap get_return_type() result in Nullable type and pass to 
build_impl
+      *   - WRAP get_return_type() RESULT IN NULLABLE type and pass to 
build_impl
       *
       * Otherwise build returns build_impl(arguments, 
get_return_type(arguments));
       */
diff --git a/be/src/vec/functions/function_bitmap.cpp 
b/be/src/vec/functions/function_bitmap.cpp
index 88147ec6cd9..6886d53004b 100644
--- a/be/src/vec/functions/function_bitmap.cpp
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -700,7 +700,7 @@ Status execute_bitmap_op_count_null_to_zero(
         size_t input_rows_count,
         const std::function<Status(FunctionContext*, Block&, const 
ColumnNumbers&, size_t, size_t)>&
                 exec_impl_func) {
-    if (get_null_presence(block, arguments)) {
+    if (have_null_column(block, arguments)) {
         auto [temporary_block, new_args, new_result] =
                 create_block_with_nested_columns(block, arguments, result);
         RETURN_IF_ERROR(exec_impl_func(context, temporary_block, new_args, 
new_result,
diff --git a/be/src/vec/functions/nullif.cpp b/be/src/vec/functions/nullif.cpp
index 315ca52d1bc..928fb1c0767 100644
--- a/be/src/vec/functions/nullif.cpp
+++ b/be/src/vec/functions/nullif.cpp
@@ -75,7 +75,7 @@ public:
         }
 
         if (!arguments.empty()) {
-            if (get_null_presence(arguments)) {
+            if (have_null_column(arguments)) {
                 return 
make_nullable(std::make_shared<doris::vectorized::DataTypeUInt8>());
             }
         }
diff --git a/be/src/vec/runtime/vdatetime_value.cpp 
b/be/src/vec/runtime/vdatetime_value.cpp
index 58c6c25f9ba..334779f9390 100644
--- a/be/src/vec/runtime/vdatetime_value.cpp
+++ b/be/src/vec/runtime/vdatetime_value.cpp
@@ -19,20 +19,18 @@
 
 #include <cctz/civil_time.h>
 #include <cctz/time_zone.h>
-#include <ctype.h>
 #include <glog/logging.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
 // IWYU pragma: no_include <bits/chrono.h>
 #include <algorithm>
-#include <cctype>
 #include <chrono> // IWYU pragma: keep
 // IWYU pragma: no_include <bits/std_abs.h>
 #include <cmath>
-#include <cstring>
-#include <exception>
-#include <string>
+#include <cstdint>
 #include <string_view>
 
 #include "common/compiler_util.h"
@@ -393,10 +391,7 @@ bool VecDateTimeValue::from_time_int64(int64_t value) {
         return false;
     }
     _second = value % 100;
-    if (_second > TIME_MAX_SECOND) {
-        return false;
-    }
-    return true;
+    return _second <= TIME_MAX_SECOND;
 }
 
 char* VecDateTimeValue::append_date_buffer(char* to) const {
@@ -686,7 +681,7 @@ bool VecDateTimeValue::to_format_string(const char* format, 
int len, char* to) c
     }
     char buf[64];
     char* cursor = buf;
-    char* pos = NULL;
+    char* pos = nullptr;
     const char* ptr = format;
     const char* end = format + len;
     char ch = '\0';
@@ -1145,12 +1140,9 @@ static bool str_to_int64(const char* ptr, const char** 
endptr, int64_t* ret) {
     uint64_t value_3 = 0;
 
     // Check overflow.
-    if (value_1 > cutoff_1 ||
-        (value_1 == cutoff_1 &&
-         (value_2 > cutoff_2 || (value_2 == cutoff_2 && value_3 > cutoff_3)))) 
{
-        return false;
-    }
-    return true;
+    return value_1 <= cutoff_1 &&
+           (value_1 != cutoff_1 ||
+            (value_2 <= cutoff_2 && (value_2 != cutoff_2 || value_3 <= 
cutoff_3)));
 }
 
 static int min(int a, int b) {
@@ -1161,7 +1153,7 @@ static int find_in_lib(const char* lib[], const char* 
str, const char* end) {
     int pos = 0;
     int find_count = 0;
     int find_pos = 0;
-    for (; lib[pos] != NULL; ++pos) {
+    for (; lib[pos] != nullptr; ++pos) {
         const char* i = str;
         const char* j = lib[pos];
         while (i < end && *j) {
@@ -1199,26 +1191,41 @@ static int check_word(const char* lib[], const char* 
str, const char* end, const
 // change this method should also change that.
 bool VecDateTimeValue::from_date_format_str(const char* format, int 
format_len, const char* value,
                                             int value_len, const char** 
sub_val_end) {
+    if (value_len <= 0) [[unlikely]] {
+        return false;
+    }
     const char* ptr = format;
     const char* end = format + format_len;
     const char* val = value;
     const char* val_end = value + value_len;
 
-    bool date_part_used = false;
-    bool time_part_used = false;
-    bool frac_part_used = false;
-    bool already_set_time_part = false;
-
-    int day_part = 0;
+    bool already_set_time_part = false; // skip time part in the end's setting.
+
+    uint32_t part_used = 0;
+    constexpr int YEAR_PART = 1U << 0;
+    constexpr int MONTH_PART = 1U << 1;
+    constexpr int DAY_PART = 1U << 2;
+    constexpr int NORMAL_DATE_PART = YEAR_PART | MONTH_PART | DAY_PART;
+    constexpr int WEEKDAY_PART = 1U << 3;
+    constexpr int YEARDAY_PART = 1U << 4;
+    constexpr int WEEK_NUM_PART = 1U << 5;
+    constexpr int SPECIAL_DATE_PART = WEEKDAY_PART | YEARDAY_PART | 
WEEK_NUM_PART;
+    [[maybe_unused]] constexpr int DATE_PART = NORMAL_DATE_PART | 
SPECIAL_DATE_PART;
+    constexpr int HOUR_PART = 1U << 6;
+    constexpr int MINUTE_PART = 1U << 7;
+    constexpr int SECOND_PART = 1U << 8;
+    constexpr int TIME_PART = HOUR_PART | MINUTE_PART | SECOND_PART;
+
+    int half_day = 0; // 0 for am/none, 12 for pm.
     int weekday = -1;
     int yearday = -1;
-    int week_num = -1;
+    int week_num = -1; // week idx in one year
 
     bool strict_week_number = false;
     bool sunday_first = false;
     bool strict_week_number_year_type = false;
     int strict_week_number_year = -1;
-    bool usa_time = false;
+    bool hour_system_12 = false;
 
     auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0, 
0};
     while (ptr < end && val < val_end) {
@@ -1231,7 +1238,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
         }
         // Check switch
         if (*ptr == '%' && ptr + 1 < end) {
-            const char* tmp = NULL;
+            const char* tmp = nullptr;
             int64_t int_value = 0;
             ptr++;
             switch (*ptr++) {
@@ -1245,7 +1252,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 int_value += int_value >= 70 ? 1900 : 2000;
                 year = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= YEAR_PART;
                 break;
             case 'Y':
                 // Year, numeric, four digits
@@ -1258,7 +1265,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 year = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= YEAR_PART;
                 break;
                 // Month
             case 'm':
@@ -1269,7 +1276,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 month = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= MONTH_PART;
                 break;
             case 'M':
                 int_value = check_word(const_cast<const char**>(s_month_name), 
val, val_end, &val);
@@ -1277,6 +1284,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                     return false;
                 }
                 month = int_value;
+                part_used |= MONTH_PART;
                 break;
             case 'b':
                 int_value = check_word(s_ab_month_name, val, val_end, &val);
@@ -1284,6 +1292,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                     return false;
                 }
                 month = int_value;
+                part_used |= MONTH_PART;
                 break;
                 // Day
             case 'd':
@@ -1294,7 +1303,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 day = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= DAY_PART;
                 break;
             case 'D':
                 tmp = val + min(2, val_end - val);
@@ -1303,13 +1312,14 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 day = int_value;
                 val = tmp + min(2, val_end - tmp);
-                date_part_used = true;
+                part_used |= DAY_PART;
                 break;
                 // Hour
             case 'h':
             case 'I':
             case 'l':
-                usa_time = true;
+                hour_system_12 = true;
+                part_used |= HOUR_PART;
                 // Fall through
             case 'k':
             case 'H':
@@ -1319,7 +1329,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 hour = int_value;
                 val = tmp;
-                time_part_used = true;
+                part_used |= HOUR_PART;
                 break;
                 // Minute
             case 'i':
@@ -1329,7 +1339,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 minute = int_value;
                 val = tmp;
-                time_part_used = true;
+                part_used |= MINUTE_PART;
                 break;
                 // Second
             case 's':
@@ -1340,7 +1350,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 second = int_value;
                 val = tmp;
-                time_part_used = true;
+                part_used |= SECOND_PART;
                 break;
                 // Micro second
             case 'f':
@@ -1351,16 +1361,15 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 val = tmp;
                 break;
-                // AM/PM
+                // AM/PM, only meaningful for 12-hour system.
             case 'p':
-                if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || 
!usa_time) {
+                if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || 
!hour_system_12) {
                     return false;
                 }
                 if (toupper(*val) == 'P') {
                     // PM
-                    day_part = 12;
+                    half_day = 12;
                 }
-                time_part_used = true;
                 val += 2;
                 break;
                 // Weekday
@@ -1371,7 +1380,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 int_value++;
                 weekday = int_value;
-                date_part_used = true;
+                part_used |= WEEKDAY_PART;
                 break;
             case 'a':
                 int_value = check_word(s_ab_day_name, val, val_end, &val);
@@ -1380,7 +1389,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 int_value++;
                 weekday = int_value;
-                date_part_used = true;
+                part_used |= WEEKDAY_PART;
                 break;
             case 'w':
                 tmp = val + min(1, val_end - val);
@@ -1395,7 +1404,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 weekday = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= WEEKDAY_PART;
                 break;
             case 'j':
                 tmp = val + min(3, val_end - val);
@@ -1404,7 +1413,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 yearday = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= YEARDAY_PART;
                 break;
             case 'u':
             case 'v':
@@ -1422,7 +1431,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                     return false;
                 }
                 val = tmp;
-                date_part_used = true;
+                part_used |= WEEK_NUM_PART;
                 break;
                 // strict week number, must be used with %V or %v
             case 'x':
@@ -1434,7 +1443,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 }
                 strict_week_number_year = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= WEEK_NUM_PART;
                 break;
             case 'r': {
                 VecDateTimeValue tmp_val;
@@ -1445,7 +1454,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 this->_minute = tmp_val._minute;
                 this->_second = tmp_val._second;
                 val = tmp;
-                time_part_used = true;
+                part_used |= TIME_PART;
                 already_set_time_part = true;
                 break;
             }
@@ -1457,7 +1466,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
                 this->_hour = tmp_val._hour;
                 this->_minute = tmp_val._minute;
                 this->_second = tmp_val._second;
-                time_part_used = true;
+                part_used |= TIME_PART;
                 already_set_time_part = true;
                 val = tmp;
                 break;
@@ -1497,8 +1506,7 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
         }
     }
 
-    // continue to iterate pattern if has
-    // to find out if it has time part.
+    // for compatibility with MySQL: the format may still contain e.g. %H:%i:%s with no 
corresponding content in the value...
     while (ptr < end) {
         if (*ptr == '%' && ptr + 1 < end) {
             ptr++;
@@ -1511,10 +1519,11 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
             case 'l':
             case 'r':
             case 's':
+            case 'f':
             case 'S':
             case 'p':
             case 'T':
-                time_part_used = true;
+                part_used |= TIME_PART;
                 break;
             default:
                 break;
@@ -1524,33 +1533,29 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
         }
     }
 
-    if (usa_time) {
+    if (!part_used) {
+        return false;
+    }
+
+    if (hour_system_12) {
         if (hour > 12 || hour < 1) {
             return false;
         }
-        hour = (hour % 12) + day_part;
+        hour = (hour % 12) + half_day;
     }
     if (sub_val_end) {
         *sub_val_end = val;
     }
 
     // Compute timestamp type
-    if (frac_part_used) {
-        if (date_part_used) {
+    if (part_used & DATE_PART) {
+        if (part_used & TIME_PART) {
             _type = TIME_DATETIME;
         } else {
-            _type = TIME_TIME;
+            _type = TIME_DATE;
         }
     } else {
-        if (date_part_used) {
-            if (time_part_used) {
-                _type = TIME_DATETIME;
-            } else {
-                _type = TIME_DATE;
-            }
-        } else {
-            _type = TIME_TIME;
-        }
+        _type = TIME_TIME;
     }
 
     _neg = false;
@@ -1592,12 +1597,13 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
     if (already_set_date_part && already_set_time_part) {
         return true;
     }
-    // for two special date cases, complete default month/day
-    if (!time_part_used && year > 0) {
-        if (std::string_view {format, end} == "%Y") {
-            month = day = 1;
-        } else if (std::string_view {format, end} == "%Y-%m") {
+    // complete default month/day
+    if (!(part_used & ~NORMAL_DATE_PART)) { // Ymd part only
+        if (!(part_used & DAY_PART)) {
             day = 1;
+            if (!(part_used & MONTH_PART)) {
+                month = 1;
+            }
         }
     }
 
@@ -1614,7 +1620,9 @@ bool VecDateTimeValue::from_date_format_str(const char* 
format, int format_len,
 template <TimeUnit unit, bool need_check>
 bool VecDateTimeValue::date_add_interval(const TimeInterval& interval) {
     if constexpr (need_check) {
-        if (!is_valid_date()) return false;
+        if (!is_valid_date()) {
+            return false;
+        }
     }
 
     int sign = interval.is_neg ? -1 : 1;
@@ -1771,7 +1779,7 @@ void VecDateTimeValue::from_unixtime(int64_t timestamp, 
const cctz::time_zone& c
 
 const char* VecDateTimeValue::month_name() const {
     if (_month < 1 || _month > 12) {
-        return NULL;
+        return nullptr;
     }
     return s_month_name[_month];
 }
@@ -1779,14 +1787,14 @@ const char* VecDateTimeValue::month_name() const {
 const char* VecDateTimeValue::day_name() const {
     int day = weekday();
     if (day < 0 || day >= 7) {
-        return NULL;
+        return nullptr;
     }
     return s_day_name[day];
 }
 
 VecDateTimeValue VecDateTimeValue::local_time() {
     VecDateTimeValue value;
-    value.from_unixtime(time(NULL), TimezoneUtils::default_time_zone);
+    value.from_unixtime(time(nullptr), TimezoneUtils::default_time_zone);
     return value;
 }
 
@@ -2170,17 +2178,33 @@ void DateV2Value<T>::set_zero() {
 template <typename T>
 bool DateV2Value<T>::from_date_format_str(const char* format, int format_len, 
const char* value,
                                           int value_len, const char** 
sub_val_end) {
+    if (value_len <= 0) [[unlikely]] {
+        return false;
+    }
     const char* ptr = format;
     const char* end = format + format_len;
     const char* val = value;
     const char* val_end = value + value_len;
 
-    bool date_part_used = false;
-    bool time_part_used = false;
-    bool frac_part_used = false;
-    bool already_set_time_part = false;
-
-    int day_part = 0;
+    bool already_set_time_part = false; // skip time part in the end's setting.
+
+    uint32_t part_used = 0;
+    constexpr int YEAR_PART = 1U << 0;
+    constexpr int MONTH_PART = 1U << 1;
+    constexpr int DAY_PART = 1U << 2;
+    constexpr int NORMAL_DATE_PART = YEAR_PART | MONTH_PART | DAY_PART;
+    constexpr int WEEKDAY_PART = 1U << 3;
+    constexpr int YEARDAY_PART = 1U << 4;
+    constexpr int WEEK_NUM_PART = 1U << 5;
+    constexpr int SPECIAL_DATE_PART = WEEKDAY_PART | YEARDAY_PART | 
WEEK_NUM_PART;
+    [[maybe_unused]] constexpr int DATE_PART = NORMAL_DATE_PART | 
SPECIAL_DATE_PART;
+    constexpr int HOUR_PART = 1U << 6;
+    constexpr int MINUTE_PART = 1U << 7;
+    constexpr int SECOND_PART = 1U << 8;
+    constexpr int FRAC_PART = 1U << 9;
+    constexpr int TIME_PART = HOUR_PART | MINUTE_PART | SECOND_PART | 
FRAC_PART;
+
+    int half_day = 0; // 0 for am/none, 12 for pm.
     int weekday = -1;
     int yearday = -1;
     int week_num = -1;
@@ -2189,7 +2213,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
     bool sunday_first = false;
     bool strict_week_number_year_type = false;
     int strict_week_number_year = -1;
-    bool usa_time = false;
+    bool hour_system_12 = false;
 
     auto [year, month, day, hour, minute, second, microsecond] = std::tuple 
{0, 0, 0, 0, 0, 0, 0};
     while (ptr < end && val < val_end) {
@@ -2202,7 +2226,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
         }
         // Check switch
         if (*ptr == '%' && ptr + 1 < end) {
-            const char* tmp = NULL;
+            const char* tmp = nullptr;
             int64_t int_value = 0;
             ptr++;
             switch (*ptr++) {
@@ -2216,7 +2240,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 int_value += int_value >= 70 ? 1900 : 2000;
                 year = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= YEAR_PART;
                 break;
             case 'Y':
                 // Year, numeric, four digits
@@ -2229,7 +2253,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 year = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= YEAR_PART;
                 break;
                 // Month
             case 'm':
@@ -2240,7 +2264,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 month = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= MONTH_PART;
                 break;
             case 'M':
                 int_value = check_word(const_cast<const char**>(s_month_name), 
val, val_end, &val);
@@ -2248,6 +2272,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                     return false;
                 }
                 month = int_value;
+                part_used |= MONTH_PART;
                 break;
             case 'b':
                 int_value = check_word(s_ab_month_name, val, val_end, &val);
@@ -2255,6 +2280,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                     return false;
                 }
                 month = int_value;
+                part_used |= MONTH_PART;
                 break;
                 // Day
             case 'd':
@@ -2265,7 +2291,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 day = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= DAY_PART;
                 break;
             case 'D':
                 tmp = val + min(2, val_end - val);
@@ -2274,13 +2300,14 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 day = int_value;
                 val = tmp + min(2, val_end - tmp);
-                date_part_used = true;
+                part_used |= DAY_PART;
                 break;
                 // Hour
             case 'h':
             case 'I':
             case 'l':
-                usa_time = true;
+                hour_system_12 = true;
+                part_used |= HOUR_PART;
                 // Fall through
             case 'k':
             case 'H':
@@ -2290,7 +2317,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 hour = int_value;
                 val = tmp;
-                time_part_used = true;
+                part_used |= HOUR_PART;
                 break;
                 // Minute
             case 'i':
@@ -2300,7 +2327,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 minute = int_value;
                 val = tmp;
-                time_part_used = true;
+                part_used |= MINUTE_PART;
                 break;
                 // Second
             case 's':
@@ -2311,7 +2338,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 second = int_value;
                 val = tmp;
-                time_part_used = true;
+                part_used |= SECOND_PART;
                 break;
             // Micro second
             case 'f':
@@ -2333,21 +2360,19 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 if constexpr (is_datetime) {
                     microsecond = int_value * int_exp10(6 - min(6, tmp - val));
-                    frac_part_used = true;
+                    part_used |= FRAC_PART;
                 }
                 val = tmp;
-                time_part_used = true;
                 break;
                 // AM/PM
             case 'p':
-                if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || 
!usa_time) {
+                if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || 
!hour_system_12) {
                     return false;
                 }
                 if (toupper(*val) == 'P') {
                     // PM
-                    day_part = 12;
+                    half_day = 12;
                 }
-                time_part_used = true;
                 val += 2;
                 break;
                 // Weekday
@@ -2358,7 +2383,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 int_value++;
                 weekday = int_value;
-                date_part_used = true;
+                part_used |= WEEKDAY_PART;
                 break;
             case 'a':
                 int_value = check_word(s_ab_day_name, val, val_end, &val);
@@ -2367,7 +2392,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 int_value++;
                 weekday = int_value;
-                date_part_used = true;
+                part_used |= WEEKDAY_PART;
                 break;
             case 'w':
                 tmp = val + min(1, val_end - val);
@@ -2382,7 +2407,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 weekday = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= WEEKDAY_PART;
                 break;
             case 'j':
                 tmp = val + min(3, val_end - val);
@@ -2391,7 +2416,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 yearday = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= YEARDAY_PART;
                 break;
             case 'u':
             case 'v':
@@ -2409,7 +2434,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                     return false;
                 }
                 val = tmp;
-                date_part_used = true;
+                part_used |= WEEK_NUM_PART;
                 break;
                 // strict week number, must be used with %V or %v
             case 'x':
@@ -2421,7 +2446,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                 }
                 strict_week_number_year = int_value;
                 val = tmp;
-                date_part_used = true;
+                part_used |= WEEK_NUM_PART;
                 break;
             case 'r': {
                 if constexpr (is_datetime) {
@@ -2434,7 +2459,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                     this->date_v2_value_.minute_ = tmp_val.minute();
                     this->date_v2_value_.second_ = tmp_val.second();
                     val = tmp;
-                    time_part_used = true;
+                    part_used |= TIME_PART;
                     already_set_time_part = true;
                     break;
                 } else {
@@ -2450,7 +2475,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                     this->date_v2_value_.hour_ = tmp_val.hour();
                     this->date_v2_value_.minute_ = tmp_val.minute();
                     this->date_v2_value_.second_ = tmp_val.second();
-                    time_part_used = true;
+                    part_used |= TIME_PART;
                     already_set_time_part = true;
                     val = tmp;
                     break;
@@ -2493,12 +2518,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
         }
     }
 
-    // ptr == format means input value string is "", not do parse format 
failed here
-    if (ptr == format) {
-        return false;
-    }
-    // continue to iterate pattern if has
-    // to find out if it has time part.
+    // for compatibility with MySQL: the format may still contain time parts 
(e.g. %H:%i:%s) that have no corresponding content in the value...
     while (ptr < end) {
         if (*ptr == '%' && ptr + 1 < end) {
             ptr++;
@@ -2515,7 +2535,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
             case 'S':
             case 'p':
             case 'T':
-                time_part_used = true;
+                part_used |= TIME_PART;
                 break;
             default:
                 break;
@@ -2525,25 +2545,27 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
         }
     }
 
-    if (usa_time) {
+    if (!part_used) {
+        return false;
+    }
+
+    if (hour_system_12) {
         if (hour > 12 || hour < 1) {
             return false;
         }
-        hour = (hour % 12) + day_part;
+        hour = (hour % 12) + half_day;
     }
     if (sub_val_end) {
         *sub_val_end = val;
     }
 
     // Compute timestamp type
-    if (frac_part_used) {
+    if (part_used & FRAC_PART) {
         if constexpr (!is_datetime) {
-            LOG(WARNING) << "Microsecond is not allowed for date type!";
             return false;
         }
-    } else if (time_part_used) {
+    } else if (part_used & TIME_PART) {
         if constexpr (!is_datetime) {
-            LOG(WARNING) << "Time part is not allowed for date type!";
             return false;
         }
     }
@@ -2582,7 +2604,9 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
     //    so we only need to set date part
     // 3. if both are true, means all part of date_time be set, no need 
check_range_and_set_time
     bool already_set_date_part = yearday > 0 || (week_num >= 0 && weekday > 0);
-    if (already_set_date_part && already_set_time_part) return true;
+    if (already_set_date_part && already_set_time_part) {
+        return true;
+    }
     if (already_set_date_part) {
         if constexpr (is_datetime) {
             return check_range_and_set_time(date_v2_value_.year_, 
date_v2_value_.month_,
@@ -2592,13 +2616,13 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
                                             date_v2_value_.day_, 0, 0, 0, 0);
         }
     }
-
-    // for two special date cases, complete default month/day
-    if (!time_part_used && year > 0) {
-        if (std::string_view {format, end} == "%Y") {
-            month = day = 1;
-        } else if (std::string_view {format, end} == "%Y-%m") {
+    // complete default month/day
+    if (!(part_used & ~NORMAL_DATE_PART)) { // Ymd part only
+        if (!(part_used & DAY_PART)) {
             day = 1;
+            if (!(part_used & MONTH_PART)) {
+                month = 1;
+            }
         }
     }
 
@@ -2613,7 +2637,7 @@ bool DateV2Value<T>::from_date_format_str(const char* 
format, int format_len, co
     }
     if constexpr (is_datetime) {
         return check_range_and_set_time(year, month, day, hour, minute, 
second, microsecond,
-                                        time_part_used && !date_part_used);
+                                        !(part_used & ~TIME_PART));
     } else {
         return check_range_and_set_time(year, month, day, 0, 0, 0, 0);
     }
diff --git a/be/src/vec/runtime/vdatetime_value.h 
b/be/src/vec/runtime/vdatetime_value.h
index ad3619aba53..7b138b2bc3a 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -206,7 +206,7 @@ static constexpr uint32_t DATEV2_YEAR_WIDTH = 23;
 static constexpr uint32_t DATETIMEV2_YEAR_WIDTH = 18;
 static constexpr uint32_t DATETIMEV2_MONTH_WIDTH = 4;
 
-static RE2 time_zone_offset_format_reg("^[+-]{1}\\d{2}\\:\\d{2}$");
+static RE2 time_zone_offset_format_reg(R"(^[+-]{1}\d{2}\:\d{2}$)");
 
 uint8_t mysql_week_mode(uint32_t mode);
 
diff --git a/be/test/vec/function/function_array_index_test.cpp 
b/be/test/vec/function/function_array_index_test.cpp
index 7f496dc1cce..92c7901bbfe 100644
--- a/be/test/vec/function/function_array_index_test.cpp
+++ b/be/test/vec/function/function_array_index_test.cpp
@@ -114,10 +114,8 @@ TEST(function_array_index_test, array_contains) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, 
TypeIndex::Date};
 
-        Array vec = {str_to_date_time("2022-01-02", false), 
str_to_date_time("", false),
-                     str_to_date_time("2022-07-08", false)};
+        Array vec = {str_to_date_time("2022-01-02", false), 
str_to_date_time("2022-07-08", false)};
         DataSet data_set = {{{vec, std::string("2022-01-02")}, UInt8(1)},
-                            {{vec, std::string("")}, UInt8(1)},
                             {{vec, std::string("2022-01-03")}, UInt8(0)},
                             {{Null(), std::string("2022-01-04")}, Null()},
                             {{empty_arr, std::string("2022-01-02")}, 
UInt8(0)}};
@@ -129,10 +127,9 @@ TEST(function_array_index_test, array_contains) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, 
TypeIndex::DateTime};
 
-        Array vec = {str_to_date_time("2022-01-02 00:00:00"), 
str_to_date_time(""),
+        Array vec = {str_to_date_time("2022-01-02 00:00:00"),
                      str_to_date_time("2022-07-08 00:00:00")};
         DataSet data_set = {{{vec, std::string("2022-01-02 00:00:00")}, 
UInt8(1)},
-                            {{vec, std::string("")}, UInt8(1)},
                             {{vec, std::string("2022-01-03 00:00:00")}, 
UInt8(0)},
                             {{Null(), std::string("2022-01-04 00:00:00")}, 
Null()},
                             {{empty_arr, std::string("2022-01-02 00:00:00")}, 
UInt8(0)}};
@@ -217,10 +214,8 @@ TEST(function_array_index_test, array_position) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date, 
TypeIndex::Date};
 
-        Array vec = {str_to_date_time("2022-01-02", false), 
str_to_date_time("", false),
-                     str_to_date_time("2022-07-08", false)};
+        Array vec = {str_to_date_time("2022-01-02", false), 
str_to_date_time("2022-07-08", false)};
         DataSet data_set = {{{vec, std::string("2022-01-02")}, Int64(1)},
-                            {{vec, std::string("")}, Int64(2)},
                             {{vec, std::string("2022-01-03")}, Int64(0)},
                             {{Null(), std::string("2022-01-04")}, Null()},
                             {{empty_arr, std::string("2022-01-02")}, 
Int64(0)}};
@@ -232,10 +227,9 @@ TEST(function_array_index_test, array_position) {
     {
         InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime, 
TypeIndex::DateTime};
 
-        Array vec = {str_to_date_time("2022-01-02 00:00:00"), 
str_to_date_time(""),
+        Array vec = {str_to_date_time("2022-01-02 00:00:00"),
                      str_to_date_time("2022-07-08 00:00:00")};
         DataSet data_set = {{{vec, std::string("2022-01-02 00:00:00")}, 
Int64(1)},
-                            {{vec, std::string("")}, Int64(2)},
                             {{vec, std::string("2022-01-03 00:00:00")}, 
Int64(0)},
                             {{Null(), std::string("2022-01-04 00:00:00")}, 
Null()},
                             {{empty_arr, std::string("2022-01-02 00:00:00")}, 
Int64(0)}};
diff --git a/be/test/vec/function/function_test_util.h 
b/be/test/vec/function/function_test_util.h
index f488690f51d..44cb954e3c3 100644
--- a/be/test/vec/function/function_test_util.h
+++ b/be/test/vec/function/function_test_util.h
@@ -52,18 +52,13 @@
 #include "vec/data_types/data_type_number.h"
 #include "vec/functions/simple_function_factory.h"
 
-namespace doris {
-namespace vectorized {
+namespace doris::vectorized {
+
 class DataTypeJsonb;
 class DataTypeTime;
 class TableFunction;
 template <typename T>
 class DataTypeDecimal;
-} // namespace vectorized
-} // namespace doris
-
-namespace doris::vectorized {
-
 using InputDataSet = std::vector<std::vector<AnyType>>; // without result
 using CellSet = std::vector<AnyType>;
 using Expect = AnyType;
@@ -71,6 +66,7 @@ using Row = std::pair<CellSet, Expect>;
 using DataSet = std::vector<Row>;
 using InputTypeSet = std::vector<AnyType>;
 
+// FIXME: should use exception or expected to deal with null value.
 int64_t str_to_date_time(std::string datetime_str, bool data_time = true);
 uint32_t str_to_date_v2(std::string datetime_str, std::string datetime_format);
 uint64_t str_to_datetime_v2(std::string datetime_str, std::string 
datetime_format);
@@ -300,7 +296,7 @@ Status check_function(const std::string& func_name, const 
InputTypeSet& input_ty
             if constexpr (std::is_same_v<ReturnType, DataTypeJsonb>) {
                 const auto& expect_data = any_cast<String>(data_set[i].second);
                 auto s = column->get_data_at(i);
-                if (expect_data.size() == 0) {
+                if (expect_data.empty()) {
                     // zero size result means invalid
                     EXPECT_EQ(0, s.size) << " invalid result size should be 0 
at row " << i;
                 } else {
@@ -321,7 +317,8 @@ Status check_function(const std::string& func_name, const 
InputTypeSet& input_ty
                 } else if constexpr (std::is_same_v<ReturnType, 
DataTypeBitMap>) {
                     const ColumnBitmap* bitmap_col = nullptr;
                     if constexpr (nullable) {
-                        auto nullable_column = assert_cast<const 
ColumnNullable*>(column.get());
+                        const auto* nullable_column =
+                                assert_cast<const 
ColumnNullable*>(column.get());
                         bitmap_col = assert_cast<const ColumnBitmap*>(
                                 
nullable_column->get_nested_column_ptr().get());
                     } else {
@@ -344,7 +341,9 @@ Status check_function(const std::string& func_name, const 
InputTypeSet& input_ty
         if constexpr (nullable) {
             bool is_null = data_set[i].second.type() == &typeid(Null);
             EXPECT_EQ(is_null, column->is_null_at(i)) << " at row " << i;
-            if (!is_null) check_column_data();
+            if (!is_null) {
+                check_column_data();
+            }
         } else {
             check_column_data();
         }
diff --git 
a/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md 
b/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md
index ab86ec15c2b..54f2acd7597 100644
--- a/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md
+++ b/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md
@@ -70,23 +70,23 @@ The formats available are:
 
 % m | month, numerical value (00-12)
 
-%p | AM or PM
+% p | AM or PM, only available in the 12-hour system
 
-% R | Time, 12 - hour (hh: mm: SS AM or PM)
+% r | Time, 12-hour (hh:mm:ss), with or without an AM/PM marker
 
 % S | seconds (00-59)
 
 % s | seconds (00-59)
 
-% T | Time, 24 - hour (hh: mm: ss)
+% T | Time, 24-hour (hh:mm:ss)
 
 % U | Week (00-53) Sunday is the first day of the week
 
-% U | Week (00 - 53) Monday is the first day of the week
+% u | Week (00-53) Monday is the first day of the week
 
 % V | Week (01-53) Sunday is the first day of the week, and% X is used.
 
-% v | Week (01 - 53) Monday is the first day of the week, and% x is used
+% v | Week (01-53) Monday is the first day of the week, and% x is used
 
 % W | Sunday
 
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md
index 69a6315d729..a5fb25f2986 100644
--- 
a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md
+++ 
b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md
@@ -70,9 +70,9 @@ date 参数是合法的日期。format 规定日期/时间的输出格式。
 
 %m | 月,数值(00-12)
 
-%p | AM 或 PM
+%p | AM 或 PM,仅在采用12小时制时可用。
 
-%r | 时间,12-小时(hh:mm:ss AM 或 PM)
+%r | 时间,12-小时(hh:mm:ss),可以包含或不包含AM/PM。
 
 %S | 秒(00-59)
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
index 2f7f183b1ad..37171b70fac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
@@ -1203,16 +1203,28 @@ public class DateLiteral extends LiteralExpr {
     // this method is exaclty same as from_date_format_str() in 
be/src/runtime/datetime_value.cpp
     // change this method should also change that.
     public int fromDateFormatStr(String format, String value, boolean 
hasSubVal) throws InvalidFormatException {
-        int fp = 0; // pointer to the current format string
-        int fend = format.length(); // end of format string
-        int vp = 0; // pointer to the date string value
-        int vend = value.length(); // end of date string value
-
-        boolean datePartUsed = false;
-        boolean timePartUsed = false;
-        boolean microSecondPartUsed = false;
-
-        int dayPart = 0;
+        int pFormat = 0; // pointer to the current format string
+        int endFormat = format.length(); // end of format string
+        int pValue = 0; // pointer to the date string value
+        int endValue = value.length(); // end of date string value
+
+        int partUsed = 0;
+        final int yearPart = 1 << 0;
+        final int monthPart = 1 << 1;
+        final int dayPart = 1 << 2;
+        final int weekdayPart = 1 << 3;
+        final int yeardayPart = 1 << 4;
+        final int weekNumPart = 1 << 5;
+        final int normalDatePart = yearPart | monthPart | dayPart;
+        final int specialDatePart = weekdayPart | yeardayPart | weekNumPart;
+        final int datePart = normalDatePart | specialDatePart;
+        final int hourPart = 1 << 6;
+        final int minutePart = 1 << 7;
+        final int secondPart = 1 << 8;
+        final int fracPart = 1 << 9;
+        final int timePart = hourPart | minutePart | secondPart | fracPart;
+
+        int halfDay = 0; // 0 for am/none, 12 for pm.
         long weekday = -1;
         long yearday = -1;
         long weekNum = -1;
@@ -1221,169 +1233,170 @@ public class DateLiteral extends LiteralExpr {
         boolean sundayFirst = false;
         boolean strictWeekNumberYearType = false;
         long strictWeekNumberYear = -1;
-        boolean usaTime = false;
+        boolean hourSystem12 = false; // hour in [0..12] and with am/pm
 
-        char f;
-        while (fp < fend && vp < vend) {
+        char now;
+        while (pFormat < endFormat && pValue < endValue) {
             // Skip space character
-            while (vp < vend && Character.isSpaceChar(value.charAt(vp))) {
-                vp++;
+            while (pValue < endValue && 
Character.isSpaceChar(value.charAt(pValue))) {
+                pValue++;
             }
-            if (vp >= vend) {
+            if (pValue >= endValue) {
                 break;
             }
 
             // Check switch
-            f = format.charAt(fp);
-            if (f == '%' && fp + 1 < fend) {
+            now = format.charAt(pFormat);
+            if (now == '%' && pFormat + 1 < endFormat) {
                 int tmp = 0;
                 long intValue = 0;
-                fp++;
-                f = format.charAt(fp);
-                fp++;
-                switch (f) {
+                pFormat++;
+                now = format.charAt(pFormat);
+                pFormat++;
+                switch (now) {
                     // Year
                     case 'y':
                         // Year, numeric (two digits)
-                        tmp = vp + Math.min(2, vend - vp);
-                        intValue = strToLong(value.substring(vp, tmp));
+                        tmp = pValue + Math.min(2, endValue - pValue);
+                        intValue = strToLong(value.substring(pValue, tmp));
                         intValue += intValue >= 70 ? 1900 : 2000;
                         this.year = intValue;
-                        vp = tmp;
-                        datePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= yearPart;
                         break;
                     case 'Y':
                         // Year, numeric, four digits
-                        tmp = vp + Math.min(4, vend - vp);
-                        intValue = strToLong(value.substring(vp, tmp));
-                        if (tmp - vp <= 2) {
+                        tmp = pValue + Math.min(4, endValue - pValue);
+                        intValue = strToLong(value.substring(pValue, tmp));
+                        if (tmp - pValue <= 2) {
                             intValue += intValue >= 70 ? 1900 : 2000;
                         }
                         this.year = intValue;
-                        vp = tmp;
-                        datePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= yearPart;
                         break;
                     // Month
                     case 'm':
                     case 'c':
-                        tmp = vp + Math.min(2, vend - vp);
-                        intValue = strToLong(value.substring(vp, tmp));
+                        tmp = pValue + Math.min(2, endValue - pValue);
+                        intValue = strToLong(value.substring(pValue, tmp));
                         this.month = intValue;
-                        vp = tmp;
-                        datePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= monthPart;
                         break;
                     case 'M': {
-                        int nextPos = findWord(value, vp);
-                        intValue = checkWord(MONTH_NAME_DICT, 
value.substring(vp, nextPos));
+                        int nextPos = findWord(value, pValue);
+                        intValue = checkWord(MONTH_NAME_DICT, 
value.substring(pValue, nextPos));
                         this.month = intValue;
-                        vp = nextPos;
+                        pValue = nextPos;
+                        partUsed |= monthPart;
                         break;
                     }
                     case 'b': {
-                        int nextPos = findWord(value, vp);
-                        intValue = checkWord(MONTH_ABBR_NAME_DICT, 
value.substring(vp, nextPos));
+                        int nextPos = findWord(value, pValue);
+                        intValue = checkWord(MONTH_ABBR_NAME_DICT, 
value.substring(pValue, nextPos));
                         this.month = intValue;
-                        vp = nextPos;
+                        pValue = nextPos;
+                        partUsed |= monthPart;
                         break;
                     }
                     // Day
                     case 'd':
                     case 'e':
-                        tmp = vp + Math.min(2, vend - vp);
-                        intValue = strToLong(value.substring(vp, tmp));
+                        tmp = pValue + Math.min(2, endValue - pValue);
+                        intValue = strToLong(value.substring(pValue, tmp));
                         this.day = intValue;
-                        vp = tmp;
-                        datePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= dayPart;
                         break;
                     case 'D':
-                        tmp = vp + Math.min(2, vend - vp);
-                        intValue = strToLong(value.substring(vp, tmp));
+                        tmp = pValue + Math.min(2, endValue - pValue);
+                        intValue = strToLong(value.substring(pValue, tmp));
                         this.day = intValue;
-                        vp = tmp + Math.min(2, vend - tmp);
-                        datePartUsed = true;
+                        pValue = tmp + Math.min(2, endValue - tmp);
+                        partUsed |= dayPart;
                         break;
                     // Hour
                     case 'h':
                     case 'I':
                     case 'l':
-                        usaTime = true;
+                        hourSystem12 = true;
+                        partUsed |= hourPart;
                     case 'k': // CHECKSTYLE IGNORE THIS LINE: Fall through
                     case 'H':
-                        tmp = findNumber(value, vp, 2);
-                        intValue = strToLong(value.substring(vp, tmp));
+                        tmp = findNumber(value, pValue, 2);
+                        intValue = strToLong(value.substring(pValue, tmp));
                         this.hour = intValue;
-                        vp = tmp;
-                        timePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= hourPart;
                         break;
                     // Minute
                     case 'i':
-                        tmp = vp + Math.min(2, vend - vp);
-                        intValue = strToLong(value.substring(vp, tmp));
+                        tmp = pValue + Math.min(2, endValue - pValue);
+                        intValue = strToLong(value.substring(pValue, tmp));
                         this.minute = intValue;
-                        vp = tmp;
-                        timePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= minutePart;
                         break;
                     // Second
                     case 's':
                     case 'S':
-                        tmp = vp + Math.min(2, vend - vp);
-                        intValue = strToLong(value.substring(vp, tmp));
+                        tmp = pValue + Math.min(2, endValue - pValue);
+                        intValue = strToLong(value.substring(pValue, tmp));
                         this.second = intValue;
-                        vp = tmp;
-                        timePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= secondPart;
                         break;
                     // Micro second
                     case 'f':
-                        // FIXME: fix same with BE
-                        tmp = vp;
+                        tmp = pValue;
                         // when there's still something to the end, fix the 
scale of ms.
-                        while (tmp < vend && 
Character.isDigit(value.charAt(tmp))) {
+                        while (tmp < endValue && 
Character.isDigit(value.charAt(tmp))) {
                             tmp += 1;
                         }
 
-                        if (tmp - vp > 6) {
-                            int tmp2 = vp + 6;
-                            intValue = strToLong(value.substring(vp, tmp2));
+                        if (tmp - pValue > 6) {
+                            int tmp2 = pValue + 6;
+                            intValue = strToLong(value.substring(pValue, 
tmp2));
                         } else {
-                            intValue = strToLong(value.substring(vp, tmp));
+                            intValue = strToLong(value.substring(pValue, tmp));
                         }
-                        this.microsecond = (long) (intValue * Math.pow(10, 6 - 
Math.min(6, tmp - vp)));
-                        timePartUsed = true;
-                        microSecondPartUsed = true;
-                        vp = tmp;
+                        this.microsecond = (long) (intValue * Math.pow(10, 6 - 
Math.min(6, tmp - pValue)));
+                        partUsed |= fracPart;
+                        pValue = tmp;
                         break;
                     // AM/PM
                     case 'p':
-                        if ((vend - vp) < 2 || 
Character.toUpperCase(value.charAt(vp + 1)) != 'M' || !usaTime) {
+                        if ((endValue - pValue) < 2 || 
Character.toUpperCase(value.charAt(pValue + 1)) != 'M'
+                                || !hourSystem12) {
                             throw new InvalidFormatException("Invalid %p 
format");
                         }
-                        if (Character.toUpperCase(value.charAt(vp)) == 'P') {
+                        if (Character.toUpperCase(value.charAt(pValue)) == 
'P') {
                             // PM
-                            dayPart = 12;
+                            halfDay = 12;
                         }
-                        timePartUsed = true;
-                        vp += 2;
+                        pValue += 2;
                         break;
                     // Weekday
                     case 'W': {
-                        int nextPos = findWord(value, vp);
-                        intValue = checkWord(WEEK_DAY_NAME_DICT, 
value.substring(vp, nextPos));
+                        int nextPos = findWord(value, pValue);
+                        intValue = checkWord(WEEK_DAY_NAME_DICT, 
value.substring(pValue, nextPos));
                         intValue++;
                         weekday = intValue;
-                        datePartUsed = true;
+                        partUsed |= weekdayPart;
                         break;
                     }
                     case 'a': {
-                        int nextPos = findWord(value, vp);
-                        intValue = checkWord(WEEK_DAY_NAME_DICT, 
value.substring(vp, nextPos));
+                        int nextPos = findWord(value, pValue);
+                        intValue = checkWord(WEEK_DAY_NAME_DICT, 
value.substring(pValue, nextPos));
                         intValue++;
                         weekday = intValue;
-                        datePartUsed = true;
+                        partUsed |= weekdayPart;
                         break;
                     }
                     case 'w':
-                        tmp = vp + Math.min(1, vend - vp);
-                        intValue = strToLong(value.substring(vp, tmp));
+                        tmp = pValue + Math.min(1, endValue - pValue);
+                        intValue = strToLong(value.substring(pValue, tmp));
                         if (intValue >= 7) {
                             throw new InvalidFormatException("invalid day of 
week: " + intValue);
                         }
@@ -1391,97 +1404,97 @@ public class DateLiteral extends LiteralExpr {
                             intValue = 7;
                         }
                         weekday = intValue;
-                        vp = tmp;
-                        datePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= weekdayPart;
                         break;
                     case 'j':
-                        tmp = vp + Math.min(3, vend - vp);
-                        intValue = strToLong(value.substring(vp, tmp));
+                        tmp = pValue + Math.min(3, endValue - pValue);
+                        intValue = strToLong(value.substring(pValue, tmp));
                         yearday = intValue;
-                        vp = tmp;
-                        datePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= yeardayPart;
                         break;
                     case 'u':
                     case 'v':
                     case 'U':
                     case 'V':
-                        sundayFirst = (format.charAt(fp - 1) == 'U' || 
format.charAt(fp - 1) == 'V');
+                        sundayFirst = (format.charAt(pFormat - 1) == 'U' || 
format.charAt(pFormat - 1) == 'V');
                         // Used to check if there is %x or %X
-                        strictWeekNumber = (format.charAt(fp - 1) == 'V' || 
format.charAt(fp - 1) == 'v');
-                        tmp = vp + Math.min(2, vend - vp);
-                        intValue = Long.valueOf(value.substring(vp, tmp));
+                        strictWeekNumber = (format.charAt(pFormat - 1) == 'V' 
|| format.charAt(pFormat - 1) == 'v');
+                        tmp = pValue + Math.min(2, endValue - pValue);
+                        intValue = Long.valueOf(value.substring(pValue, tmp));
                         weekNum = intValue;
                         if (weekNum > 53 || (strictWeekNumber && weekNum == 
0)) {
                             throw new InvalidFormatException("invalid num of 
week: " + weekNum);
                         }
-                        vp = tmp;
-                        datePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= weekNumPart;
                         break;
                     // strict week number, must be used with %V or %v
                     case 'x':
                     case 'X':
-                        strictWeekNumberYearType = (format.charAt(fp - 1) == 
'X');
-                        tmp = vp + Math.min(4, vend - vp);
-                        intValue = Long.valueOf(value.substring(vp, tmp));
+                        strictWeekNumberYearType = (format.charAt(pFormat - 1) 
== 'X');
+                        tmp = pValue + Math.min(4, endValue - pValue);
+                        intValue = Long.valueOf(value.substring(pValue, tmp));
                         strictWeekNumberYear = intValue;
-                        vp = tmp;
-                        datePartUsed = true;
+                        pValue = tmp;
+                        partUsed |= weekNumPart;
                         break;
                     case 'r':
-                        tmp = fromDateFormatStr("%I:%i:%S %p", 
value.substring(vp, vend), true);
-                        vp = tmp;
-                        timePartUsed = true;
+                        tmp = fromDateFormatStr("%I:%i:%S %p", 
value.substring(pValue, endValue), true);
+                        pValue = tmp;
+                        partUsed |= timePart;
                         break;
                     case 'T':
-                        tmp = fromDateFormatStr("%H:%i:%S", 
value.substring(vp, vend), true);
-                        vp = tmp;
-                        timePartUsed = true;
+                        tmp = fromDateFormatStr("%H:%i:%S", 
value.substring(pValue, endValue), true);
+                        pValue = tmp;
+                        partUsed |= timePart;
                         break;
                     case '.':
-                        while (vp < vend && 
Character.toString(value.charAt(vp)).matches("\\p{Punct}")) {
-                            vp++;
+                        while (pValue < endValue && 
Character.toString(value.charAt(pValue)).matches("\\p{Punct}")) {
+                            pValue++;
                         }
                         break;
                     case '@':
-                        while (vp < vend && 
Character.isLetter(value.charAt(vp))) {
-                            vp++;
+                        while (pValue < endValue && 
Character.isLetter(value.charAt(pValue))) {
+                            pValue++;
                         }
                         break;
                     case '#':
-                        while (vp < vend && 
Character.isDigit(value.charAt(vp))) {
-                            vp++;
+                        while (pValue < endValue && 
Character.isDigit(value.charAt(pValue))) {
+                            pValue++;
                         }
                         break;
                     case '%': // %%, escape the %
-                        if ('%' != value.charAt(vp)) {
-                            throw new InvalidFormatException("invalid char 
after %: " + value.charAt(vp));
+                        if ('%' != value.charAt(pValue)) {
+                            throw new InvalidFormatException("invalid char 
after %: " + value.charAt(pValue));
                         }
-                        vp++;
+                        pValue++;
                         break;
                     default:
-                        throw new InvalidFormatException("Invalid format 
pattern: " + f);
+                        throw new InvalidFormatException("Invalid format 
pattern: " + now);
                 }
-            } else if (format.charAt(fp) != ' ') {
-                if (format.charAt(fp) != value.charAt(vp)) {
-                    throw new InvalidFormatException("Invalid char: " + 
value.charAt(vp) + ", expected: "
-                            + format.charAt(fp));
+            } else if (format.charAt(pFormat) != ' ') {
+                if (format.charAt(pFormat) != value.charAt(pValue)) {
+                    throw new InvalidFormatException("Invalid char: " + 
value.charAt(pValue) + ", expected: "
+                            + format.charAt(pFormat));
                 }
-                fp++;
-                vp++;
+                pFormat++;
+                pValue++;
             } else {
-                fp++;
+                pFormat++;
             }
         }
 
         // continue to iterate pattern if has
         // to find out if it has time part.
-        while (fp < fend) {
-            f = format.charAt(fp);
-            if (f == '%' && fp + 1 < fend) {
-                fp++;
-                f = format.charAt(fp);
-                fp++;
-                switch (f) {
+        while (pFormat < endFormat) {
+            now = format.charAt(pFormat);
+            if (now == '%' && pFormat + 1 < endFormat) {
+                pFormat++;
+                now = format.charAt(pFormat);
+                pFormat++;
+                switch (now) {
                     case 'H':
                     case 'h':
                     case 'I':
@@ -1493,25 +1506,29 @@ public class DateLiteral extends LiteralExpr {
                     case 'S':
                     case 'p':
                     case 'T':
-                        timePartUsed = true;
+                        partUsed |= timePart;
                         break;
                     default:
                         break;
                 }
             } else {
-                fp++;
+                pFormat++;
             }
         }
 
-        if (usaTime) {
+        if (partUsed == 0) {
+            throw new InvalidFormatException("Nothing for legal Date: " + 
value);
+        }
+
+        if (hourSystem12) {
             if (this.hour > 12 || this.hour < 1) {
                 throw new InvalidFormatException("Invalid hour: " + hour);
             }
-            this.hour = (this.hour % 12) + dayPart;
+            this.hour = (this.hour % 12) + halfDay;
         }
 
         if (hasSubVal) {
-            return vp;
+            return pValue;
         }
 
         // Year day
@@ -1540,21 +1557,21 @@ public class DateLiteral extends LiteralExpr {
             getDateFromDaynr(days);
         }
 
-        if (!timePartUsed && year > 0) {
-            if (format.equals("%Y")) {
-                month = day = 1;
-            } else if (format.equals("%Y-%m")) {
+        // complete default month/day
+        if ((partUsed & ~normalDatePart) == 0) { // only date here
+            if ((partUsed & dayPart) == 0) {
                 day = 1;
+                if ((partUsed & monthPart) == 0) {
+                    month = 1;
+                }
             }
         }
 
         // Compute timestamp type
-        // TODO(Gabriel): we still use old version datetime/date and change 
this to new version when
-        //  we think it's stable enough
-        if (datePartUsed) {
-            if (microSecondPartUsed) {
+        if ((partUsed & datePart) != 0) { // Ymd part only
+            if ((partUsed & fracPart) != 0) {
                 this.type = Type.DATETIMEV2_WITH_MAX_SCALAR;
-            } else if (timePartUsed) {
+            } else if ((partUsed & timePart) != 0) {
                 this.type = ScalarType.getDefaultDateType(Type.DATETIME);
             } else {
                 this.type = ScalarType.getDefaultDateType(Type.DATE);
diff --git a/regression-test/data/correctness/test_str_to_date.out 
b/regression-test/data/correctness/test_str_to_date.out
index 5aa1f75a2cc..633b590ad98 100644
--- a/regression-test/data/correctness/test_str_to_date.out
+++ b/regression-test/data/correctness/test_str_to_date.out
@@ -13,15 +13,18 @@
 -- !select4 --
 2020-12-03T11:45:14
 
--- !add_1 --
+-- !short_nereids_1 --
 2023-01-01
 
--- !add_2 --
+-- !short_nereids_2 --
 2023-12-01
 
--- !add_3 --
+-- !short_nereids_3 --
 2023-01-01
 
+-- !short_nereids_4 --
+2020-02-01
+
 -- !select5 --
 2019-12-01     yyyy-MM-dd      2019-12-01T00:00
 20201203       yyyyMMdd        2020-12-03T00:00
@@ -36,12 +39,15 @@
 -- !select8 --
 2020-12-03T11:45:14
 
--- !add_4 --
+-- !short_legacy_1 --
 2023-01-01
 
--- !add_5 --
+-- !short_legacy_2 --
 2023-12-01
 
--- !add_6 --
+-- !short_legacy_3 --
 2023-01-01
 
+-- !short_legacy_4 --
+2020-02-01
+
diff --git a/regression-test/data/datatype_p0/date/test_invalid_date.out 
b/regression-test/data/datatype_p0/date/test_invalid_date.out
deleted file mode 100644
index 80b3c3963d3..00000000000
--- a/regression-test/data/datatype_p0/date/test_invalid_date.out
+++ /dev/null
@@ -1,7 +0,0 @@
--- This file is automatically generated. You should know what you did if you 
want to edit this
--- !sql1 --
-\N
-
--- !sql2 --
-\N
-
diff --git a/regression-test/suites/correctness/test_str_to_date.groovy 
b/regression-test/suites/correctness/test_str_to_date.groovy
index 43c80d4f4ba..300f9cb9216 100644
--- a/regression-test/suites/correctness/test_str_to_date.groovy
+++ b/regression-test/suites/correctness/test_str_to_date.groovy
@@ -49,9 +49,10 @@ sql """ set enable_nereids_planner=true ,  
enable_fallback_to_original_planner=f
     qt_select4 """
         SELECT STR_TO_DATE('2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss');
     """
-    qt_add_1 " select STR_TO_DATE('2023', '%Y') "
-    qt_add_2 " select STR_TO_DATE('2023-12', '%Y-%m') "
-    qt_add_3 " select STR_TO_DATE('2023-12', '%Y')"
+    qt_short_nereids_1 " select STR_TO_DATE('2023', '%Y') "
+    qt_short_nereids_2 " select STR_TO_DATE('2023-12', '%Y-%m') "
+    qt_short_nereids_3 " select STR_TO_DATE('2023-12', '%Y')"
+    qt_short_nereids_4 " select STR_TO_DATE('2020%2', '%Y%%%m')"
 
 
 sql """ set enable_nereids_planner=false;"""
@@ -67,7 +68,8 @@ sql """ set enable_nereids_planner=false;"""
     qt_select8 """
         SELECT STR_TO_DATE('2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss');
     """
-    qt_add_4 " select STR_TO_DATE('2023', '%Y') "
-    qt_add_5 " select STR_TO_DATE('2023-12', '%Y-%m') "
-    qt_add_6 " select STR_TO_DATE('2023-12', '%Y')"
+    qt_short_legacy_1 " select STR_TO_DATE('2023', '%Y') "
+    qt_short_legacy_2 " select STR_TO_DATE('2023-12', '%Y-%m') "
+    qt_short_legacy_3 " select STR_TO_DATE('2023-12', '%Y')"
+    qt_short_legacy_4 " select STR_TO_DATE('2020%2', '%Y%%%m')"
 }
diff --git a/regression-test/suites/datatype_p0/date/test_invalid_date.groovy 
b/regression-test/suites/datatype_p0/date/test_invalid_date.groovy
deleted file mode 100644
index 5a7af470357..00000000000
--- a/regression-test/suites/datatype_p0/date/test_invalid_date.groovy
+++ /dev/null
@@ -1,37 +0,0 @@
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_invalid_date") {
-    def tbName = "test_invalid_date"
-    sql "DROP TABLE IF EXISTS ${tbName}"
-    sql """
-            CREATE TABLE IF NOT EXISTS ${tbName} (
-                c0 int,
-                c1 char(10),
-                c2 datev1,
-                c3 datev2
-            )
-            UNIQUE KEY(c0)
-            DISTRIBUTED BY HASH(c0) BUCKETS 5 properties("replication_num" = 
"1");
-        """
-    sql "insert into ${tbName} values(1, 'test1', '2000-01-01', '2000-01-01')"
-
-    qt_sql1 "select str_to_date('202301', '%Y%m');"
-    qt_sql2 "select str_to_date('202301', '%Y%m') from ${tbName}"
-    sql "DROP TABLE ${tbName}"
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to