This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f1961d54906 [enhance](function) refactor from_format_str and support
more format (#30452)
f1961d54906 is described below
commit f1961d5490680b593c5f8a107db59ed2fb3afb7a
Author: zclllyybb <[email protected]>
AuthorDate: Thu Feb 1 19:07:54 2024 +0800
[enhance](function) refactor from_format_str and support more format
(#30452)
---
be/src/vec/functions/function.cpp | 11 +-
be/src/vec/functions/function.h | 16 +-
be/src/vec/functions/function_bitmap.cpp | 2 +-
be/src/vec/functions/nullif.cpp | 2 +-
be/src/vec/runtime/vdatetime_value.cpp | 278 +++++++++---------
be/src/vec/runtime/vdatetime_value.h | 2 +-
be/test/vec/function/function_array_index_test.cpp | 14 +-
be/test/vec/function/function_test_util.h | 19 +-
.../date-time-functions/date-format.md | 10 +-
.../date-time-functions/date-format.md | 4 +-
.../org/apache/doris/analysis/DateLiteral.java | 311 +++++++++++----------
.../data/correctness/test_str_to_date.out | 18 +-
.../data/datatype_p0/date/test_invalid_date.out | 7 -
.../suites/correctness/test_str_to_date.groovy | 14 +-
.../datatype_p0/date/test_invalid_date.groovy | 37 ---
15 files changed, 374 insertions(+), 371 deletions(-)
diff --git a/be/src/vec/functions/function.cpp
b/be/src/vec/functions/function.cpp
index 8df03496c11..e0f785b0ef6 100644
--- a/be/src/vec/functions/function.cpp
+++ b/be/src/vec/functions/function.cpp
@@ -94,13 +94,13 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const
Block& block, const Colum
return ColumnNullable::create(src_not_nullable, result_null_map_column);
}
-bool get_null_presence(const Block& block, const ColumnNumbers& args) {
+bool have_null_column(const Block& block, const ColumnNumbers& args) {
return std::ranges::any_of(args, [&block](const auto& elem) {
return block.get_by_position(elem).type->is_nullable();
});
}
-bool get_null_presence(const ColumnsWithTypeAndName& args) {
+bool have_null_column(const ColumnsWithTypeAndName& args) {
return std::ranges::any_of(args, [](const auto& elem) { return
elem.type->is_nullable(); });
}
@@ -202,20 +202,21 @@ Status
PreparedFunctionImpl::default_implementation_for_nulls(
return Status::OK();
}
- if (get_null_presence(block, args)) {
+ if (have_null_column(block, args)) {
bool need_to_default = need_replace_null_data_to_default();
if (context) {
need_to_default &= context->check_overflow_for_decimal();
}
+ // extract the nested (non-nullable) columns from the nullable arguments
ColumnNumbers new_args;
for (auto arg : args) {
new_args.push_back(block.columns());
block.insert(block.get_by_position(arg).get_nested(need_to_default));
DCHECK(!block.get_by_position(new_args.back()).column->is_nullable());
}
-
RETURN_IF_ERROR(execute_without_low_cardinality_columns(context,
block, new_args, result,
block.rows(),
dry_run));
+ // after running on the nested columns, wrap the result in nullable.
block.get_by_position(result).column = wrap_in_nullable(
block.get_by_position(result).column, block, args, result,
input_rows_count);
@@ -267,7 +268,7 @@ DataTypePtr
FunctionBuilderImpl::get_return_type_without_low_cardinality(
check_number_of_arguments(arguments.size());
if (!arguments.empty() && use_default_implementation_for_nulls()) {
- if (get_null_presence(arguments)) {
+ if (have_null_column(arguments)) {
ColumnNumbers numbers(arguments.size());
std::iota(numbers.begin(), numbers.end(), 0);
auto [nested_block, _] =
diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h
index 73f49a237ae..df5d8d60942 100644
--- a/be/src/vec/functions/function.h
+++ b/be/src/vec/functions/function.h
@@ -65,8 +65,8 @@ concept HasGetVariadicArgumentTypesImpl = requires(T t) {
{ t.get_variadic_argument_types_impl() } -> std::same_as<DataTypes>;
};
-bool get_null_presence(const Block& block, const ColumnNumbers& args);
-bool get_null_presence(const ColumnsWithTypeAndName& args);
+bool have_null_column(const Block& block, const ColumnNumbers& args);
+bool have_null_column(const ColumnsWithTypeAndName& args);
/// The simplest executable object.
/// Motivation:
@@ -288,9 +288,11 @@ public:
bool is_variadic() const override { return false; }
- /// Default implementation. Will check only in non-variadic case.
+ // Default implementation. Will check only in non-variadic case.
void check_number_of_arguments(size_t number_of_arguments) const override;
-
+ // the return type should be the same as what FE plans.
+ // it returns: `get_return_type_impl` if
`use_default_implementation_for_nulls` = false
+ // `get_return_type_impl` wrapped in NULL if
`use_default_implementation_for_nulls` = true and input has NULL
DataTypePtr get_return_type(const ColumnsWithTypeAndName& arguments) const;
DataTypes get_variadic_argument_types() const override {
@@ -300,7 +302,9 @@ public:
ColumnNumbers get_arguments_that_are_always_constant() const override {
return {}; }
protected:
- /// Get the result type by argument type. If the function does not apply
to these arguments, throw an exception.
+ // Get the result type by argument type. If the function does not apply to
these arguments, throw an exception.
+ // the get_return_type_impl and its overrides should only return the
nested type if `use_default_implementation_for_nulls` is true.
+ // whether to wrap in nullable type will be automatically decided.
virtual DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName&
arguments) const {
DataTypes data_types(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i) data_types[i] =
arguments[i].type;
@@ -317,7 +321,7 @@ protected:
* if some of arguments are Nullable(Nothing) then don't call
get_return_type(), call build_impl() with return_type = Nullable(Nothing),
* if some of arguments are Nullable, then:
* - Nullable types are substituted with nested types for
get_return_type() function
- * - wrap get_return_type() result in Nullable type and pass to
build_impl
+ * - WRAP get_return_type() RESULT IN NULLABLE type and pass to
build_impl
*
* Otherwise build returns build_impl(arguments,
get_return_type(arguments));
*/
diff --git a/be/src/vec/functions/function_bitmap.cpp
b/be/src/vec/functions/function_bitmap.cpp
index 88147ec6cd9..6886d53004b 100644
--- a/be/src/vec/functions/function_bitmap.cpp
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -700,7 +700,7 @@ Status execute_bitmap_op_count_null_to_zero(
size_t input_rows_count,
const std::function<Status(FunctionContext*, Block&, const
ColumnNumbers&, size_t, size_t)>&
exec_impl_func) {
- if (get_null_presence(block, arguments)) {
+ if (have_null_column(block, arguments)) {
auto [temporary_block, new_args, new_result] =
create_block_with_nested_columns(block, arguments, result);
RETURN_IF_ERROR(exec_impl_func(context, temporary_block, new_args,
new_result,
diff --git a/be/src/vec/functions/nullif.cpp b/be/src/vec/functions/nullif.cpp
index 315ca52d1bc..928fb1c0767 100644
--- a/be/src/vec/functions/nullif.cpp
+++ b/be/src/vec/functions/nullif.cpp
@@ -75,7 +75,7 @@ public:
}
if (!arguments.empty()) {
- if (get_null_presence(arguments)) {
+ if (have_null_column(arguments)) {
return
make_nullable(std::make_shared<doris::vectorized::DataTypeUInt8>());
}
}
diff --git a/be/src/vec/runtime/vdatetime_value.cpp
b/be/src/vec/runtime/vdatetime_value.cpp
index 58c6c25f9ba..334779f9390 100644
--- a/be/src/vec/runtime/vdatetime_value.cpp
+++ b/be/src/vec/runtime/vdatetime_value.cpp
@@ -19,20 +19,18 @@
#include <cctz/civil_time.h>
#include <cctz/time_zone.h>
-#include <ctype.h>
#include <glog/logging.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
// IWYU pragma: no_include <bits/chrono.h>
#include <algorithm>
-#include <cctype>
#include <chrono> // IWYU pragma: keep
// IWYU pragma: no_include <bits/std_abs.h>
#include <cmath>
-#include <cstring>
-#include <exception>
-#include <string>
+#include <cstdint>
#include <string_view>
#include "common/compiler_util.h"
@@ -393,10 +391,7 @@ bool VecDateTimeValue::from_time_int64(int64_t value) {
return false;
}
_second = value % 100;
- if (_second > TIME_MAX_SECOND) {
- return false;
- }
- return true;
+ return _second <= TIME_MAX_SECOND;
}
char* VecDateTimeValue::append_date_buffer(char* to) const {
@@ -686,7 +681,7 @@ bool VecDateTimeValue::to_format_string(const char* format,
int len, char* to) c
}
char buf[64];
char* cursor = buf;
- char* pos = NULL;
+ char* pos = nullptr;
const char* ptr = format;
const char* end = format + len;
char ch = '\0';
@@ -1145,12 +1140,9 @@ static bool str_to_int64(const char* ptr, const char**
endptr, int64_t* ret) {
uint64_t value_3 = 0;
// Check overflow.
- if (value_1 > cutoff_1 ||
- (value_1 == cutoff_1 &&
- (value_2 > cutoff_2 || (value_2 == cutoff_2 && value_3 > cutoff_3))))
{
- return false;
- }
- return true;
+ return value_1 <= cutoff_1 &&
+ (value_1 != cutoff_1 ||
+ (value_2 <= cutoff_2 && (value_2 != cutoff_2 || value_3 <=
cutoff_3)));
}
static int min(int a, int b) {
@@ -1161,7 +1153,7 @@ static int find_in_lib(const char* lib[], const char*
str, const char* end) {
int pos = 0;
int find_count = 0;
int find_pos = 0;
- for (; lib[pos] != NULL; ++pos) {
+ for (; lib[pos] != nullptr; ++pos) {
const char* i = str;
const char* j = lib[pos];
while (i < end && *j) {
@@ -1199,26 +1191,41 @@ static int check_word(const char* lib[], const char*
str, const char* end, const
// change this method should also change that.
bool VecDateTimeValue::from_date_format_str(const char* format, int
format_len, const char* value,
int value_len, const char**
sub_val_end) {
+ if (value_len <= 0) [[unlikely]] {
+ return false;
+ }
const char* ptr = format;
const char* end = format + format_len;
const char* val = value;
const char* val_end = value + value_len;
- bool date_part_used = false;
- bool time_part_used = false;
- bool frac_part_used = false;
- bool already_set_time_part = false;
-
- int day_part = 0;
+ bool already_set_time_part = false; // skip time part in the end's setting.
+
+ uint32_t part_used = 0;
+ constexpr int YEAR_PART = 1U << 0;
+ constexpr int MONTH_PART = 1U << 1;
+ constexpr int DAY_PART = 1U << 2;
+ constexpr int NORMAL_DATE_PART = YEAR_PART | MONTH_PART | DAY_PART;
+ constexpr int WEEKDAY_PART = 1U << 3;
+ constexpr int YEARDAY_PART = 1U << 4;
+ constexpr int WEEK_NUM_PART = 1U << 5;
+ constexpr int SPECIAL_DATE_PART = WEEKDAY_PART | YEARDAY_PART |
WEEK_NUM_PART;
+ [[maybe_unused]] constexpr int DATE_PART = NORMAL_DATE_PART |
SPECIAL_DATE_PART;
+ constexpr int HOUR_PART = 1U << 6;
+ constexpr int MINUTE_PART = 1U << 7;
+ constexpr int SECOND_PART = 1U << 8;
+ constexpr int TIME_PART = HOUR_PART | MINUTE_PART | SECOND_PART;
+
+ int half_day = 0; // 0 for am/none, 12 for pm.
int weekday = -1;
int yearday = -1;
- int week_num = -1;
+ int week_num = -1; // week idx in one year
bool strict_week_number = false;
bool sunday_first = false;
bool strict_week_number_year_type = false;
int strict_week_number_year = -1;
- bool usa_time = false;
+ bool hour_system_12 = false;
auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0,
0};
while (ptr < end && val < val_end) {
@@ -1231,7 +1238,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
// Check switch
if (*ptr == '%' && ptr + 1 < end) {
- const char* tmp = NULL;
+ const char* tmp = nullptr;
int64_t int_value = 0;
ptr++;
switch (*ptr++) {
@@ -1245,7 +1252,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
int_value += int_value >= 70 ? 1900 : 2000;
year = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= YEAR_PART;
break;
case 'Y':
// Year, numeric, four digits
@@ -1258,7 +1265,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
year = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= YEAR_PART;
break;
// Month
case 'm':
@@ -1269,7 +1276,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
month = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= MONTH_PART;
break;
case 'M':
int_value = check_word(const_cast<const char**>(s_month_name),
val, val_end, &val);
@@ -1277,6 +1284,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
return false;
}
month = int_value;
+ part_used |= MONTH_PART;
break;
case 'b':
int_value = check_word(s_ab_month_name, val, val_end, &val);
@@ -1284,6 +1292,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
return false;
}
month = int_value;
+ part_used |= MONTH_PART;
break;
// Day
case 'd':
@@ -1294,7 +1303,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
day = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= DAY_PART;
break;
case 'D':
tmp = val + min(2, val_end - val);
@@ -1303,13 +1312,14 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
day = int_value;
val = tmp + min(2, val_end - tmp);
- date_part_used = true;
+ part_used |= DAY_PART;
break;
// Hour
case 'h':
case 'I':
case 'l':
- usa_time = true;
+ hour_system_12 = true;
+ part_used |= HOUR_PART;
// Fall through
case 'k':
case 'H':
@@ -1319,7 +1329,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
hour = int_value;
val = tmp;
- time_part_used = true;
+ part_used |= HOUR_PART;
break;
// Minute
case 'i':
@@ -1329,7 +1339,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
minute = int_value;
val = tmp;
- time_part_used = true;
+ part_used |= MINUTE_PART;
break;
// Second
case 's':
@@ -1340,7 +1350,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
second = int_value;
val = tmp;
- time_part_used = true;
+ part_used |= SECOND_PART;
break;
// Micro second
case 'f':
@@ -1351,16 +1361,15 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
val = tmp;
break;
- // AM/PM
+ // AM/PM, only meaningful for 12-hour system.
case 'p':
- if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' ||
!usa_time) {
+ if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' ||
!hour_system_12) {
return false;
}
if (toupper(*val) == 'P') {
// PM
- day_part = 12;
+ half_day = 12;
}
- time_part_used = true;
val += 2;
break;
// Weekday
@@ -1371,7 +1380,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
int_value++;
weekday = int_value;
- date_part_used = true;
+ part_used |= WEEKDAY_PART;
break;
case 'a':
int_value = check_word(s_ab_day_name, val, val_end, &val);
@@ -1380,7 +1389,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
int_value++;
weekday = int_value;
- date_part_used = true;
+ part_used |= WEEKDAY_PART;
break;
case 'w':
tmp = val + min(1, val_end - val);
@@ -1395,7 +1404,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
weekday = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= WEEKDAY_PART;
break;
case 'j':
tmp = val + min(3, val_end - val);
@@ -1404,7 +1413,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
yearday = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= YEARDAY_PART;
break;
case 'u':
case 'v':
@@ -1422,7 +1431,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
return false;
}
val = tmp;
- date_part_used = true;
+ part_used |= WEEK_NUM_PART;
break;
// strict week number, must be used with %V or %v
case 'x':
@@ -1434,7 +1443,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
strict_week_number_year = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= WEEK_NUM_PART;
break;
case 'r': {
VecDateTimeValue tmp_val;
@@ -1445,7 +1454,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
this->_minute = tmp_val._minute;
this->_second = tmp_val._second;
val = tmp;
- time_part_used = true;
+ part_used |= TIME_PART;
already_set_time_part = true;
break;
}
@@ -1457,7 +1466,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
this->_hour = tmp_val._hour;
this->_minute = tmp_val._minute;
this->_second = tmp_val._second;
- time_part_used = true;
+ part_used |= TIME_PART;
already_set_time_part = true;
val = tmp;
break;
@@ -1497,8 +1506,7 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
}
- // continue to iterate pattern if has
- // to find out if it has time part.
+ // for compatibility with MySQL: the format may contain e.g. %H:%i:%s with no
corresponding content in the value, so keep scanning the format for a time part.
while (ptr < end) {
if (*ptr == '%' && ptr + 1 < end) {
ptr++;
@@ -1511,10 +1519,11 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
case 'l':
case 'r':
case 's':
+ case 'f':
case 'S':
case 'p':
case 'T':
- time_part_used = true;
+ part_used |= TIME_PART;
break;
default:
break;
@@ -1524,33 +1533,29 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
}
}
- if (usa_time) {
+ if (!part_used) {
+ return false;
+ }
+
+ if (hour_system_12) {
if (hour > 12 || hour < 1) {
return false;
}
- hour = (hour % 12) + day_part;
+ hour = (hour % 12) + half_day;
}
if (sub_val_end) {
*sub_val_end = val;
}
// Compute timestamp type
- if (frac_part_used) {
- if (date_part_used) {
+ if (part_used & DATE_PART) {
+ if (part_used & TIME_PART) {
_type = TIME_DATETIME;
} else {
- _type = TIME_TIME;
+ _type = TIME_DATE;
}
} else {
- if (date_part_used) {
- if (time_part_used) {
- _type = TIME_DATETIME;
- } else {
- _type = TIME_DATE;
- }
- } else {
- _type = TIME_TIME;
- }
+ _type = TIME_TIME;
}
_neg = false;
@@ -1592,12 +1597,13 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
if (already_set_date_part && already_set_time_part) {
return true;
}
- // for two special date cases, complete default month/day
- if (!time_part_used && year > 0) {
- if (std::string_view {format, end} == "%Y") {
- month = day = 1;
- } else if (std::string_view {format, end} == "%Y-%m") {
+ // complete default month/day
+ if (!(part_used & ~NORMAL_DATE_PART)) { // Ymd part only
+ if (!(part_used & DAY_PART)) {
day = 1;
+ if (!(part_used & MONTH_PART)) {
+ month = 1;
+ }
}
}
@@ -1614,7 +1620,9 @@ bool VecDateTimeValue::from_date_format_str(const char*
format, int format_len,
template <TimeUnit unit, bool need_check>
bool VecDateTimeValue::date_add_interval(const TimeInterval& interval) {
if constexpr (need_check) {
- if (!is_valid_date()) return false;
+ if (!is_valid_date()) {
+ return false;
+ }
}
int sign = interval.is_neg ? -1 : 1;
@@ -1771,7 +1779,7 @@ void VecDateTimeValue::from_unixtime(int64_t timestamp,
const cctz::time_zone& c
const char* VecDateTimeValue::month_name() const {
if (_month < 1 || _month > 12) {
- return NULL;
+ return nullptr;
}
return s_month_name[_month];
}
@@ -1779,14 +1787,14 @@ const char* VecDateTimeValue::month_name() const {
const char* VecDateTimeValue::day_name() const {
int day = weekday();
if (day < 0 || day >= 7) {
- return NULL;
+ return nullptr;
}
return s_day_name[day];
}
VecDateTimeValue VecDateTimeValue::local_time() {
VecDateTimeValue value;
- value.from_unixtime(time(NULL), TimezoneUtils::default_time_zone);
+ value.from_unixtime(time(nullptr), TimezoneUtils::default_time_zone);
return value;
}
@@ -2170,17 +2178,33 @@ void DateV2Value<T>::set_zero() {
template <typename T>
bool DateV2Value<T>::from_date_format_str(const char* format, int format_len,
const char* value,
int value_len, const char**
sub_val_end) {
+ if (value_len <= 0) [[unlikely]] {
+ return false;
+ }
const char* ptr = format;
const char* end = format + format_len;
const char* val = value;
const char* val_end = value + value_len;
- bool date_part_used = false;
- bool time_part_used = false;
- bool frac_part_used = false;
- bool already_set_time_part = false;
-
- int day_part = 0;
+ bool already_set_time_part = false; // skip time part in the end's setting.
+
+ uint32_t part_used = 0;
+ constexpr int YEAR_PART = 1U << 0;
+ constexpr int MONTH_PART = 1U << 1;
+ constexpr int DAY_PART = 1U << 2;
+ constexpr int NORMAL_DATE_PART = YEAR_PART | MONTH_PART | DAY_PART;
+ constexpr int WEEKDAY_PART = 1U << 3;
+ constexpr int YEARDAY_PART = 1U << 4;
+ constexpr int WEEK_NUM_PART = 1U << 5;
+ constexpr int SPECIAL_DATE_PART = WEEKDAY_PART | YEARDAY_PART |
WEEK_NUM_PART;
+ [[maybe_unused]] constexpr int DATE_PART = NORMAL_DATE_PART |
SPECIAL_DATE_PART;
+ constexpr int HOUR_PART = 1U << 6;
+ constexpr int MINUTE_PART = 1U << 7;
+ constexpr int SECOND_PART = 1U << 8;
+ constexpr int FRAC_PART = 1U << 9;
+ constexpr int TIME_PART = HOUR_PART | MINUTE_PART | SECOND_PART |
FRAC_PART;
+
+ int half_day = 0; // 0 for am/none, 12 for pm.
int weekday = -1;
int yearday = -1;
int week_num = -1;
@@ -2189,7 +2213,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
bool sunday_first = false;
bool strict_week_number_year_type = false;
int strict_week_number_year = -1;
- bool usa_time = false;
+ bool hour_system_12 = false;
auto [year, month, day, hour, minute, second, microsecond] = std::tuple
{0, 0, 0, 0, 0, 0, 0};
while (ptr < end && val < val_end) {
@@ -2202,7 +2226,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
// Check switch
if (*ptr == '%' && ptr + 1 < end) {
- const char* tmp = NULL;
+ const char* tmp = nullptr;
int64_t int_value = 0;
ptr++;
switch (*ptr++) {
@@ -2216,7 +2240,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
int_value += int_value >= 70 ? 1900 : 2000;
year = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= YEAR_PART;
break;
case 'Y':
// Year, numeric, four digits
@@ -2229,7 +2253,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
year = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= YEAR_PART;
break;
// Month
case 'm':
@@ -2240,7 +2264,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
month = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= MONTH_PART;
break;
case 'M':
int_value = check_word(const_cast<const char**>(s_month_name),
val, val_end, &val);
@@ -2248,6 +2272,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
return false;
}
month = int_value;
+ part_used |= MONTH_PART;
break;
case 'b':
int_value = check_word(s_ab_month_name, val, val_end, &val);
@@ -2255,6 +2280,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
return false;
}
month = int_value;
+ part_used |= MONTH_PART;
break;
// Day
case 'd':
@@ -2265,7 +2291,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
day = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= DAY_PART;
break;
case 'D':
tmp = val + min(2, val_end - val);
@@ -2274,13 +2300,14 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
day = int_value;
val = tmp + min(2, val_end - tmp);
- date_part_used = true;
+ part_used |= DAY_PART;
break;
// Hour
case 'h':
case 'I':
case 'l':
- usa_time = true;
+ hour_system_12 = true;
+ part_used |= HOUR_PART;
// Fall through
case 'k':
case 'H':
@@ -2290,7 +2317,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
hour = int_value;
val = tmp;
- time_part_used = true;
+ part_used |= HOUR_PART;
break;
// Minute
case 'i':
@@ -2300,7 +2327,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
minute = int_value;
val = tmp;
- time_part_used = true;
+ part_used |= MINUTE_PART;
break;
// Second
case 's':
@@ -2311,7 +2338,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
second = int_value;
val = tmp;
- time_part_used = true;
+ part_used |= SECOND_PART;
break;
// Micro second
case 'f':
@@ -2333,21 +2360,19 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
if constexpr (is_datetime) {
microsecond = int_value * int_exp10(6 - min(6, tmp - val));
- frac_part_used = true;
+ part_used |= FRAC_PART;
}
val = tmp;
- time_part_used = true;
break;
// AM/PM
case 'p':
- if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' ||
!usa_time) {
+ if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' ||
!hour_system_12) {
return false;
}
if (toupper(*val) == 'P') {
// PM
- day_part = 12;
+ half_day = 12;
}
- time_part_used = true;
val += 2;
break;
// Weekday
@@ -2358,7 +2383,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
int_value++;
weekday = int_value;
- date_part_used = true;
+ part_used |= WEEKDAY_PART;
break;
case 'a':
int_value = check_word(s_ab_day_name, val, val_end, &val);
@@ -2367,7 +2392,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
int_value++;
weekday = int_value;
- date_part_used = true;
+ part_used |= WEEKDAY_PART;
break;
case 'w':
tmp = val + min(1, val_end - val);
@@ -2382,7 +2407,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
weekday = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= WEEKDAY_PART;
break;
case 'j':
tmp = val + min(3, val_end - val);
@@ -2391,7 +2416,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
yearday = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= YEARDAY_PART;
break;
case 'u':
case 'v':
@@ -2409,7 +2434,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
return false;
}
val = tmp;
- date_part_used = true;
+ part_used |= WEEK_NUM_PART;
break;
// strict week number, must be used with %V or %v
case 'x':
@@ -2421,7 +2446,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
strict_week_number_year = int_value;
val = tmp;
- date_part_used = true;
+ part_used |= WEEK_NUM_PART;
break;
case 'r': {
if constexpr (is_datetime) {
@@ -2434,7 +2459,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
this->date_v2_value_.minute_ = tmp_val.minute();
this->date_v2_value_.second_ = tmp_val.second();
val = tmp;
- time_part_used = true;
+ part_used |= TIME_PART;
already_set_time_part = true;
break;
} else {
@@ -2450,7 +2475,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
this->date_v2_value_.hour_ = tmp_val.hour();
this->date_v2_value_.minute_ = tmp_val.minute();
this->date_v2_value_.second_ = tmp_val.second();
- time_part_used = true;
+ part_used |= TIME_PART;
already_set_time_part = true;
val = tmp;
break;
@@ -2493,12 +2518,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
}
- // ptr == format means input value string is "", not do parse format
failed here
- if (ptr == format) {
- return false;
- }
- // continue to iterate pattern if has
- // to find out if it has time part.
+ // for compatibility with MySQL: the format may contain e.g. %H:%i:%s with no
corresponding content in the value, so keep scanning the format for a time part.
while (ptr < end) {
if (*ptr == '%' && ptr + 1 < end) {
ptr++;
@@ -2515,7 +2535,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
case 'S':
case 'p':
case 'T':
- time_part_used = true;
+ part_used |= TIME_PART;
break;
default:
break;
@@ -2525,25 +2545,27 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
}
- if (usa_time) {
+ if (!part_used) {
+ return false;
+ }
+
+ if (hour_system_12) {
if (hour > 12 || hour < 1) {
return false;
}
- hour = (hour % 12) + day_part;
+ hour = (hour % 12) + half_day;
}
if (sub_val_end) {
*sub_val_end = val;
}
// Compute timestamp type
- if (frac_part_used) {
+ if (part_used & FRAC_PART) {
if constexpr (!is_datetime) {
- LOG(WARNING) << "Microsecond is not allowed for date type!";
return false;
}
- } else if (time_part_used) {
+ } else if (part_used & TIME_PART) {
if constexpr (!is_datetime) {
- LOG(WARNING) << "Time part is not allowed for date type!";
return false;
}
}
@@ -2582,7 +2604,9 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
// so we only need to set date part
// 3. if both are true, means all part of date_time be set, no need
check_range_and_set_time
bool already_set_date_part = yearday > 0 || (week_num >= 0 && weekday > 0);
- if (already_set_date_part && already_set_time_part) return true;
+ if (already_set_date_part && already_set_time_part) {
+ return true;
+ }
if (already_set_date_part) {
if constexpr (is_datetime) {
return check_range_and_set_time(date_v2_value_.year_,
date_v2_value_.month_,
@@ -2592,13 +2616,13 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
date_v2_value_.day_, 0, 0, 0, 0);
}
}
-
- // for two special date cases, complete default month/day
- if (!time_part_used && year > 0) {
- if (std::string_view {format, end} == "%Y") {
- month = day = 1;
- } else if (std::string_view {format, end} == "%Y-%m") {
+ // complete default month/day
+ if (!(part_used & ~NORMAL_DATE_PART)) { // Ymd part only
+ if (!(part_used & DAY_PART)) {
day = 1;
+ if (!(part_used & MONTH_PART)) {
+ month = 1;
+ }
}
}
@@ -2613,7 +2637,7 @@ bool DateV2Value<T>::from_date_format_str(const char*
format, int format_len, co
}
if constexpr (is_datetime) {
return check_range_and_set_time(year, month, day, hour, minute,
second, microsecond,
- time_part_used && !date_part_used);
+ !(part_used & ~TIME_PART));
} else {
return check_range_and_set_time(year, month, day, 0, 0, 0, 0);
}
diff --git a/be/src/vec/runtime/vdatetime_value.h
b/be/src/vec/runtime/vdatetime_value.h
index ad3619aba53..7b138b2bc3a 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -206,7 +206,7 @@ static constexpr uint32_t DATEV2_YEAR_WIDTH = 23;
static constexpr uint32_t DATETIMEV2_YEAR_WIDTH = 18;
static constexpr uint32_t DATETIMEV2_MONTH_WIDTH = 4;
-static RE2 time_zone_offset_format_reg("^[+-]{1}\\d{2}\\:\\d{2}$");
+static RE2 time_zone_offset_format_reg(R"(^[+-]{1}\d{2}\:\d{2}$)");
uint8_t mysql_week_mode(uint32_t mode);
diff --git a/be/test/vec/function/function_array_index_test.cpp
b/be/test/vec/function/function_array_index_test.cpp
index 7f496dc1cce..92c7901bbfe 100644
--- a/be/test/vec/function/function_array_index_test.cpp
+++ b/be/test/vec/function/function_array_index_test.cpp
@@ -114,10 +114,8 @@ TEST(function_array_index_test, array_contains) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date,
TypeIndex::Date};
- Array vec = {str_to_date_time("2022-01-02", false),
str_to_date_time("", false),
- str_to_date_time("2022-07-08", false)};
+ Array vec = {str_to_date_time("2022-01-02", false),
str_to_date_time("2022-07-08", false)};
DataSet data_set = {{{vec, std::string("2022-01-02")}, UInt8(1)},
- {{vec, std::string("")}, UInt8(1)},
{{vec, std::string("2022-01-03")}, UInt8(0)},
{{Null(), std::string("2022-01-04")}, Null()},
{{empty_arr, std::string("2022-01-02")},
UInt8(0)}};
@@ -129,10 +127,9 @@ TEST(function_array_index_test, array_contains) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime,
TypeIndex::DateTime};
- Array vec = {str_to_date_time("2022-01-02 00:00:00"),
str_to_date_time(""),
+ Array vec = {str_to_date_time("2022-01-02 00:00:00"),
str_to_date_time("2022-07-08 00:00:00")};
DataSet data_set = {{{vec, std::string("2022-01-02 00:00:00")},
UInt8(1)},
- {{vec, std::string("")}, UInt8(1)},
{{vec, std::string("2022-01-03 00:00:00")},
UInt8(0)},
{{Null(), std::string("2022-01-04 00:00:00")},
Null()},
{{empty_arr, std::string("2022-01-02 00:00:00")},
UInt8(0)}};
@@ -217,10 +214,8 @@ TEST(function_array_index_test, array_position) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date,
TypeIndex::Date};
- Array vec = {str_to_date_time("2022-01-02", false),
str_to_date_time("", false),
- str_to_date_time("2022-07-08", false)};
+ Array vec = {str_to_date_time("2022-01-02", false),
str_to_date_time("2022-07-08", false)};
DataSet data_set = {{{vec, std::string("2022-01-02")}, Int64(1)},
- {{vec, std::string("")}, Int64(2)},
{{vec, std::string("2022-01-03")}, Int64(0)},
{{Null(), std::string("2022-01-04")}, Null()},
{{empty_arr, std::string("2022-01-02")},
Int64(0)}};
@@ -232,10 +227,9 @@ TEST(function_array_index_test, array_position) {
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::DateTime,
TypeIndex::DateTime};
- Array vec = {str_to_date_time("2022-01-02 00:00:00"),
str_to_date_time(""),
+ Array vec = {str_to_date_time("2022-01-02 00:00:00"),
str_to_date_time("2022-07-08 00:00:00")};
DataSet data_set = {{{vec, std::string("2022-01-02 00:00:00")},
Int64(1)},
- {{vec, std::string("")}, Int64(2)},
{{vec, std::string("2022-01-03 00:00:00")},
Int64(0)},
{{Null(), std::string("2022-01-04 00:00:00")},
Null()},
{{empty_arr, std::string("2022-01-02 00:00:00")},
Int64(0)}};
diff --git a/be/test/vec/function/function_test_util.h
b/be/test/vec/function/function_test_util.h
index f488690f51d..44cb954e3c3 100644
--- a/be/test/vec/function/function_test_util.h
+++ b/be/test/vec/function/function_test_util.h
@@ -52,18 +52,13 @@
#include "vec/data_types/data_type_number.h"
#include "vec/functions/simple_function_factory.h"
-namespace doris {
-namespace vectorized {
+namespace doris::vectorized {
+
class DataTypeJsonb;
class DataTypeTime;
class TableFunction;
template <typename T>
class DataTypeDecimal;
-} // namespace vectorized
-} // namespace doris
-
-namespace doris::vectorized {
-
using InputDataSet = std::vector<std::vector<AnyType>>; // without result
using CellSet = std::vector<AnyType>;
using Expect = AnyType;
@@ -71,6 +66,7 @@ using Row = std::pair<CellSet, Expect>;
using DataSet = std::vector<Row>;
using InputTypeSet = std::vector<AnyType>;
+// FIXME: should use exception or expected to deal with null value.
int64_t str_to_date_time(std::string datetime_str, bool data_time = true);
uint32_t str_to_date_v2(std::string datetime_str, std::string datetime_format);
uint64_t str_to_datetime_v2(std::string datetime_str, std::string
datetime_format);
@@ -300,7 +296,7 @@ Status check_function(const std::string& func_name, const
InputTypeSet& input_ty
if constexpr (std::is_same_v<ReturnType, DataTypeJsonb>) {
const auto& expect_data = any_cast<String>(data_set[i].second);
auto s = column->get_data_at(i);
- if (expect_data.size() == 0) {
+ if (expect_data.empty()) {
// zero size result means invalid
EXPECT_EQ(0, s.size) << " invalid result size should be 0
at row " << i;
} else {
@@ -321,7 +317,8 @@ Status check_function(const std::string& func_name, const
InputTypeSet& input_ty
} else if constexpr (std::is_same_v<ReturnType,
DataTypeBitMap>) {
const ColumnBitmap* bitmap_col = nullptr;
if constexpr (nullable) {
- auto nullable_column = assert_cast<const
ColumnNullable*>(column.get());
+ const auto* nullable_column =
+ assert_cast<const
ColumnNullable*>(column.get());
bitmap_col = assert_cast<const ColumnBitmap*>(
nullable_column->get_nested_column_ptr().get());
} else {
@@ -344,7 +341,9 @@ Status check_function(const std::string& func_name, const
InputTypeSet& input_ty
if constexpr (nullable) {
bool is_null = data_set[i].second.type() == &typeid(Null);
EXPECT_EQ(is_null, column->is_null_at(i)) << " at row " << i;
- if (!is_null) check_column_data();
+ if (!is_null) {
+ check_column_data();
+ }
} else {
check_column_data();
}
diff --git
a/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md
b/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md
index ab86ec15c2b..54f2acd7597 100644
--- a/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md
+++ b/docs/en/docs/sql-manual/sql-functions/date-time-functions/date-format.md
@@ -70,23 +70,23 @@ The formats available are:
% m | month, numerical value (00-12)
-%p | AM or PM
+% p | AM or PM, only available in the 12-hour system
-% R | Time, 12 - hour (hh: mm: SS AM or PM)
+% r | Time, 12-hour (hh:mm:ss), with or without an AM/PM marker
% S | seconds (00-59)
% s | seconds (00-59)
-% T | Time, 24 - hour (hh: mm: ss)
+% T | Time, 24-hour (hh:mm:ss)
% U | Week (00-53) Sunday is the first day of the week
-% U | Week (00 - 53) Monday is the first day of the week
+% u | Week (00-53) Monday is the first day of the week
% V | Week (01-53) Sunday is the first day of the week, and% X is used.
-% v | Week (01 - 53) Monday is the first day of the week, and% x is used
+% v | Week (01-53) Monday is the first day of the week, and% x is used
% W | Sunday
diff --git
a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md
b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md
index 69a6315d729..a5fb25f2986 100644
---
a/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md
+++
b/docs/zh-CN/docs/sql-manual/sql-functions/date-time-functions/date-format.md
@@ -70,9 +70,9 @@ date 参数是合法的日期。format 规定日期/时间的输出格式。
%m | 月,数值(00-12)
-%p | AM 或 PM
+%p | AM 或 PM,仅在采用12小时制时可用。
-%r | 时间,12-小时(hh:mm:ss AM 或 PM)
+%r | 时间,12-小时(hh:mm:ss),可以包含或不包含AM/PM。
%S | 秒(00-59)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
index 2f7f183b1ad..37171b70fac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
@@ -1203,16 +1203,28 @@ public class DateLiteral extends LiteralExpr {
// this method is exaclty same as from_date_format_str() in
be/src/runtime/datetime_value.cpp
// change this method should also change that.
public int fromDateFormatStr(String format, String value, boolean
hasSubVal) throws InvalidFormatException {
- int fp = 0; // pointer to the current format string
- int fend = format.length(); // end of format string
- int vp = 0; // pointer to the date string value
- int vend = value.length(); // end of date string value
-
- boolean datePartUsed = false;
- boolean timePartUsed = false;
- boolean microSecondPartUsed = false;
-
- int dayPart = 0;
+ int pFormat = 0; // pointer to the current format string
+ int endFormat = format.length(); // end of format string
+ int pValue = 0; // pointer to the date string value
+ int endValue = value.length(); // end of date string value
+
+ int partUsed = 0;
+ final int yearPart = 1 << 0;
+ final int monthPart = 1 << 1;
+ final int dayPart = 1 << 2;
+ final int weekdayPart = 1 << 3;
+ final int yeardayPart = 1 << 4;
+ final int weekNumPart = 1 << 5;
+ final int normalDatePart = yearPart | monthPart | dayPart;
+ final int specialDatePart = weekdayPart | yeardayPart | weekNumPart;
+ final int datePart = normalDatePart | specialDatePart;
+ final int hourPart = 1 << 6;
+ final int minutePart = 1 << 7;
+ final int secondPart = 1 << 8;
+ final int fracPart = 1 << 9;
+ final int timePart = hourPart | minutePart | secondPart | fracPart;
+
+ int halfDay = 0; // 0 for am/none, 12 for pm.
long weekday = -1;
long yearday = -1;
long weekNum = -1;
@@ -1221,169 +1233,170 @@ public class DateLiteral extends LiteralExpr {
boolean sundayFirst = false;
boolean strictWeekNumberYearType = false;
long strictWeekNumberYear = -1;
- boolean usaTime = false;
+ boolean hourSystem12 = false; // hour in [0..12] and with am/pm
- char f;
- while (fp < fend && vp < vend) {
+ char now;
+ while (pFormat < endFormat && pValue < endValue) {
// Skip space character
- while (vp < vend && Character.isSpaceChar(value.charAt(vp))) {
- vp++;
+ while (pValue < endValue &&
Character.isSpaceChar(value.charAt(pValue))) {
+ pValue++;
}
- if (vp >= vend) {
+ if (pValue >= endValue) {
break;
}
// Check switch
- f = format.charAt(fp);
- if (f == '%' && fp + 1 < fend) {
+ now = format.charAt(pFormat);
+ if (now == '%' && pFormat + 1 < endFormat) {
int tmp = 0;
long intValue = 0;
- fp++;
- f = format.charAt(fp);
- fp++;
- switch (f) {
+ pFormat++;
+ now = format.charAt(pFormat);
+ pFormat++;
+ switch (now) {
// Year
case 'y':
// Year, numeric (two digits)
- tmp = vp + Math.min(2, vend - vp);
- intValue = strToLong(value.substring(vp, tmp));
+ tmp = pValue + Math.min(2, endValue - pValue);
+ intValue = strToLong(value.substring(pValue, tmp));
intValue += intValue >= 70 ? 1900 : 2000;
this.year = intValue;
- vp = tmp;
- datePartUsed = true;
+ pValue = tmp;
+ partUsed |= yearPart;
break;
case 'Y':
// Year, numeric, four digits
- tmp = vp + Math.min(4, vend - vp);
- intValue = strToLong(value.substring(vp, tmp));
- if (tmp - vp <= 2) {
+ tmp = pValue + Math.min(4, endValue - pValue);
+ intValue = strToLong(value.substring(pValue, tmp));
+ if (tmp - pValue <= 2) {
intValue += intValue >= 70 ? 1900 : 2000;
}
this.year = intValue;
- vp = tmp;
- datePartUsed = true;
+ pValue = tmp;
+ partUsed |= yearPart;
break;
// Month
case 'm':
case 'c':
- tmp = vp + Math.min(2, vend - vp);
- intValue = strToLong(value.substring(vp, tmp));
+ tmp = pValue + Math.min(2, endValue - pValue);
+ intValue = strToLong(value.substring(pValue, tmp));
this.month = intValue;
- vp = tmp;
- datePartUsed = true;
+ pValue = tmp;
+ partUsed |= monthPart;
break;
case 'M': {
- int nextPos = findWord(value, vp);
- intValue = checkWord(MONTH_NAME_DICT,
value.substring(vp, nextPos));
+ int nextPos = findWord(value, pValue);
+ intValue = checkWord(MONTH_NAME_DICT,
value.substring(pValue, nextPos));
this.month = intValue;
- vp = nextPos;
+ pValue = nextPos;
+ partUsed |= monthPart;
break;
}
case 'b': {
- int nextPos = findWord(value, vp);
- intValue = checkWord(MONTH_ABBR_NAME_DICT,
value.substring(vp, nextPos));
+ int nextPos = findWord(value, pValue);
+ intValue = checkWord(MONTH_ABBR_NAME_DICT,
value.substring(pValue, nextPos));
this.month = intValue;
- vp = nextPos;
+ pValue = nextPos;
+ partUsed |= monthPart;
break;
}
// Day
case 'd':
case 'e':
- tmp = vp + Math.min(2, vend - vp);
- intValue = strToLong(value.substring(vp, tmp));
+ tmp = pValue + Math.min(2, endValue - pValue);
+ intValue = strToLong(value.substring(pValue, tmp));
this.day = intValue;
- vp = tmp;
- datePartUsed = true;
+ pValue = tmp;
+ partUsed |= dayPart;
break;
case 'D':
- tmp = vp + Math.min(2, vend - vp);
- intValue = strToLong(value.substring(vp, tmp));
+ tmp = pValue + Math.min(2, endValue - pValue);
+ intValue = strToLong(value.substring(pValue, tmp));
this.day = intValue;
- vp = tmp + Math.min(2, vend - tmp);
- datePartUsed = true;
+ pValue = tmp + Math.min(2, endValue - tmp);
+ partUsed |= dayPart;
break;
// Hour
case 'h':
case 'I':
case 'l':
- usaTime = true;
+ hourSystem12 = true;
+ partUsed |= hourPart;
case 'k': // CHECKSTYLE IGNORE THIS LINE: Fall through
case 'H':
- tmp = findNumber(value, vp, 2);
- intValue = strToLong(value.substring(vp, tmp));
+ tmp = findNumber(value, pValue, 2);
+ intValue = strToLong(value.substring(pValue, tmp));
this.hour = intValue;
- vp = tmp;
- timePartUsed = true;
+ pValue = tmp;
+ partUsed |= hourPart;
break;
// Minute
case 'i':
- tmp = vp + Math.min(2, vend - vp);
- intValue = strToLong(value.substring(vp, tmp));
+ tmp = pValue + Math.min(2, endValue - pValue);
+ intValue = strToLong(value.substring(pValue, tmp));
this.minute = intValue;
- vp = tmp;
- timePartUsed = true;
+ pValue = tmp;
+ partUsed |= minutePart;
break;
// Second
case 's':
case 'S':
- tmp = vp + Math.min(2, vend - vp);
- intValue = strToLong(value.substring(vp, tmp));
+ tmp = pValue + Math.min(2, endValue - pValue);
+ intValue = strToLong(value.substring(pValue, tmp));
this.second = intValue;
- vp = tmp;
- timePartUsed = true;
+ pValue = tmp;
+ partUsed |= secondPart;
break;
// Micro second
case 'f':
- // FIXME: fix same with BE
- tmp = vp;
+ tmp = pValue;
// when there's still something to the end, fix the
scale of ms.
- while (tmp < vend &&
Character.isDigit(value.charAt(tmp))) {
+ while (tmp < endValue &&
Character.isDigit(value.charAt(tmp))) {
tmp += 1;
}
- if (tmp - vp > 6) {
- int tmp2 = vp + 6;
- intValue = strToLong(value.substring(vp, tmp2));
+ if (tmp - pValue > 6) {
+ int tmp2 = pValue + 6;
+ intValue = strToLong(value.substring(pValue,
tmp2));
} else {
- intValue = strToLong(value.substring(vp, tmp));
+ intValue = strToLong(value.substring(pValue, tmp));
}
- this.microsecond = (long) (intValue * Math.pow(10, 6 -
Math.min(6, tmp - vp)));
- timePartUsed = true;
- microSecondPartUsed = true;
- vp = tmp;
+ this.microsecond = (long) (intValue * Math.pow(10, 6 -
Math.min(6, tmp - pValue)));
+ partUsed |= fracPart;
+ pValue = tmp;
break;
// AM/PM
case 'p':
- if ((vend - vp) < 2 ||
Character.toUpperCase(value.charAt(vp + 1)) != 'M' || !usaTime) {
+ if ((endValue - pValue) < 2 ||
Character.toUpperCase(value.charAt(pValue + 1)) != 'M'
+ || !hourSystem12) {
throw new InvalidFormatException("Invalid %p
format");
}
- if (Character.toUpperCase(value.charAt(vp)) == 'P') {
+ if (Character.toUpperCase(value.charAt(pValue)) ==
'P') {
// PM
- dayPart = 12;
+ halfDay = 12;
}
- timePartUsed = true;
- vp += 2;
+ pValue += 2;
break;
// Weekday
case 'W': {
- int nextPos = findWord(value, vp);
- intValue = checkWord(WEEK_DAY_NAME_DICT,
value.substring(vp, nextPos));
+ int nextPos = findWord(value, pValue);
+ intValue = checkWord(WEEK_DAY_NAME_DICT,
value.substring(pValue, nextPos));
intValue++;
weekday = intValue;
- datePartUsed = true;
+ partUsed |= weekdayPart;
break;
}
case 'a': {
- int nextPos = findWord(value, vp);
- intValue = checkWord(WEEK_DAY_NAME_DICT,
value.substring(vp, nextPos));
+ int nextPos = findWord(value, pValue);
+ intValue = checkWord(WEEK_DAY_NAME_DICT,
value.substring(pValue, nextPos));
intValue++;
weekday = intValue;
- datePartUsed = true;
+ partUsed |= weekdayPart;
break;
}
case 'w':
- tmp = vp + Math.min(1, vend - vp);
- intValue = strToLong(value.substring(vp, tmp));
+ tmp = pValue + Math.min(1, endValue - pValue);
+ intValue = strToLong(value.substring(pValue, tmp));
if (intValue >= 7) {
throw new InvalidFormatException("invalid day of
week: " + intValue);
}
@@ -1391,97 +1404,97 @@ public class DateLiteral extends LiteralExpr {
intValue = 7;
}
weekday = intValue;
- vp = tmp;
- datePartUsed = true;
+ pValue = tmp;
+ partUsed |= weekdayPart;
break;
case 'j':
- tmp = vp + Math.min(3, vend - vp);
- intValue = strToLong(value.substring(vp, tmp));
+ tmp = pValue + Math.min(3, endValue - pValue);
+ intValue = strToLong(value.substring(pValue, tmp));
yearday = intValue;
- vp = tmp;
- datePartUsed = true;
+ pValue = tmp;
+ partUsed |= yeardayPart;
break;
case 'u':
case 'v':
case 'U':
case 'V':
- sundayFirst = (format.charAt(fp - 1) == 'U' ||
format.charAt(fp - 1) == 'V');
+ sundayFirst = (format.charAt(pFormat - 1) == 'U' ||
format.charAt(pFormat - 1) == 'V');
// Used to check if there is %x or %X
- strictWeekNumber = (format.charAt(fp - 1) == 'V' ||
format.charAt(fp - 1) == 'v');
- tmp = vp + Math.min(2, vend - vp);
- intValue = Long.valueOf(value.substring(vp, tmp));
+ strictWeekNumber = (format.charAt(pFormat - 1) == 'V'
|| format.charAt(pFormat - 1) == 'v');
+ tmp = pValue + Math.min(2, endValue - pValue);
+ intValue = Long.valueOf(value.substring(pValue, tmp));
weekNum = intValue;
if (weekNum > 53 || (strictWeekNumber && weekNum ==
0)) {
throw new InvalidFormatException("invalid num of
week: " + weekNum);
}
- vp = tmp;
- datePartUsed = true;
+ pValue = tmp;
+ partUsed |= weekNumPart;
break;
// strict week number, must be used with %V or %v
case 'x':
case 'X':
- strictWeekNumberYearType = (format.charAt(fp - 1) ==
'X');
- tmp = vp + Math.min(4, vend - vp);
- intValue = Long.valueOf(value.substring(vp, tmp));
+ strictWeekNumberYearType = (format.charAt(pFormat - 1)
== 'X');
+ tmp = pValue + Math.min(4, endValue - pValue);
+ intValue = Long.valueOf(value.substring(pValue, tmp));
strictWeekNumberYear = intValue;
- vp = tmp;
- datePartUsed = true;
+ pValue = tmp;
+ partUsed |= weekNumPart;
break;
case 'r':
- tmp = fromDateFormatStr("%I:%i:%S %p",
value.substring(vp, vend), true);
- vp = tmp;
- timePartUsed = true;
+ tmp = fromDateFormatStr("%I:%i:%S %p",
value.substring(pValue, endValue), true);
+ pValue = tmp;
+ partUsed |= timePart;
break;
case 'T':
- tmp = fromDateFormatStr("%H:%i:%S",
value.substring(vp, vend), true);
- vp = tmp;
- timePartUsed = true;
+ tmp = fromDateFormatStr("%H:%i:%S",
value.substring(pValue, endValue), true);
+ pValue = tmp;
+ partUsed |= timePart;
break;
case '.':
- while (vp < vend &&
Character.toString(value.charAt(vp)).matches("\\p{Punct}")) {
- vp++;
+ while (pValue < endValue &&
Character.toString(value.charAt(pValue)).matches("\\p{Punct}")) {
+ pValue++;
}
break;
case '@':
- while (vp < vend &&
Character.isLetter(value.charAt(vp))) {
- vp++;
+ while (pValue < endValue &&
Character.isLetter(value.charAt(pValue))) {
+ pValue++;
}
break;
case '#':
- while (vp < vend &&
Character.isDigit(value.charAt(vp))) {
- vp++;
+ while (pValue < endValue &&
Character.isDigit(value.charAt(pValue))) {
+ pValue++;
}
break;
case '%': // %%, escape the %
- if ('%' != value.charAt(vp)) {
- throw new InvalidFormatException("invalid char
after %: " + value.charAt(vp));
+ if ('%' != value.charAt(pValue)) {
+ throw new InvalidFormatException("invalid char
after %: " + value.charAt(pValue));
}
- vp++;
+ pValue++;
break;
default:
- throw new InvalidFormatException("Invalid format
pattern: " + f);
+ throw new InvalidFormatException("Invalid format
pattern: " + now);
}
- } else if (format.charAt(fp) != ' ') {
- if (format.charAt(fp) != value.charAt(vp)) {
- throw new InvalidFormatException("Invalid char: " +
value.charAt(vp) + ", expected: "
- + format.charAt(fp));
+ } else if (format.charAt(pFormat) != ' ') {
+ if (format.charAt(pFormat) != value.charAt(pValue)) {
+ throw new InvalidFormatException("Invalid char: " +
value.charAt(pValue) + ", expected: "
+ + format.charAt(pFormat));
}
- fp++;
- vp++;
+ pFormat++;
+ pValue++;
} else {
- fp++;
+ pFormat++;
}
}
// continue to iterate pattern if has
// to find out if it has time part.
- while (fp < fend) {
- f = format.charAt(fp);
- if (f == '%' && fp + 1 < fend) {
- fp++;
- f = format.charAt(fp);
- fp++;
- switch (f) {
+ while (pFormat < endFormat) {
+ now = format.charAt(pFormat);
+ if (now == '%' && pFormat + 1 < endFormat) {
+ pFormat++;
+ now = format.charAt(pFormat);
+ pFormat++;
+ switch (now) {
case 'H':
case 'h':
case 'I':
@@ -1493,25 +1506,29 @@ public class DateLiteral extends LiteralExpr {
case 'S':
case 'p':
case 'T':
- timePartUsed = true;
+ partUsed |= timePart;
break;
default:
break;
}
} else {
- fp++;
+ pFormat++;
}
}
- if (usaTime) {
+ if (partUsed == 0) {
+ throw new InvalidFormatException("Nothing for legal Date: " +
value);
+ }
+
+ if (hourSystem12) {
if (this.hour > 12 || this.hour < 1) {
throw new InvalidFormatException("Invalid hour: " + hour);
}
- this.hour = (this.hour % 12) + dayPart;
+ this.hour = (this.hour % 12) + halfDay;
}
if (hasSubVal) {
- return vp;
+ return pValue;
}
// Year day
@@ -1540,21 +1557,21 @@ public class DateLiteral extends LiteralExpr {
getDateFromDaynr(days);
}
- if (!timePartUsed && year > 0) {
- if (format.equals("%Y")) {
- month = day = 1;
- } else if (format.equals("%Y-%m")) {
+ // complete default month/day
+ if ((partUsed & ~normalDatePart) == 0) { // only date here
+ if ((partUsed & dayPart) == 0) {
day = 1;
+ if ((partUsed & monthPart) == 0) {
+ month = 1;
+ }
}
}
// Compute timestamp type
- // TODO(Gabriel): we still use old version datetime/date and change
this to new version when
- // we think it's stable enough
- if (datePartUsed) {
- if (microSecondPartUsed) {
+ if ((partUsed & datePart) != 0) { // Ymd part only
+ if ((partUsed & fracPart) != 0) {
this.type = Type.DATETIMEV2_WITH_MAX_SCALAR;
- } else if (timePartUsed) {
+ } else if ((partUsed & timePart) != 0) {
this.type = ScalarType.getDefaultDateType(Type.DATETIME);
} else {
this.type = ScalarType.getDefaultDateType(Type.DATE);
diff --git a/regression-test/data/correctness/test_str_to_date.out
b/regression-test/data/correctness/test_str_to_date.out
index 5aa1f75a2cc..633b590ad98 100644
--- a/regression-test/data/correctness/test_str_to_date.out
+++ b/regression-test/data/correctness/test_str_to_date.out
@@ -13,15 +13,18 @@
-- !select4 --
2020-12-03T11:45:14
--- !add_1 --
+-- !short_nereids_1 --
2023-01-01
--- !add_2 --
+-- !short_nereids_2 --
2023-12-01
--- !add_3 --
+-- !short_nereids_3 --
2023-01-01
+-- !short_nereids_4 --
+2020-02-01
+
-- !select5 --
2019-12-01 yyyy-MM-dd 2019-12-01T00:00
20201203 yyyyMMdd 2020-12-03T00:00
@@ -36,12 +39,15 @@
-- !select8 --
2020-12-03T11:45:14
--- !add_4 --
+-- !short_legacy_1 --
2023-01-01
--- !add_5 --
+-- !short_legacy_2 --
2023-12-01
--- !add_6 --
+-- !short_legacy_3 --
2023-01-01
+-- !short_legacy_4 --
+2020-02-01
+
diff --git a/regression-test/data/datatype_p0/date/test_invalid_date.out
b/regression-test/data/datatype_p0/date/test_invalid_date.out
deleted file mode 100644
index 80b3c3963d3..00000000000
--- a/regression-test/data/datatype_p0/date/test_invalid_date.out
+++ /dev/null
@@ -1,7 +0,0 @@
--- This file is automatically generated. You should know what you did if you
want to edit this
--- !sql1 --
-\N
-
--- !sql2 --
-\N
-
diff --git a/regression-test/suites/correctness/test_str_to_date.groovy
b/regression-test/suites/correctness/test_str_to_date.groovy
index 43c80d4f4ba..300f9cb9216 100644
--- a/regression-test/suites/correctness/test_str_to_date.groovy
+++ b/regression-test/suites/correctness/test_str_to_date.groovy
@@ -49,9 +49,10 @@ sql """ set enable_nereids_planner=true ,
enable_fallback_to_original_planner=f
qt_select4 """
SELECT STR_TO_DATE('2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss');
"""
- qt_add_1 " select STR_TO_DATE('2023', '%Y') "
- qt_add_2 " select STR_TO_DATE('2023-12', '%Y-%m') "
- qt_add_3 " select STR_TO_DATE('2023-12', '%Y')"
+ qt_short_nereids_1 " select STR_TO_DATE('2023', '%Y') "
+ qt_short_nereids_2 " select STR_TO_DATE('2023-12', '%Y-%m') "
+ qt_short_nereids_3 " select STR_TO_DATE('2023-12', '%Y')"
+ qt_short_nereids_4 " select STR_TO_DATE('2020%2', '%Y%%%m')"
sql """ set enable_nereids_planner=false;"""
@@ -67,7 +68,8 @@ sql """ set enable_nereids_planner=false;"""
qt_select8 """
SELECT STR_TO_DATE('2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss');
"""
- qt_add_4 " select STR_TO_DATE('2023', '%Y') "
- qt_add_5 " select STR_TO_DATE('2023-12', '%Y-%m') "
- qt_add_6 " select STR_TO_DATE('2023-12', '%Y')"
+ qt_short_legacy_1 " select STR_TO_DATE('2023', '%Y') "
+ qt_short_legacy_2 " select STR_TO_DATE('2023-12', '%Y-%m') "
+ qt_short_legacy_3 " select STR_TO_DATE('2023-12', '%Y')"
+ qt_short_legacy_4 " select STR_TO_DATE('2020%2', '%Y%%%m')"
}
diff --git a/regression-test/suites/datatype_p0/date/test_invalid_date.groovy
b/regression-test/suites/datatype_p0/date/test_invalid_date.groovy
deleted file mode 100644
index 5a7af470357..00000000000
--- a/regression-test/suites/datatype_p0/date/test_invalid_date.groovy
+++ /dev/null
@@ -1,37 +0,0 @@
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-suite("test_invalid_date") {
- def tbName = "test_invalid_date"
- sql "DROP TABLE IF EXISTS ${tbName}"
- sql """
- CREATE TABLE IF NOT EXISTS ${tbName} (
- c0 int,
- c1 char(10),
- c2 datev1,
- c3 datev2
- )
- UNIQUE KEY(c0)
- DISTRIBUTED BY HASH(c0) BUCKETS 5 properties("replication_num" =
"1");
- """
- sql "insert into ${tbName} values(1, 'test1', '2000-01-01', '2000-01-01')"
-
- qt_sql1 "select str_to_date('202301', '%Y%m');"
- qt_sql2 "select str_to_date('202301', '%Y%m') from ${tbName}"
- sql "DROP TABLE ${tbName}"
-}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]