This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 22723d0f2 IMPALA-7086: Cache timezone in *_utc_timestamp()
22723d0f2 is described below
commit 22723d0f276468a25553f007dc65b21d79bd821d
Author: Mihaly Szjatinya <[email protected]>
AuthorDate: Fri Sep 6 14:47:43 2024 +0200
IMPALA-7086: Cache timezone in *_utc_timestamp()
Added Prepare/Close routines around the from_utc/to_utc built-in functions.
This gives a consistent time improvement for constant timezones.
Given a sample table with 600M timestamp rows, in an all-default
environment the query below shows a stable 2-3 second improvement.
SELECT count(*) FROM a_table
where from_utc_timestamp(ts, "a_timezone") > "a_date";
Averaged results for Release, SET MT_OP=1, SET DISABLE_CODEGEN=TRUE:
from_utc: 16,53s -> 12,53s
to_utc: 14,02s -> 11,53s
Change-Id: Icdf5ff82c5d0554333aef1bc3bba034a4cf48230
Reviewed-on: http://gerrit.cloudera.org:8080/21735
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/exprs/timestamp-functions.cc | 61 +++++++++++++++++++---------
be/src/exprs/timestamp-functions.h | 6 +++
common/function-registry/impala_functions.py | 8 +++-
3 files changed, 54 insertions(+), 21 deletions(-)
diff --git a/be/src/exprs/timestamp-functions.cc
b/be/src/exprs/timestamp-functions.cc
index e9a1e60a3..96fcaa3e8 100644
--- a/be/src/exprs/timestamp-functions.cc
+++ b/be/src/exprs/timestamp-functions.cc
@@ -84,23 +84,47 @@ const map<string, int> TimestampFunctions::DAYNAME_MAP = {
{"sat", 6}, {"saturday", 6},
};
-TimestampVal TimestampFunctions::FromUtc(FunctionContext* context,
- const TimestampVal& ts_val, const StringVal& tz_string_val) {
- if (ts_val.is_null || tz_string_val.is_null) return TimestampVal::null();
- const TimestampValue ts_value = TimestampValue::FromTimestampVal(ts_val);
- if (UNLIKELY(!ts_value.HasDateAndTime())) return TimestampVal::null();
+void TimestampFunctions::FromUtcAndToUtcPrepare(FunctionContext* context,
+ FunctionContext::FunctionStateScope scope) {
+ if (scope != FunctionContext::FRAGMENT_LOCAL) return;
+ const Timezone* timezone = nullptr;
+ if (context->IsArgConstant(1)) {
+ StringVal tz_string_val =
*reinterpret_cast<StringVal*>(context->GetConstantArg(1));
+ const StringValue& tz_string_value =
StringValue::FromStringVal(tz_string_val);
+ timezone = TimezoneDatabase::FindTimezone(
+ string(tz_string_value.Ptr(), tz_string_value.Len()));
+ }
+ context->SetFunctionState(scope, (void *)(timezone));
+}
- const StringValue& tz_string_value =
StringValue::FromStringVal(tz_string_val);
- const Timezone* timezone = TimezoneDatabase::FindTimezone(
- string(tz_string_value.Ptr(), tz_string_value.Len()));
+const Timezone* GetTimezone(FunctionContext* context,
+ const StringValue& tz_string_value) {
+ void* state = context->GetFunctionState(FunctionContext::FRAGMENT_LOCAL);
+ const Timezone* timezone = reinterpret_cast<Timezone*>(state);
+ DCHECK(timezone == nullptr || context->IsArgConstant(1));
+ if (timezone == nullptr) {
+ timezone = TimezoneDatabase::FindTimezone(
+ string(tz_string_value.Ptr(), tz_string_value.Len()));
+ }
if (UNLIKELY(timezone == nullptr)) {
// Although this is an error, Hive ignores it. We will issue a warning but
otherwise
// ignore the error too.
stringstream ss;
ss << "Unknown timezone '" << tz_string_value << "'" << endl;
context->AddWarning(ss.str().c_str());
- return ts_val;
}
+ return timezone;
+}
+
+TimestampVal TimestampFunctions::FromUtc(FunctionContext* context,
+ const TimestampVal& ts_val, const StringVal& tz_string_val) {
+ if (ts_val.is_null || tz_string_val.is_null) return TimestampVal::null();
+ const TimestampValue ts_value = TimestampValue::FromTimestampVal(ts_val);
+ if (UNLIKELY(!ts_value.HasDateAndTime())) return TimestampVal::null();
+
+ const StringValue& tz_string_value =
StringValue::FromStringVal(tz_string_val);
+ const Timezone* timezone = GetTimezone(context, tz_string_value);
+ if (UNLIKELY(timezone == nullptr)) return ts_val;
TimestampValue ts_value_ret = ts_value;
ts_value_ret.UtcToLocal(*timezone);
@@ -124,16 +148,8 @@ TimestampVal TimestampFunctions::ToUtc(FunctionContext*
context,
if (!ts_value.HasDateAndTime()) return TimestampVal::null();
const StringValue& tz_string_value =
StringValue::FromStringVal(tz_string_val);
- const Timezone* timezone = TimezoneDatabase::FindTimezone(
- string(tz_string_value.Ptr(), tz_string_value.Len()));
- if (UNLIKELY(timezone == nullptr)) {
- // Although this is an error, Hive ignores it. We will issue a warning but
otherwise
- // ignore the error too.
- stringstream ss;
- ss << "Unknown timezone '" << tz_string_value << "'" << endl;
- context->AddWarning(ss.str().c_str());
- return ts_val;
- }
+ const Timezone* timezone = GetTimezone(context, tz_string_value);
+ if (UNLIKELY(timezone == nullptr)) return ts_val;
TimestampValue ts_value_ret = ts_value;
ts_value_ret.LocalToUtc(*timezone);
@@ -150,6 +166,13 @@ TimestampVal TimestampFunctions::ToUtc(FunctionContext*
context,
return ts_val_ret;
}
+void TimestampFunctions::FromUtcAndToUtcClose(FunctionContext* context,
+ FunctionContext::FunctionStateScope scope) {
+ if (scope == FunctionContext::FRAGMENT_LOCAL) {
+ context->SetFunctionState(scope, nullptr);
+ }
+}
+
TimestampVal TimestampFunctions::ToUtcUnambiguous(FunctionContext* context,
const TimestampVal& ts_val, const StringVal& tz_string_val,
const BooleanVal& expect_pre_bool_val) {
diff --git a/be/src/exprs/timestamp-functions.h
b/be/src/exprs/timestamp-functions.h
index 4c4dd914a..ac5805190 100644
--- a/be/src/exprs/timestamp-functions.h
+++ b/be/src/exprs/timestamp-functions.h
@@ -153,6 +153,12 @@ class TimestampFunctions {
static TimestampVal UnixMicrosToUtcTimestamp(FunctionContext* context,
const BigIntVal& unix_time_micros);
+ // Find and initialize the timezone object if its name is a constant.
+ static void FromUtcAndToUtcPrepare(FunctionContext* context,
+ FunctionContext::FunctionStateScope scope);
+ static void FromUtcAndToUtcClose(FunctionContext* context,
+ FunctionContext::FunctionStateScope scope);
+
/// Convert a timestamp to or from a particular timezone based time.
static TimestampVal FromUtc(FunctionContext* context,
const TimestampVal& ts_val, const StringVal& tz_string_val);
diff --git a/common/function-registry/impala_functions.py
b/common/function-registry/impala_functions.py
index 8f61b80bb..76a3dc4e1 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -251,9 +251,13 @@ visible_functions = [
[['now', 'current_timestamp'], 'TIMESTAMP', [],
'_ZN6impala18TimestampFunctions3NowEPN10impala_udf15FunctionContextE'],
[['utc_timestamp'], 'TIMESTAMP', [],
'_ZN6impala18TimestampFunctions12UtcTimestampEPN10impala_udf15FunctionContextE'],
[['from_utc_timestamp'], 'TIMESTAMP', ['TIMESTAMP', 'STRING'],
- "impala::TimestampFunctions::FromUtc"],
+ 'impala::TimestampFunctions::FromUtc',
+
'_ZN6impala18TimestampFunctions22FromUtcAndToUtcPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
+
'_ZN6impala18TimestampFunctions20FromUtcAndToUtcCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
[['to_utc_timestamp'], 'TIMESTAMP', ['TIMESTAMP', 'STRING'],
- "impala::TimestampFunctions::ToUtc"],
+ 'impala::TimestampFunctions::ToUtc',
+
'_ZN6impala18TimestampFunctions22FromUtcAndToUtcPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
+
'_ZN6impala18TimestampFunctions20FromUtcAndToUtcCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
[['timeofday'], 'STRING', [],"impala::TimestampFunctions::TimeOfDay"],
[['timestamp_cmp'], 'INT', ['TIMESTAMP', 'TIMESTAMP'],
"impala::TimestampFunctions::TimestampCmp"],