This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 22723d0f2 IMPALA-7086: Cache timezone in *_utc_timestamp()
22723d0f2 is described below

commit 22723d0f276468a25553f007dc65b21d79bd821d
Author: Mihaly Szjatinya <[email protected]>
AuthorDate: Fri Sep 6 14:47:43 2024 +0200

    IMPALA-7086: Cache timezone in *_utc_timestamp()
    
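    Added Prepare/Close routines for the from_utc_timestamp() and
    to_utc_timestamp() built-in functions, so that a constant timezone
    argument is looked up once and cached instead of on every row.
    This gives a consistent time improvement for constant timezones.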
    
    Given a sample table with 600M timestamp rows, in an all-default
    environment the query below shows a stable improvement of 2-3 seconds.
    SELECT count(*) FROM a_table
    where from_utc_timestamp(ts, "a_timezone") > "a_date";
    
    Averaged results for Release, SET MT_DOP=1, SET DISABLE_CODEGEN=TRUE:
    from_utc: 16,53s -> 12,53s
    to_utc: 14,02s -> 11,53s
    
    Change-Id: Icdf5ff82c5d0554333aef1bc3bba034a4cf48230
    Reviewed-on: http://gerrit.cloudera.org:8080/21735
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exprs/timestamp-functions.cc          | 61 +++++++++++++++++++---------
 be/src/exprs/timestamp-functions.h           |  6 +++
 common/function-registry/impala_functions.py |  8 +++-
 3 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc
index e9a1e60a3..96fcaa3e8 100644
--- a/be/src/exprs/timestamp-functions.cc
+++ b/be/src/exprs/timestamp-functions.cc
@@ -84,23 +84,47 @@ const map<string, int> TimestampFunctions::DAYNAME_MAP = {
     {"sat", 6}, {"saturday", 6},
 };
 
-TimestampVal TimestampFunctions::FromUtc(FunctionContext* context,
-    const TimestampVal& ts_val, const StringVal& tz_string_val) {
-  if (ts_val.is_null || tz_string_val.is_null) return TimestampVal::null();
-  const TimestampValue ts_value = TimestampValue::FromTimestampVal(ts_val);
-  if (UNLIKELY(!ts_value.HasDateAndTime())) return TimestampVal::null();
+void TimestampFunctions::FromUtcAndToUtcPrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  if (scope != FunctionContext::FRAGMENT_LOCAL) return;
+  const Timezone* timezone = nullptr;
+  if (context->IsArgConstant(1)) {
+    StringVal tz_string_val = 
*reinterpret_cast<StringVal*>(context->GetConstantArg(1));
+    const StringValue& tz_string_value = 
StringValue::FromStringVal(tz_string_val);
+    timezone = TimezoneDatabase::FindTimezone(
+        string(tz_string_value.Ptr(), tz_string_value.Len()));
+  }
+  context->SetFunctionState(scope, (void *)(timezone));
+}
 
-  const StringValue& tz_string_value = 
StringValue::FromStringVal(tz_string_val);
-  const Timezone* timezone = TimezoneDatabase::FindTimezone(
-      string(tz_string_value.Ptr(), tz_string_value.Len()));
+const Timezone* GetTimezone(FunctionContext* context,
+    const StringValue& tz_string_value) {
+  void* state = context->GetFunctionState(FunctionContext::FRAGMENT_LOCAL);
+  const Timezone* timezone = reinterpret_cast<Timezone*>(state);
+  DCHECK(timezone == nullptr || context->IsArgConstant(1));
+  if (timezone == nullptr) {
+    timezone = TimezoneDatabase::FindTimezone(
+        string(tz_string_value.Ptr(), tz_string_value.Len()));
+  }
   if (UNLIKELY(timezone == nullptr)) {
     // Although this is an error, Hive ignores it. We will issue a warning but 
otherwise
     // ignore the error too.
     stringstream ss;
     ss << "Unknown timezone '" << tz_string_value << "'" << endl;
     context->AddWarning(ss.str().c_str());
-    return ts_val;
   }
+  return timezone;
+}
+
+TimestampVal TimestampFunctions::FromUtc(FunctionContext* context,
+    const TimestampVal& ts_val, const StringVal& tz_string_val) {
+  if (ts_val.is_null || tz_string_val.is_null) return TimestampVal::null();
+  const TimestampValue ts_value = TimestampValue::FromTimestampVal(ts_val);
+  if (UNLIKELY(!ts_value.HasDateAndTime())) return TimestampVal::null();
+
+  const StringValue& tz_string_value = 
StringValue::FromStringVal(tz_string_val);
+  const Timezone* timezone = GetTimezone(context, tz_string_value);
+  if (UNLIKELY(timezone == nullptr)) return ts_val;
 
   TimestampValue ts_value_ret = ts_value;
   ts_value_ret.UtcToLocal(*timezone);
@@ -124,16 +148,8 @@ TimestampVal TimestampFunctions::ToUtc(FunctionContext* context,
   if (!ts_value.HasDateAndTime()) return TimestampVal::null();
 
   const StringValue& tz_string_value = 
StringValue::FromStringVal(tz_string_val);
-  const Timezone* timezone = TimezoneDatabase::FindTimezone(
-      string(tz_string_value.Ptr(), tz_string_value.Len()));
-  if (UNLIKELY(timezone == nullptr)) {
-    // Although this is an error, Hive ignores it. We will issue a warning but 
otherwise
-    // ignore the error too.
-    stringstream ss;
-    ss << "Unknown timezone '" << tz_string_value << "'" << endl;
-    context->AddWarning(ss.str().c_str());
-    return ts_val;
-  }
+  const Timezone* timezone = GetTimezone(context, tz_string_value);
+  if (UNLIKELY(timezone == nullptr)) return ts_val;
 
   TimestampValue ts_value_ret = ts_value;
   ts_value_ret.LocalToUtc(*timezone);
@@ -150,6 +166,13 @@ TimestampVal TimestampFunctions::ToUtc(FunctionContext* context,
   return ts_val_ret;
 }
 
+void TimestampFunctions::FromUtcAndToUtcClose(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  if (scope == FunctionContext::FRAGMENT_LOCAL) {
+    context->SetFunctionState(scope, nullptr);
+  }
+}
+
 TimestampVal TimestampFunctions::ToUtcUnambiguous(FunctionContext* context,
     const TimestampVal& ts_val, const StringVal& tz_string_val,
     const BooleanVal& expect_pre_bool_val) {
diff --git a/be/src/exprs/timestamp-functions.h b/be/src/exprs/timestamp-functions.h
index 4c4dd914a..ac5805190 100644
--- a/be/src/exprs/timestamp-functions.h
+++ b/be/src/exprs/timestamp-functions.h
@@ -153,6 +153,12 @@ class TimestampFunctions {
   static TimestampVal UnixMicrosToUtcTimestamp(FunctionContext* context,
       const BigIntVal& unix_time_micros);
 
+  // Find and initialize timezone object if its name is a constant.
+  static void FromUtcAndToUtcPrepare(FunctionContext* context,
+      FunctionContext::FunctionStateScope scope);
+  static void FromUtcAndToUtcClose(FunctionContext* context,
+      FunctionContext::FunctionStateScope scope);
+
   /// Convert a timestamp to or from a particular timezone based time.
   static TimestampVal FromUtc(FunctionContext* context,
     const TimestampVal& ts_val, const StringVal& tz_string_val);
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index 8f61b80bb..76a3dc4e1 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -251,9 +251,13 @@ visible_functions = [
   [['now', 'current_timestamp'], 'TIMESTAMP', [], 
'_ZN6impala18TimestampFunctions3NowEPN10impala_udf15FunctionContextE'],
   [['utc_timestamp'], 'TIMESTAMP', [], 
'_ZN6impala18TimestampFunctions12UtcTimestampEPN10impala_udf15FunctionContextE'],
   [['from_utc_timestamp'], 'TIMESTAMP', ['TIMESTAMP', 'STRING'],
-   "impala::TimestampFunctions::FromUtc"],
+   'impala::TimestampFunctions::FromUtc',
+   
'_ZN6impala18TimestampFunctions22FromUtcAndToUtcPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
+   
'_ZN6impala18TimestampFunctions20FromUtcAndToUtcCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
   [['to_utc_timestamp'], 'TIMESTAMP', ['TIMESTAMP', 'STRING'],
-   "impala::TimestampFunctions::ToUtc"],
+   'impala::TimestampFunctions::ToUtc',
+   
'_ZN6impala18TimestampFunctions22FromUtcAndToUtcPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
+   
'_ZN6impala18TimestampFunctions20FromUtcAndToUtcCloseEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE'],
   [['timeofday'], 'STRING', [],"impala::TimestampFunctions::TimeOfDay"],
   [['timestamp_cmp'], 'INT', ['TIMESTAMP', 'TIMESTAMP'],
    "impala::TimestampFunctions::TimestampCmp"],

Reply via email to