This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch branch-4.4.1 in repository https://gitbox.apache.org/repos/asf/impala.git
commit 88bc00ccd8313b8540b9239d5fc5988bf4d205cd Author: Joe McDonnell <[email protected]> AuthorDate: Mon May 20 14:30:15 2024 -0700 IMPALA-12800: Skip O(n^2) ExprSubstitutionMap::verify() for release builds ExprSubstitutionMap::compose() and combine() call verify() to check the new ExprSubstitutionMap for duplicates. This algorithm is O(n^2) and can add significant overhead to SQLs with a large number of expressions or inline views. This changes verify() to skip the check for release builds (keeping it for debug builds). In a query with 20+ layers of inline views and thousands of expressions, turning off the verify() call cuts the execution time from 51 minutes to 18 minutes. This doesn't fully solve slowness in ExprSubstitutionMap. Further improvement would require Expr to support hash-based algorithms, which is a much larger change. Testing: - Manual performance comparison with/without the verify() call Change-Id: Ieeacfec6a5b487076ce5b19747319630616411f0 Reviewed-on: http://gerrit.cloudera.org:8080/21444 Reviewed-by: Joe McDonnell <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/util/backend-gflag-util.cc | 5 +++++ common/thrift/BackendGflags.thrift | 2 ++ fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java | 5 +++++ fe/src/main/java/org/apache/impala/service/BackendConfig.java | 4 ++++ 4 files changed, 16 insertions(+) diff --git a/be/src/util/backend-gflag-util.cc b/be/src/util/backend-gflag-util.cc index 7e6127a90..48bb739dc 100644 --- a/be/src/util/backend-gflag-util.cc +++ b/be/src/util/backend-gflag-util.cc @@ -486,6 +486,11 @@ Status PopulateThriftBackendGflags(TBackendGflags& cfg) { cfg.__set_dbcp_max_conn_pool_size(FLAGS_dbcp_max_conn_pool_size); cfg.__set_dbcp_max_wait_millis_for_conn(FLAGS_dbcp_max_wait_millis_for_conn); cfg.__set_dbcp_data_source_idle_timeout(FLAGS_dbcp_data_source_idle_timeout_s); +#ifdef NDEBUG + cfg.__set_is_release_build(true); +#else + cfg.__set_is_release_build(false); +#endif return Status::OK(); } diff --git a/common/thrift/BackendGflags.thrift b/common/thrift/BackendGflags.thrift index 73ff305db..122d6ea5e 100644 --- a/common/thrift/BackendGflags.thrift +++ b/common/thrift/BackendGflags.thrift @@ -304,4 +304,6 @@ struct TBackendGflags { 136: required i32 dbcp_max_wait_millis_for_conn 137: required i32 dbcp_data_source_idle_timeout + + 138: required bool is_release_build } diff --git a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java index 74262490c..0f6b7d5dc 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java +++ b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java @@ -27,6 +27,8 @@ import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import org.apache.impala.service.BackendConfig; + /** * Map of expression substitutions: lhs[i] gets substituted with rhs[i]. * To support expression substitution across query blocks, rhs exprs must already be @@ -168,6 +170,9 @@ public final class ExprSubstitutionMap { * and that all rhs exprs are analyzed. */ private void verify() { + // This is an O(n^2) algorithm, so skip this on release builds to avoid the overhead. + if (BackendConfig.INSTANCE.isReleaseBuild()) return; + for (int i = 0; i < lhs_.size(); ++i) { for (int j = i + 1; j < lhs_.size(); ++j) { if (lhs_.get(i).equals(lhs_.get(j))) { diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java b/fe/src/main/java/org/apache/impala/service/BackendConfig.java index d2a1d7504..6325176bc 100644 --- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java +++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java @@ -521,4 +521,8 @@ public class BackendConfig { public int getDbcpDataSourceIdleTimeoutInSeconds() { return backendCfg_.dbcp_data_source_idle_timeout; } + + public boolean isReleaseBuild() { + return backendCfg_.is_release_build; + } }
