This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c1a9a23737dc3f5849e629c41e094b0b04e9ad02
Author: Daniel Becker <[email protected]>
AuthorDate: Fri Sep 30 14:51:08 2022 +0200

    IMPALA-11623: Put *-ir.cc files into their own libraries to avoid extra
    recompilation
    
    Currently, modifying most files incurs a rebuild of the LLVM IR, which
    is a slow serial step.
    
    This change splits off the *-ir.cc files into their own libraries that
    have no dependency relation with the original libraries from which they
    are separated. The build of the LLVM IR depends only on the new IR
    libraries, so if a non-IR C++ source file is modified, the LLVM IR does
    not need to be rebuilt.
    
    Note that the LLVM IR build does not actually need or use the new IR
    libraries - it is a completely separate procedure in which the *-ir.cc
    files are included in impala-ir.cc and compiled with Clang into LLVM
    bitcode. We add the dependency on the IR libraries to trigger the
    rebuilding of the LLVM IR whenever an *-ir.cc file is modified.
    
    However, the new IR libraries still need to be compiled into the main
    Impala executable.
    
    Also, dependence on the target 'gen_ir_descriptions' was not consistent.
    As it is a fast target and needed by most other targets, 'gen-deps' now
    depends on it to keep things safer and simpler. Direct dependencies on
    'gen_ir_descriptions' were removed from the targets that also depend on
    'gen-deps'.
    
    Testing:
     - manually verified for each modified target library that the LLVM IR
       is not recompiled when a non-IR .cc file is modified, but is
       recompiled if the corresponding header or *-ir.cc file is modified.
    
    Change-Id: I63b9285bac6494a19f614d0ebc694a91bdf7a8a0
    Reviewed-on: http://gerrit.cloudera.org:8080/19083
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 CMakeLists.txt                   |  2 +-
 be/CMakeLists.txt                |  7 ++++++
 be/src/codegen/CMakeLists.txt    |  6 ++---
 be/src/exec/CMakeLists.txt       | 23 ++++++++++--------
 be/src/exec/avro/CMakeLists.txt  |  9 ++++++--
 be/src/exec/kudu/CMakeLists.txt  |  9 ++++++--
 be/src/exprs/CMakeLists.txt      | 50 ++++++++++++++++++++++------------------
 be/src/gutil/CMakeLists.txt      |  2 +-
 be/src/runtime/CMakeLists.txt    | 14 +++++++----
 be/src/udf/CMakeLists.txt        | 12 ++++++++--
 be/src/util/CMakeLists.txt       | 14 +++++++----
 be/src/util/cache/CMakeLists.txt |  4 ++--
 12 files changed, 97 insertions(+), 55 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0908ebcea..611c5a533 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -502,7 +502,7 @@ add_subdirectory(shell)
 
 # Build target for all generated files which most backend code depends on
 add_custom_target(gen-deps ALL DEPENDS thrift-deps proto-deps fb-deps
-  kudu-util-proto-deps kudu-rpc-proto-deps kudu-security-proto-deps)
+  kudu-util-proto-deps kudu-rpc-proto-deps kudu-security-proto-deps gen_ir_descriptions)
 
 add_custom_target(tarballs ALL DEPENDS shell_tarball)
 
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 5d1fa4d9c..22ac08fc3 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -482,15 +482,19 @@ set (IMPALA_LIBS
   CodeGen
   Common
   Exec
+  ExecIr
   ExecAvro
+  ExecAvroIr
   ExecHBase
   ExecKudu
+  ExecKuduIr
   ExecOrc
   ExecParquet
   ExecRcfile
   ExecSequence
   ExecText
   Exprs
+  ExprsIr
   GlobalFlags
   histogram_proto
   ImpalaThrift
@@ -503,6 +507,7 @@ set (IMPALA_LIBS
   rpc_introspection_proto
   pb_util_proto
   Runtime
+  RuntimeIr
   Scheduling
   security
   Service
@@ -511,7 +516,9 @@ set (IMPALA_LIBS
   ThriftSaslTransport
   token_proto
   Udf
+  UdfIr
   Util
+  UtilIr
   UtilCache
 )
 
diff --git a/be/src/codegen/CMakeLists.txt b/be/src/codegen/CMakeLists.txt
index 92999b91a..ac00a1c2b 100644
--- a/be/src/codegen/CMakeLists.txt
+++ b/be/src/codegen/CMakeLists.txt
@@ -36,7 +36,7 @@ add_library(CodeGen
   ${IR_C_FILE}
   ${LEGACY_AVX_IR_C_FILE}
 )
-add_dependencies(CodeGen gen-deps gen_ir_descriptions)
+add_dependencies(CodeGen gen-deps)
 
 add_library(CodeGenTests STATIC
   instruction-counter-test.cc
@@ -85,7 +85,7 @@ add_custom_command(
           ${CLANG_INCLUDE_FLAGS} ${IR_INPUT_FILES} -o ${IR_TMP_OUTPUT_FILE}
   COMMAND ${LLVM_OPT_EXECUTABLE} ${LLVM_OPT_IR_FLAGS} < ${IR_TMP_OUTPUT_FILE} > ${IR_OUTPUT_FILE}
   COMMAND rm ${IR_TMP_OUTPUT_FILE}
-  DEPENDS Exec ExecAvro ExecKudu Exprs Runtime Udf Util ${IR_INPUT_FILES}
+  DEPENDS ExecIr ExecAvroIr ExecKuduIr ExprsIr RuntimeIr UdfIr UtilIr ${IR_INPUT_FILES}
 )
 
 add_custom_command(
@@ -95,7 +95,7 @@ add_custom_command(
   COMMAND ${LLVM_OPT_EXECUTABLE} ${LLVM_OPT_IR_FLAGS} < ${LEGACY_AVX_IR_TMP_OUTPUT_FILE}
           > ${LEGACY_AVX_IR_OUTPUT_FILE}
   COMMAND rm ${LEGACY_AVX_IR_TMP_OUTPUT_FILE}
-  DEPENDS Exec ExecAvro ExecKudu Exprs Runtime Udf Util ${IR_INPUT_FILES}
+  DEPENDS ExecIr ExecAvroIr ExecKuduIr ExprsIr RuntimeIr UdfIr UtilIr ${IR_INPUT_FILES}
 )
 
 add_custom_target(compile_to_ir DEPENDS ${IR_OUTPUT_FILE})
diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt
index ce0b2e8e1..52fd3b95b 100644
--- a/be/src/exec/CMakeLists.txt
+++ b/be/src/exec/CMakeLists.txt
@@ -30,6 +30,20 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec")
 # where to put generated binaries
 set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec")
 
+add_library(ExecIr
+  grouping-aggregator-ir.cc
+  hdfs-columnar-scanner-ir.cc
+  hdfs-scanner-ir.cc
+  non-grouping-aggregator-ir.cc
+  partitioned-hash-join-builder-ir.cc
+  partitioned-hash-join-node-ir.cc
+  select-node-ir.cc
+  topn-node-ir.cc
+  union-node-ir.cc
+)
+
+add_dependencies(ExecIr gen-deps)
+
 add_library(Exec
   acid-metadata-utils.cc
   aggregation-node.cc
@@ -52,16 +66,13 @@ add_library(Exec
   file-metadata-utils.cc
   filter-context.cc
   grouping-aggregator.cc
-  grouping-aggregator-ir.cc
   grouping-aggregator-partition.cc
   hash-table.cc
   hdfs-columnar-scanner.cc
-  hdfs-columnar-scanner-ir.cc
   hdfs-scan-node.cc
   hdfs-scan-node-base.cc
   hdfs-scan-node-mt.cc
   hdfs-scanner.cc
-  hdfs-scanner-ir.cc
   hdfs-table-sink.cc
   hdfs-table-writer.cc
   hdfs-text-table-writer.cc
@@ -70,27 +81,21 @@ add_library(Exec
   nested-loop-join-builder.cc
   nested-loop-join-node.cc
   non-grouping-aggregator.cc
-  non-grouping-aggregator-ir.cc
   partial-sort-node.cc
   partitioned-hash-join-builder.cc
-  partitioned-hash-join-builder-ir.cc
   partitioned-hash-join-node.cc
-  partitioned-hash-join-node-ir.cc
   plan-root-sink.cc
   read-write-util.cc
   scan-node.cc
   scanner-context.cc
   select-node.cc
-  select-node-ir.cc
   singular-row-src-node.cc
   sort-node.cc
   streaming-aggregation-node.cc
   subplan-node.cc
   text-converter.cc
   topn-node.cc
-  topn-node-ir.cc
   union-node.cc
-  union-node-ir.cc
   unnest-node.cc
 )
 
diff --git a/be/src/exec/avro/CMakeLists.txt b/be/src/exec/avro/CMakeLists.txt
index 1f4bfd233..8c7ac8f56 100644
--- a/be/src/exec/avro/CMakeLists.txt
+++ b/be/src/exec/avro/CMakeLists.txt
@@ -21,9 +21,14 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec/avro")
 # where to put generated binaries
 set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec/avro")
 
+add_library(ExecAvroIr
+  hdfs-avro-scanner-ir.cc
+)
+
+add_dependencies(ExecAvroIr gen-deps)
+
 add_library(ExecAvro
   hdfs-avro-scanner.cc
-  hdfs-avro-scanner-ir.cc
 )
 
 add_dependencies(ExecAvro gen-deps)
@@ -33,4 +38,4 @@ add_library(ExecAvroTests STATIC
 )
 add_dependencies(ExecAvroTests gen-deps)
 
-ADD_UNIFIED_BE_LSAN_TEST(hdfs-avro-scanner-test HdfsAvroScannerTest.*)
\ No newline at end of file
+ADD_UNIFIED_BE_LSAN_TEST(hdfs-avro-scanner-test HdfsAvroScannerTest.*)
diff --git a/be/src/exec/kudu/CMakeLists.txt b/be/src/exec/kudu/CMakeLists.txt
index befc7a738..72cebf431 100644
--- a/be/src/exec/kudu/CMakeLists.txt
+++ b/be/src/exec/kudu/CMakeLists.txt
@@ -21,6 +21,12 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec/kudu")
 # where to put generated binaries
 set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec/kudu")
 
+add_library(ExecKuduIr
+  kudu-util-ir.cc
+)
+
+add_dependencies(ExecKuduIr gen-deps)
+
 add_library(ExecKudu
   kudu-scanner.cc
   kudu-scan-node.cc
@@ -28,7 +34,6 @@ add_library(ExecKudu
   kudu-scan-node-mt.cc
   kudu-table-sink.cc
   kudu-util.cc
-  kudu-util-ir.cc
 )
 
-add_dependencies(ExecKudu gen-deps)
\ No newline at end of file
+add_dependencies(ExecKudu gen-deps)
diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt
index a28218ed8..08a385591 100644
--- a/be/src/exprs/CMakeLists.txt
+++ b/be/src/exprs/CMakeLists.txt
@@ -24,61 +24,65 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs")
 
 set(MURMURHASH_SRC_DIR "${CMAKE_SOURCE_DIR}/be/src/thirdparty/murmurhash")
 
-add_library(Exprs
-  agg-fn.cc
-  agg-fn-evaluator.cc
+add_library(ExprsIr
   agg-fn-evaluator-ir.cc
   aggregate-functions-ir.cc
-  anyval-util.cc
   bit-byte-functions-ir.cc
-  case-expr.cc
-  cast-format-expr.cc
   cast-functions-ir.cc
-  compound-predicates.cc
   compound-predicates-ir.cc
-  conditional-functions.cc
   conditional-functions-ir.cc
-  datasketches-common.cc
   datasketches-functions-ir.cc
   date-functions-ir.cc
   decimal-functions-ir.cc
   decimal-operators-ir.cc
-  expr.cc
-  hive-udf-call.cc
   hive-udf-call-ir.cc
   iceberg-functions-ir.cc
   in-predicate-ir.cc
-  is-not-empty-predicate.cc
   is-null-predicate-ir.cc
-  kudu-partition-expr.cc
   kudu-partition-expr-ir.cc
-  like-predicate.cc
   like-predicate-ir.cc
-  literal.cc
   mask-functions-ir.cc
   math-functions-ir.cc
+  operators-ir.cc
+  scalar-expr-ir.cc
+  string-functions-ir.cc
+  timestamp-functions-ir.cc
+  tuple-is-null-predicate-ir.cc
+  udf-builtins-ir.cc
+  utility-functions-ir.cc
+)
+add_dependencies(ExprsIr gen-deps)
+
+add_library(Exprs
+  agg-fn.cc
+  agg-fn-evaluator.cc
+  anyval-util.cc
+  case-expr.cc
+  cast-format-expr.cc
+  compound-predicates.cc
+  conditional-functions.cc
+  datasketches-common.cc
+  expr.cc
+  hive-udf-call.cc
+  is-not-empty-predicate.cc
+  kudu-partition-expr.cc
+  like-predicate.cc
+  literal.cc
   ${MURMURHASH_SRC_DIR}/MurmurHash3.cpp
   null-literal.cc
-  operators-ir.cc
   scalar-expr.cc
   scalar-expr-evaluator.cc
-  scalar-expr-ir.cc
   slot-ref.cc
   string-functions.cc
-  string-functions-ir.cc
   timestamp-functions.cc
-  timestamp-functions-ir.cc
   timezone_db.cc
   tuple-is-null-predicate.cc
-  tuple-is-null-predicate-ir.cc
   scalar-fn-call.cc
   udf-builtins.cc
-  udf-builtins-ir.cc
   utility-functions.cc
-  utility-functions-ir.cc
   valid-tuple-id.cc
 )
-add_dependencies(Exprs gen-deps gen_ir_descriptions)
+add_dependencies(Exprs gen-deps)
 
 add_library(ExprsTests STATIC
   datasketches-test.cc
diff --git a/be/src/gutil/CMakeLists.txt b/be/src/gutil/CMakeLists.txt
index 78ba14b6f..8ea5283bd 100644
--- a/be/src/gutil/CMakeLists.txt
+++ b/be/src/gutil/CMakeLists.txt
@@ -80,5 +80,5 @@ add_kudu_test(strings/string_util-test)
 add_library(GUtilTests STATIC
   sysinfo-test.cc
 )
-add_dependencies(GUtilTests gen-deps gen_ir_descriptions)
+add_dependencies(GUtilTests gen-deps)
 ADD_UNIFIED_BE_LSAN_TEST(sysinfo-test "SysInfoTest.*")
diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt
index 5322b7b6d..b04e04eb1 100644
--- a/be/src/runtime/CMakeLists.txt
+++ b/be/src/runtime/CMakeLists.txt
@@ -27,6 +27,15 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/runtime")
 # Mark the protobuf file as generated
 set_source_files_properties(${ROW_BATCH_PROTO_SRCS} PROPERTIES GENERATED TRUE)
 
+add_library(RuntimeIr
+  krpc-data-stream-sender-ir.cc
+  raw-value-ir.cc
+  runtime-filter-ir.cc
+  sorter-ir.cc
+  tuple-ir.cc
+)
+add_dependencies(RuntimeIr gen-deps)
+
 add_library(Runtime
   blocking-row-batch-queue.cc
   buffered-tuple-stream.cc
@@ -54,7 +63,6 @@ add_library(Runtime
   krpc-data-stream-mgr.cc
   krpc-data-stream-recvr.cc
   krpc-data-stream-sender.cc
-  krpc-data-stream-sender-ir.cc
   lib-cache.cc
   mem-tracker.cc
   mem-pool.cc
@@ -65,25 +73,21 @@ add_library(Runtime
   test-env.cc
   types.cc
   raw-value.cc
-  raw-value-ir.cc
   reservation-manager.cc
   row-batch.cc
   ${ROW_BATCH_PROTO_SRCS}
   runtime-filter.cc
   runtime-filter-bank.cc
-  runtime-filter-ir.cc
   runtime-state.cc
   scanner-mem-limiter.cc
   sorted-run-merger.cc
   sorter.cc
-  sorter-ir.cc
   spillable-row-batch-queue.cc
   string-value.cc
   thread-resource-mgr.cc
   timestamp-parse-util.cc
   timestamp-value.cc
   tuple.cc
-  tuple-ir.cc
   tuple-row.cc
   tmp-file-mgr.cc
 )
diff --git a/be/src/udf/CMakeLists.txt b/be/src/udf/CMakeLists.txt
index 0470716a8..b06ea8459 100644
--- a/be/src/udf/CMakeLists.txt
+++ b/be/src/udf/CMakeLists.txt
@@ -22,13 +22,21 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/udf")
 # where to put generated binaries
 set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/udf")
 
+add_library(UdfIr udf-ir.cc)
+add_dependencies(UdfIr gen-deps)
+
+set(UDF_SRC_FILES
+  udf.cc
+  udf-test-harness.cc
+)
+
 # Build this library twice. Once to be linked into the main impalad. This version
 # can have dependencies on our other libs. The second version is shipped as part
 # of the UDF sdk, which can't use other libs.
-add_library(Udf udf.cc udf-ir.cc udf-test-harness.cc)
+add_library(Udf ${UDF_SRC_FILES})
 add_dependencies(Udf gen-deps)
 
-add_library(ImpalaUdf udf.cc udf-ir.cc udf-test-harness.cc)
+add_library(ImpalaUdf ${UDF_SRC_FILES})
 add_dependencies(ImpalaUdf gen-deps)
 set_target_properties(ImpalaUdf PROPERTIES COMPILE_FLAGS "-DIMPALA_UDF_SDK_BUILD")
 
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index fcf0b5eb6..d040c8571 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -27,6 +27,13 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/util")
 # where to put generated binaries
 set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/util")
 
+add_library(UtilIr
+  bloom-filter-ir.cc
+  in-list-filter-ir.cc
+  min-max-filter-ir.cc
+)
+add_dependencies(UtilIr gen-deps)
+
 set(UTIL_SRCS
   auth-util.cc
   avro-util.cc
@@ -36,7 +43,6 @@ set(UTIL_SRCS
   bit-packing.cc
   bit-util.cc
   bloom-filter.cc
-  bloom-filter-ir.cc
   cgroup-util.cc
   coding-util.cc
   codec.cc
@@ -62,7 +68,6 @@ set(UTIL_SRCS
   impalad-metrics.cc
   impala-bloom-filter-buffer-allocator.cc
   in-list-filter.cc
-  in-list-filter-ir.cc
   jni-util.cc
   json-util.cc
   jwt-util.cc
@@ -75,7 +80,6 @@ set(UTIL_SRCS
   memusage-path-handlers.cc
   metrics.cc
   min-max-filter.cc
-  min-max-filter-ir.cc
   minidump.cc
   mpfit-util.cc
   network-util.cc
@@ -139,7 +143,7 @@ else()
 endif()
 
 add_library(Util ${UTIL_SRCS})
-add_dependencies(Util gen-deps gen_ir_descriptions)
+add_dependencies(Util gen-deps)
 
 add_library(UtilTests STATIC
   benchmark-test.cc
@@ -191,7 +195,7 @@ add_library(UtilTests STATIC
   uid-util-test.cc
   zip-util-test.cc
 )
-add_dependencies(UtilTests gen-deps gen_ir_descriptions)
+add_dependencies(UtilTests gen-deps)
 
 # Squeasel requires C99 compatibility to build.
 SET_SOURCE_FILES_PROPERTIES(${SQUEASEL_SRC_DIR}/squeasel.c
diff --git a/be/src/util/cache/CMakeLists.txt b/be/src/util/cache/CMakeLists.txt
index 7cc1999b7..3cf8409c3 100644
--- a/be/src/util/cache/CMakeLists.txt
+++ b/be/src/util/cache/CMakeLists.txt
@@ -26,14 +26,14 @@ add_library(UtilCache
   lirs-cache.cc
   rl-cache.cc
 )
-add_dependencies(UtilCache gen-deps gen_ir_descriptions)
+add_dependencies(UtilCache gen-deps)
 
 add_library(UtilCacheTests STATIC
   cache-test.cc
   lirs-cache-test.cc
   rl-cache-test.cc
 )
-add_dependencies(UtilCacheTests gen-deps gen_ir_descriptions)
+add_dependencies(UtilCacheTests gen-deps)
 
 add_executable(cache-bench cache-bench.cc)
 target_link_libraries(cache-bench ${IMPALA_TEST_LINK_LIBS})

Reply via email to