This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit c1a9a23737dc3f5849e629c41e094b0b04e9ad02 Author: Daniel Becker <[email protected]> AuthorDate: Fri Sep 30 14:51:08 2022 +0200 IMPALA-11623: Put *-ir.cc files into their own libraries to avoid extra recompilation Currently, modifying most files incurs a rebuild of the LLVM IR, which is a slow serial step. This change splits off the *-ir.cc files into their own libraries that have no dependency relation with the original libraries from which they are separated. The build of the LLVM IR depends only on the new IR libraries, so if a non-IR C++ source file is modified, the LLVM IR does not need to be rebuilt. Note that the LLVM IR build does not actually need or use the new IR libraries - it is a completely separate procedure in which the *-ir.cc files are included in impala-ir.cc and compiled with Clang into LLVM bitcode. We add the dependency on the IR libraries to trigger the rebuilding of the LLVM IR whenever an *-ir.cc file is modified. However, the new IR libraries still need to be compiled into the main Impala executable. Also, dependence on the target 'gen_ir_descriptions' was not consistent. As it is a fast target and needed by most other targets, 'gen-deps' now depends on it to keep things safer and simpler. Direct dependencies on 'gen_ir_descriptions' were removed from the targets that also depend on 'gen-deps'. Testing: - manually verified for each modified target library that the LLVM IR is not recompiled when a non-IR .cc file is modified, but is recompiled if the corresponding header or *-ir.cc file is modified. 
Change-Id: I63b9285bac6494a19f614d0ebc694a91bdf7a8a0 Reviewed-on: http://gerrit.cloudera.org:8080/19083 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- CMakeLists.txt | 2 +- be/CMakeLists.txt | 7 ++++++ be/src/codegen/CMakeLists.txt | 6 ++--- be/src/exec/CMakeLists.txt | 23 ++++++++++-------- be/src/exec/avro/CMakeLists.txt | 9 ++++++-- be/src/exec/kudu/CMakeLists.txt | 9 ++++++-- be/src/exprs/CMakeLists.txt | 50 ++++++++++++++++++++++------------------ be/src/gutil/CMakeLists.txt | 2 +- be/src/runtime/CMakeLists.txt | 14 +++++++---- be/src/udf/CMakeLists.txt | 12 ++++++++-- be/src/util/CMakeLists.txt | 14 +++++++---- be/src/util/cache/CMakeLists.txt | 4 ++-- 12 files changed, 97 insertions(+), 55 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0908ebcea..611c5a533 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -502,7 +502,7 @@ add_subdirectory(shell) # Build target for all generated files which most backend code depends on add_custom_target(gen-deps ALL DEPENDS thrift-deps proto-deps fb-deps - kudu-util-proto-deps kudu-rpc-proto-deps kudu-security-proto-deps) + kudu-util-proto-deps kudu-rpc-proto-deps kudu-security-proto-deps gen_ir_descriptions) add_custom_target(tarballs ALL DEPENDS shell_tarball) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 5d1fa4d9c..22ac08fc3 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -482,15 +482,19 @@ set (IMPALA_LIBS CodeGen Common Exec + ExecIr ExecAvro + ExecAvroIr ExecHBase ExecKudu + ExecKuduIr ExecOrc ExecParquet ExecRcfile ExecSequence ExecText Exprs + ExprsIr GlobalFlags histogram_proto ImpalaThrift @@ -503,6 +507,7 @@ set (IMPALA_LIBS rpc_introspection_proto pb_util_proto Runtime + RuntimeIr Scheduling security Service @@ -511,7 +516,9 @@ set (IMPALA_LIBS ThriftSaslTransport token_proto Udf + UdfIr Util + UtilIr UtilCache ) diff --git a/be/src/codegen/CMakeLists.txt b/be/src/codegen/CMakeLists.txt index 
92999b91a..ac00a1c2b 100644 --- a/be/src/codegen/CMakeLists.txt +++ b/be/src/codegen/CMakeLists.txt @@ -36,7 +36,7 @@ add_library(CodeGen ${IR_C_FILE} ${LEGACY_AVX_IR_C_FILE} ) -add_dependencies(CodeGen gen-deps gen_ir_descriptions) +add_dependencies(CodeGen gen-deps) add_library(CodeGenTests STATIC instruction-counter-test.cc @@ -85,7 +85,7 @@ add_custom_command( ${CLANG_INCLUDE_FLAGS} ${IR_INPUT_FILES} -o ${IR_TMP_OUTPUT_FILE} COMMAND ${LLVM_OPT_EXECUTABLE} ${LLVM_OPT_IR_FLAGS} < ${IR_TMP_OUTPUT_FILE} > ${IR_OUTPUT_FILE} COMMAND rm ${IR_TMP_OUTPUT_FILE} - DEPENDS Exec ExecAvro ExecKudu Exprs Runtime Udf Util ${IR_INPUT_FILES} + DEPENDS ExecIr ExecAvroIr ExecKuduIr ExprsIr RuntimeIr UdfIr UtilIr ${IR_INPUT_FILES} ) add_custom_command( @@ -95,7 +95,7 @@ add_custom_command( COMMAND ${LLVM_OPT_EXECUTABLE} ${LLVM_OPT_IR_FLAGS} < ${LEGACY_AVX_IR_TMP_OUTPUT_FILE} > ${LEGACY_AVX_IR_OUTPUT_FILE} COMMAND rm ${LEGACY_AVX_IR_TMP_OUTPUT_FILE} - DEPENDS Exec ExecAvro ExecKudu Exprs Runtime Udf Util ${IR_INPUT_FILES} + DEPENDS ExecIr ExecAvroIr ExecKuduIr ExprsIr RuntimeIr UdfIr UtilIr ${IR_INPUT_FILES} ) add_custom_target(compile_to_ir DEPENDS ${IR_OUTPUT_FILE}) diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index ce0b2e8e1..52fd3b95b 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -30,6 +30,20 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec") +add_library(ExecIr + grouping-aggregator-ir.cc + hdfs-columnar-scanner-ir.cc + hdfs-scanner-ir.cc + non-grouping-aggregator-ir.cc + partitioned-hash-join-builder-ir.cc + partitioned-hash-join-node-ir.cc + select-node-ir.cc + topn-node-ir.cc + union-node-ir.cc +) + +add_dependencies(ExecIr gen-deps) + add_library(Exec acid-metadata-utils.cc aggregation-node.cc @@ -52,16 +66,13 @@ add_library(Exec file-metadata-utils.cc filter-context.cc grouping-aggregator.cc - 
grouping-aggregator-ir.cc grouping-aggregator-partition.cc hash-table.cc hdfs-columnar-scanner.cc - hdfs-columnar-scanner-ir.cc hdfs-scan-node.cc hdfs-scan-node-base.cc hdfs-scan-node-mt.cc hdfs-scanner.cc - hdfs-scanner-ir.cc hdfs-table-sink.cc hdfs-table-writer.cc hdfs-text-table-writer.cc @@ -70,27 +81,21 @@ add_library(Exec nested-loop-join-builder.cc nested-loop-join-node.cc non-grouping-aggregator.cc - non-grouping-aggregator-ir.cc partial-sort-node.cc partitioned-hash-join-builder.cc - partitioned-hash-join-builder-ir.cc partitioned-hash-join-node.cc - partitioned-hash-join-node-ir.cc plan-root-sink.cc read-write-util.cc scan-node.cc scanner-context.cc select-node.cc - select-node-ir.cc singular-row-src-node.cc sort-node.cc streaming-aggregation-node.cc subplan-node.cc text-converter.cc topn-node.cc - topn-node-ir.cc union-node.cc - union-node-ir.cc unnest-node.cc ) diff --git a/be/src/exec/avro/CMakeLists.txt b/be/src/exec/avro/CMakeLists.txt index 1f4bfd233..8c7ac8f56 100644 --- a/be/src/exec/avro/CMakeLists.txt +++ b/be/src/exec/avro/CMakeLists.txt @@ -21,9 +21,14 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec/avro") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec/avro") +add_library(ExecAvroIr + hdfs-avro-scanner-ir.cc +) + +add_dependencies(ExecAvroIr gen-deps) + add_library(ExecAvro hdfs-avro-scanner.cc - hdfs-avro-scanner-ir.cc ) add_dependencies(ExecAvro gen-deps) @@ -33,4 +38,4 @@ add_library(ExecAvroTests STATIC ) add_dependencies(ExecAvroTests gen-deps) -ADD_UNIFIED_BE_LSAN_TEST(hdfs-avro-scanner-test HdfsAvroScannerTest.*) \ No newline at end of file +ADD_UNIFIED_BE_LSAN_TEST(hdfs-avro-scanner-test HdfsAvroScannerTest.*) diff --git a/be/src/exec/kudu/CMakeLists.txt b/be/src/exec/kudu/CMakeLists.txt index befc7a738..72cebf431 100644 --- a/be/src/exec/kudu/CMakeLists.txt +++ b/be/src/exec/kudu/CMakeLists.txt @@ -21,6 +21,12 @@ set(LIBRARY_OUTPUT_PATH 
"${BUILD_OUTPUT_ROOT_DIRECTORY}/exec/kudu") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exec/kudu") +add_library(ExecKuduIr + kudu-util-ir.cc +) + +add_dependencies(ExecKuduIr gen-deps) + add_library(ExecKudu kudu-scanner.cc kudu-scan-node.cc @@ -28,7 +34,6 @@ add_library(ExecKudu kudu-scan-node-mt.cc kudu-table-sink.cc kudu-util.cc - kudu-util-ir.cc ) -add_dependencies(ExecKudu gen-deps) \ No newline at end of file +add_dependencies(ExecKudu gen-deps) diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index a28218ed8..08a385591 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -24,61 +24,65 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs") set(MURMURHASH_SRC_DIR "${CMAKE_SOURCE_DIR}/be/src/thirdparty/murmurhash") -add_library(Exprs - agg-fn.cc - agg-fn-evaluator.cc +add_library(ExprsIr agg-fn-evaluator-ir.cc aggregate-functions-ir.cc - anyval-util.cc bit-byte-functions-ir.cc - case-expr.cc - cast-format-expr.cc cast-functions-ir.cc - compound-predicates.cc compound-predicates-ir.cc - conditional-functions.cc conditional-functions-ir.cc - datasketches-common.cc datasketches-functions-ir.cc date-functions-ir.cc decimal-functions-ir.cc decimal-operators-ir.cc - expr.cc - hive-udf-call.cc hive-udf-call-ir.cc iceberg-functions-ir.cc in-predicate-ir.cc - is-not-empty-predicate.cc is-null-predicate-ir.cc - kudu-partition-expr.cc kudu-partition-expr-ir.cc - like-predicate.cc like-predicate-ir.cc - literal.cc mask-functions-ir.cc math-functions-ir.cc + operators-ir.cc + scalar-expr-ir.cc + string-functions-ir.cc + timestamp-functions-ir.cc + tuple-is-null-predicate-ir.cc + udf-builtins-ir.cc + utility-functions-ir.cc +) +add_dependencies(ExprsIr gen-deps) + +add_library(Exprs + agg-fn.cc + agg-fn-evaluator.cc + anyval-util.cc + case-expr.cc + cast-format-expr.cc + compound-predicates.cc + conditional-functions.cc + datasketches-common.cc + expr.cc + 
hive-udf-call.cc + is-not-empty-predicate.cc + kudu-partition-expr.cc + like-predicate.cc + literal.cc ${MURMURHASH_SRC_DIR}/MurmurHash3.cpp null-literal.cc - operators-ir.cc scalar-expr.cc scalar-expr-evaluator.cc - scalar-expr-ir.cc slot-ref.cc string-functions.cc - string-functions-ir.cc timestamp-functions.cc - timestamp-functions-ir.cc timezone_db.cc tuple-is-null-predicate.cc - tuple-is-null-predicate-ir.cc scalar-fn-call.cc udf-builtins.cc - udf-builtins-ir.cc utility-functions.cc - utility-functions-ir.cc valid-tuple-id.cc ) -add_dependencies(Exprs gen-deps gen_ir_descriptions) +add_dependencies(Exprs gen-deps) add_library(ExprsTests STATIC datasketches-test.cc diff --git a/be/src/gutil/CMakeLists.txt b/be/src/gutil/CMakeLists.txt index 78ba14b6f..8ea5283bd 100644 --- a/be/src/gutil/CMakeLists.txt +++ b/be/src/gutil/CMakeLists.txt @@ -80,5 +80,5 @@ add_kudu_test(strings/string_util-test) add_library(GUtilTests STATIC sysinfo-test.cc ) -add_dependencies(GUtilTests gen-deps gen_ir_descriptions) +add_dependencies(GUtilTests gen-deps) ADD_UNIFIED_BE_LSAN_TEST(sysinfo-test "SysInfoTest.*") diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index 5322b7b6d..b04e04eb1 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -27,6 +27,15 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/runtime") # Mark the protobuf file as generated set_source_files_properties(${ROW_BATCH_PROTO_SRCS} PROPERTIES GENERATED TRUE) +add_library(RuntimeIr + krpc-data-stream-sender-ir.cc + raw-value-ir.cc + runtime-filter-ir.cc + sorter-ir.cc + tuple-ir.cc +) +add_dependencies(RuntimeIr gen-deps) + add_library(Runtime blocking-row-batch-queue.cc buffered-tuple-stream.cc @@ -54,7 +63,6 @@ add_library(Runtime krpc-data-stream-mgr.cc krpc-data-stream-recvr.cc krpc-data-stream-sender.cc - krpc-data-stream-sender-ir.cc lib-cache.cc mem-tracker.cc mem-pool.cc @@ -65,25 +73,21 @@ add_library(Runtime test-env.cc types.cc raw-value.cc - 
raw-value-ir.cc reservation-manager.cc row-batch.cc ${ROW_BATCH_PROTO_SRCS} runtime-filter.cc runtime-filter-bank.cc - runtime-filter-ir.cc runtime-state.cc scanner-mem-limiter.cc sorted-run-merger.cc sorter.cc - sorter-ir.cc spillable-row-batch-queue.cc string-value.cc thread-resource-mgr.cc timestamp-parse-util.cc timestamp-value.cc tuple.cc - tuple-ir.cc tuple-row.cc tmp-file-mgr.cc ) diff --git a/be/src/udf/CMakeLists.txt b/be/src/udf/CMakeLists.txt index 0470716a8..b06ea8459 100644 --- a/be/src/udf/CMakeLists.txt +++ b/be/src/udf/CMakeLists.txt @@ -22,13 +22,21 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/udf") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/udf") +add_library(UdfIr udf-ir.cc) +add_dependencies(UdfIr gen-deps) + +set(UDF_SRC_FILES + udf.cc + udf-test-harness.cc +) + # Build this library twice. Once to be linked into the main impalad. This version # can have dependencies on our other libs. The second version is shipped as part # of the UDF sdk, which can't use other libs. 
-add_library(Udf udf.cc udf-ir.cc udf-test-harness.cc) +add_library(Udf ${UDF_SRC_FILES}) add_dependencies(Udf gen-deps) -add_library(ImpalaUdf udf.cc udf-ir.cc udf-test-harness.cc) +add_library(ImpalaUdf ${UDF_SRC_FILES}) add_dependencies(ImpalaUdf gen-deps) set_target_properties(ImpalaUdf PROPERTIES COMPILE_FLAGS "-DIMPALA_UDF_SDK_BUILD") diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index fcf0b5eb6..d040c8571 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -27,6 +27,13 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/util") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/util") +add_library(UtilIr + bloom-filter-ir.cc + in-list-filter-ir.cc + min-max-filter-ir.cc +) +add_dependencies(UtilIr gen-deps) + set(UTIL_SRCS auth-util.cc avro-util.cc @@ -36,7 +43,6 @@ set(UTIL_SRCS bit-packing.cc bit-util.cc bloom-filter.cc - bloom-filter-ir.cc cgroup-util.cc coding-util.cc codec.cc @@ -62,7 +68,6 @@ set(UTIL_SRCS impalad-metrics.cc impala-bloom-filter-buffer-allocator.cc in-list-filter.cc - in-list-filter-ir.cc jni-util.cc json-util.cc jwt-util.cc @@ -75,7 +80,6 @@ set(UTIL_SRCS memusage-path-handlers.cc metrics.cc min-max-filter.cc - min-max-filter-ir.cc minidump.cc mpfit-util.cc network-util.cc @@ -139,7 +143,7 @@ else() endif() add_library(Util ${UTIL_SRCS}) -add_dependencies(Util gen-deps gen_ir_descriptions) +add_dependencies(Util gen-deps) add_library(UtilTests STATIC benchmark-test.cc @@ -191,7 +195,7 @@ add_library(UtilTests STATIC uid-util-test.cc zip-util-test.cc ) -add_dependencies(UtilTests gen-deps gen_ir_descriptions) +add_dependencies(UtilTests gen-deps) # Squeasel requires C99 compatibility to build. 
SET_SOURCE_FILES_PROPERTIES(${SQUEASEL_SRC_DIR}/squeasel.c diff --git a/be/src/util/cache/CMakeLists.txt b/be/src/util/cache/CMakeLists.txt index 7cc1999b7..3cf8409c3 100644 --- a/be/src/util/cache/CMakeLists.txt +++ b/be/src/util/cache/CMakeLists.txt @@ -26,14 +26,14 @@ add_library(UtilCache lirs-cache.cc rl-cache.cc ) -add_dependencies(UtilCache gen-deps gen_ir_descriptions) +add_dependencies(UtilCache gen-deps) add_library(UtilCacheTests STATIC cache-test.cc lirs-cache-test.cc rl-cache-test.cc ) -add_dependencies(UtilCacheTests gen-deps gen_ir_descriptions) +add_dependencies(UtilCacheTests gen-deps) add_executable(cache-bench cache-bench.cc) target_link_libraries(cache-bench ${IMPALA_TEST_LINK_LIBS})
