This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 104c1ad5540ba655f54dd1f49a0ffc7c49367efb Author: stiga-huang <[email protected]> AuthorDate: Sun Feb 12 09:02:46 2023 +0800 IMPALA-11916: Replace base::IsAarch64 with constant base::IsAarch64() returns true for Aarch64 platforms and returns false for x86 platforms. It shows up in a perf test and consumes some percents. Note that it's used in the hot path of hash computation. This patch replaces this method with a constant, IS_AARCH64. Defines the constant in cpu-info.h since the original place is in a file ported from gutil. Test: - Redo the perf test and don't see base::IsAarch64 anymore. Change-Id: Id6d62de63a0cb7b94244a1d4f8dcc6b2e65b6d9c Reviewed-on: http://gerrit.cloudera.org:8080/19492 Reviewed-by: Wenzhe Zhou <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/codegen/llvm-codegen-test.cc | 3 +-- be/src/codegen/llvm-codegen.cc | 2 +- be/src/gutil/sysinfo.cc | 8 -------- be/src/gutil/sysinfo.h | 3 --- be/src/util/bit-util-test.cc | 7 +++---- be/src/util/cpu-info.h | 6 ++++++ be/src/util/hash-util.h | 17 ++++++++--------- 7 files changed, 19 insertions(+), 27 deletions(-) diff --git a/be/src/codegen/llvm-codegen-test.cc b/be/src/codegen/llvm-codegen-test.cc index 0e33aa912..43a1663e2 100644 --- a/be/src/codegen/llvm-codegen-test.cc +++ b/be/src/codegen/llvm-codegen-test.cc @@ -28,7 +28,6 @@ #include "runtime/string-value.h" #include "runtime/test-env.h" #include "service/fe-support.h" -#include "gutil/sysinfo.h" #include "util/cpu-info.h" #include "util/filesystem-util.h" #include "util/hash-util.h" @@ -533,7 +532,7 @@ TEST_F(LlvmCodeGenTest, CpuAttrWhitelist) { // arm does not have sse2 EXPECT_EQ(std::unordered_set<string>( {"-dummy1", "-dummy2", "-dummy3", "-dummy4", - base::IsAarch64() ? "-sse2" : "+sse2", "-lzcnt"}), + IS_AARCH64 ? "-sse2" : "+sse2", "-lzcnt"}), LlvmCodeGen::ApplyCpuAttrWhitelist( {"+dummy1", "+dummy2", "-dummy3", "+dummy4", "+sse2", "-lzcnt"})); // IMPALA-6291: Test that all AVX512 attributes are disabled. diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc index f0bfbaf7a..d31eee772 100644 --- a/be/src/codegen/llvm-codegen.cc +++ b/be/src/codegen/llvm-codegen.cc @@ -1769,7 +1769,7 @@ void LlvmCodeGen::ClearHashFns() { // ret i32 %12 // } llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) { - if (base::IsAarch64() || IsCPUFeatureEnabled(CpuInfo::SSE4_2)) { + if (IS_AARCH64 || IsCPUFeatureEnabled(CpuInfo::SSE4_2)) { if (num_bytes == -1) { // -1 indicates variable length, just return the generic loop based // hash fn. diff --git a/be/src/gutil/sysinfo.cc b/be/src/gutil/sysinfo.cc index adc755440..01f259737 100644 --- a/be/src/gutil/sysinfo.cc +++ b/be/src/gutil/sysinfo.cc @@ -469,12 +469,4 @@ int MaxCPUIndex(void) { return cpuinfo_max_cpu_index; } -bool IsAarch64(void) { -#ifdef __aarch64__ - return true; -#else - return false; -#endif -} - } // namespace base diff --git a/be/src/gutil/sysinfo.h b/be/src/gutil/sysinfo.h index df37b720a..d46cfe550 100644 --- a/be/src/gutil/sysinfo.h +++ b/be/src/gutil/sysinfo.h @@ -65,8 +65,5 @@ extern double CyclesPerSecond(void); // Exposed for testing. extern int ParseMaxCpuIndex(const char* str); -// Return current platform is aarch64 or not -extern bool IsAarch64(); - } // namespace base #endif /* #ifndef _SYSINFO_H_ */ diff --git a/be/src/util/bit-util-test.cc b/be/src/util/bit-util-test.cc index f602603ba..d1707a8d4 100644 --- a/be/src/util/bit-util-test.cc +++ b/be/src/util/bit-util-test.cc @@ -27,7 +27,6 @@ #include "runtime/multi-precision.h" #include "testutil/gtest-util.h" -#include "gutil/sysinfo.h" #include "util/arithmetic-util.h" #include "util/bit-util.h" #include "util/cpu-info.h" @@ -135,7 +134,7 @@ TEST(BitUtil, TrailingBits) { void TestByteSwapSimd_Unit(const int64_t CpuFlag) { void (*bswap_fptr)(const uint8_t* src, uint8_t* dst) = NULL; int buf_size = 0; - if (base::IsAarch64() || CpuFlag == CpuInfo::SSSE3) { + if (IS_AARCH64 || CpuFlag == CpuInfo::SSSE3) { buf_size = 16; bswap_fptr = SimdByteSwap::ByteSwap128; } else { @@ -180,7 +179,7 @@ void TestByteSwapSimd(const int64_t CpuFlag, const int buf_size) { std::iota(src_buf, src_buf + buf_size, 0); int start_size = 0; - if (base::IsAarch64() || CpuFlag == CpuInfo::SSSE3) { + if (IS_AARCH64 || CpuFlag == CpuInfo::SSSE3) { start_size = 16; } else if (CpuFlag == CpuInfo::AVX2) { start_size = 32; @@ -189,7 +188,7 @@ void TestByteSwapSimd(const int64_t CpuFlag, const int buf_size) { for (int i = start_size; i < buf_size; ++i) { // Initialize dst buffer and swap i bytes. memset(dst_buf, 0, buf_size); - if (base::IsAarch64() || CpuFlag == CpuInfo::SSSE3) { + if (IS_AARCH64 || CpuFlag == CpuInfo::SSSE3) { SimdByteSwap::ByteSwapSimd<16>(src_buf, i, dst_buf); } else if (CpuFlag == CpuInfo::AVX2) { SimdByteSwap::ByteSwapSimd<32>(src_buf, i, dst_buf); diff --git a/be/src/util/cpu-info.h b/be/src/util/cpu-info.h index 7b8cdb830..f3154b110 100644 --- a/be/src/util/cpu-info.h +++ b/be/src/util/cpu-info.h @@ -27,6 +27,12 @@ namespace impala { +#ifdef __aarch64__ +#define IS_AARCH64 true +#else +#define IS_AARCH64 false +#endif + /// CpuInfo is an interface to query for cpu information at runtime. The caller can /// ask for the sizes of the caches and what hardware features are supported. /// On Linux, this information is pulled from a couple of sys files (/proc/cpuinfo and diff --git a/be/src/util/hash-util.h b/be/src/util/hash-util.h index 714cbe376..bb60997ae 100644 --- a/be/src/util/hash-util.h +++ b/be/src/util/hash-util.h @@ -21,7 +21,6 @@ #include "common/logging.h" #include "common/compiler-util.h" -#include "gutil/sysinfo.h" #include "util/cpu-info.h" #include "util/sse-util.h" @@ -39,7 +38,7 @@ class HashUtil { /// The resulting hashes are correlated. /// TODO: update this to also use SSE4_crc32_u64 and SSE4_crc32_u16 where appropriate. static uint32_t CrcHash(const void* data, int32_t bytes, uint32_t hash) { - DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64()); + DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64); uint32_t words = bytes / sizeof(uint32_t); bytes = bytes % sizeof(uint32_t); @@ -63,7 +62,7 @@ class HashUtil { /// CrcHash() specialized for 1-byte data static inline uint32_t CrcHash1(const void* v, uint32_t hash) { - DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64()); + DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64); const uint8_t* s = reinterpret_cast<const uint8_t*>(v); hash = SSE4_crc32_u8(hash, *s); hash = (hash << 16) | (hash >> 16); @@ -72,7 +71,7 @@ class HashUtil { /// CrcHash() specialized for 2-byte data static inline uint32_t CrcHash2(const void* v, uint32_t hash) { - DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64()); + DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64); const uint16_t* s = reinterpret_cast<const uint16_t*>(v); hash = SSE4_crc32_u16(hash, *s); hash = (hash << 16) | (hash >> 16); @@ -81,7 +80,7 @@ class HashUtil { /// CrcHash() specialized for 4-byte data static inline uint32_t CrcHash4(const void* v, uint32_t hash) { - DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64()); + DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64); const uint32_t* p = reinterpret_cast<const uint32_t*>(v); hash = SSE4_crc32_u32(hash, *p); hash = (hash << 16) | (hash >> 16); @@ -90,7 +89,7 @@ class HashUtil { /// CrcHash() specialized for 8-byte data static inline uint32_t CrcHash8(const void* v, uint32_t hash) { - DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64()); + DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64); const uint64_t* p = reinterpret_cast<const uint64_t*>(v); hash = SSE4_crc32_u64(hash, *p); hash = (hash << 16) | (hash >> 16); @@ -99,7 +98,7 @@ class HashUtil { /// CrcHash() specialized for 12-byte data static inline uint32_t CrcHash12(const void* v, uint32_t hash) { - DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64()); + DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64); const uint64_t* p = reinterpret_cast<const uint64_t*>(v); hash = SSE4_crc32_u64(hash, *p); ++p; @@ -110,7 +109,7 @@ class HashUtil { /// CrcHash() specialized for 16-byte data static inline uint32_t CrcHash16(const void* v, uint32_t hash) { - DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64()); + DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64); const uint64_t* p = reinterpret_cast<const uint64_t*>(v); hash = SSE4_crc32_u64(hash, *p); ++p; @@ -202,7 +201,7 @@ class HashUtil { /// Seed values for different steps of the query execution should use different seeds /// to prevent accidental key collisions. (See IMPALA-219 for more details). static uint32_t Hash(const void* data, int32_t bytes, uint32_t seed) { - if (base::IsAarch64() || LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) { + if (IS_AARCH64 || LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) { return CrcHash(data, bytes, seed); } else { return MurmurHash2_64(data, bytes, seed);
