kazu updated this revision to Diff 490348. kazu added a comment. Updated the patch to leave findFirstSet and findLastSet untouched.
Added a comment about the behavior on 0/max input. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D141798/new/ https://reviews.llvm.org/D141798 Files: llvm/include/llvm/Support/MathExtras.h llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp llvm/lib/Transforms/IPO/LowerTypeTests.cpp llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp mlir/lib/Bytecode/Reader/BytecodeReader.cpp
Index: mlir/lib/Bytecode/Reader/BytecodeReader.cpp =================================================================== --- mlir/lib/Bytecode/Reader/BytecodeReader.cpp +++ mlir/lib/Bytecode/Reader/BytecodeReader.cpp @@ -281,8 +281,7 @@ // here because we only care about the first byte, and so that be actually // get ctz intrinsic calls when possible (the `uint8_t` overload uses a loop // implementation). - uint32_t numBytes = - llvm::countTrailingZeros<uint32_t>(result, llvm::ZB_Undefined); + uint32_t numBytes = llvm::countTrailingZeros<uint32_t>(result); assert(numBytes > 0 && numBytes <= 7 && "unexpected number of trailing zeros in varint encoding"); Index: llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp =================================================================== --- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -259,8 +259,7 @@ if (I < B.size()) BitsUsed |= B[I]; if (BitsUsed != 0xff) - return (MinByte + I) * 8 + - countTrailingZeros(uint8_t(~BitsUsed), ZB_Undefined); + return (MinByte + I) * 8 + countTrailingZeros(uint8_t(~BitsUsed)); } } else { // Find a free (Size/8) byte region in each member of Used. Index: llvm/lib/Transforms/IPO/LowerTypeTests.cpp =================================================================== --- llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -172,7 +172,7 @@ BSI.AlignLog2 = 0; if (Mask != 0) - BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined); + BSI.AlignLog2 = countTrailingZeros(Mask); // Build the compressed bitset while normalizing the offsets against the // computed alignment. Index: llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp =================================================================== --- llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -180,7 +180,7 @@ // or ADDIW. If there are trailing zeros, try generating a sign extended // constant with no trailing zeros and use a final SLLI to restore them. if ((Val & 0xfff) != 0 && (Val & 1) == 0 && Res.size() >= 2) { - unsigned TrailingZeros = countTrailingZeros((uint64_t)Val, ZB_Undefined); + unsigned TrailingZeros = countTrailingZeros((uint64_t)Val); int64_t ShiftedVal = Val >> TrailingZeros; // If we can use C.LI+C.SLLI instead of LUI+ADDI(W) prefer that since // its more compressible. But only if LUI+ADDI(W) isn't fusable. @@ -202,7 +202,7 @@ if (Val > 0 && Res.size() > 2) { assert(ActiveFeatures[RISCV::Feature64Bit] && "Expected RV32 to only need 2 instructions"); - unsigned LeadingZeros = countLeadingZeros((uint64_t)Val, ZB_Undefined); + unsigned LeadingZeros = countLeadingZeros((uint64_t)Val); uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros; // Fill in the bits that will be shifted out with 1s. An example where this // helps is trailing one masks with 32 or more ones. This will generate Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2451,8 +2451,7 @@ unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable(); if ((PsInputBits & 0x7F) == 0 || ((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1))) - Info->markPSInputEnabled( - countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined)); + Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr())); } } else if (IsKernel) { assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()); Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -701,8 +701,7 @@ if ((PsInputBits & 0x7F) == 0 || ((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1))) - Info->markPSInputEnabled( - countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined)); + Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr())); } } Index: llvm/include/llvm/Support/MathExtras.h =================================================================== --- llvm/include/llvm/Support/MathExtras.h +++ llvm/include/llvm/Support/MathExtras.h @@ -41,9 +41,7 @@ /// The returned value is undefined. ZB_Undefined, /// The returned value is numeric_limits<T>::max() - ZB_Max, - /// The returned value is numeric_limits<T>::digits - ZB_Width + ZB_Max }; /// Mathematical constants. @@ -84,7 +82,7 @@ namespace detail { template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter { - static unsigned count(T Val, ZeroBehavior) { + static unsigned count(T Val) { if (!Val) return std::numeric_limits<T>::digits; if (Val & 0x1) @@ -108,8 +106,8 @@ #if defined(__GNUC__) || defined(_MSC_VER) template <typename T> struct TrailingZerosCounter<T, 4> { - static unsigned count(T Val, ZeroBehavior ZB) { - if (ZB != ZB_Undefined && Val == 0) + static unsigned count(T Val) { + if (Val == 0) return 32; #if __has_builtin(__builtin_ctz) || defined(__GNUC__) @@ -124,8 +122,8 @@ #if !defined(_MSC_VER) || defined(_M_X64) template <typename T> struct TrailingZerosCounter<T, 8> { - static unsigned count(T Val, ZeroBehavior ZB) { - if (ZB != ZB_Undefined && Val == 0) + static unsigned count(T Val) { + if (Val == 0) return 64; #if __has_builtin(__builtin_ctzll) || defined(__GNUC__) @@ -146,18 +144,16 @@ /// /// Only unsigned integral types are allowed. /// -/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are -/// valid arguments. -template <typename T> -unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { +/// Returns std::numeric_limits<T>::digits on an input of 0. +template <typename T> unsigned countTrailingZeros(T Val) { static_assert(std::is_unsigned_v<T>, "Only unsigned integral types are allowed."); - return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB); + return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val); } namespace detail { template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter { - static unsigned count(T Val, ZeroBehavior) { + static unsigned count(T Val) { if (!Val) return std::numeric_limits<T>::digits; @@ -176,8 +172,8 @@ #if defined(__GNUC__) || defined(_MSC_VER) template <typename T> struct LeadingZerosCounter<T, 4> { - static unsigned count(T Val, ZeroBehavior ZB) { - if (ZB != ZB_Undefined && Val == 0) + static unsigned count(T Val) { + if (Val == 0) return 32; #if __has_builtin(__builtin_clz) || defined(__GNUC__) @@ -192,8 +188,8 @@ #if !defined(_MSC_VER) || defined(_M_X64) template <typename T> struct LeadingZerosCounter<T, 8> { - static unsigned count(T Val, ZeroBehavior ZB) { - if (ZB != ZB_Undefined && Val == 0) + static unsigned count(T Val) { + if (Val == 0) return 64; #if __has_builtin(__builtin_clzll) || defined(__GNUC__) @@ -214,13 +210,11 @@ /// /// Only unsigned integral types are allowed. /// -/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are -/// valid arguments. -template <typename T> -unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { +/// Returns std::numeric_limits<T>::digits on an input of 0. +template <typename T> unsigned countLeadingZeros(T Val) { static_assert(std::is_unsigned_v<T>, "Only unsigned integral types are allowed."); - return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB); + return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val); } /// Get the index of the first set bit starting from the least @@ -234,7 +228,7 @@ if (ZB == ZB_Max && Val == 0) return std::numeric_limits<T>::max(); - return countTrailingZeros(Val, ZB_Undefined); + return countTrailingZeros(Val); } /// Create a bitmask with the N right-most bits set to 1, and all other @@ -277,8 +271,7 @@ // Use ^ instead of - because both gcc and llvm can remove the associated ^ // in the __builtin_clz intrinsic on x86. - return countLeadingZeros(Val, ZB_Undefined) ^ - (std::numeric_limits<T>::digits - 1); + return countLeadingZeros(Val) ^ (std::numeric_limits<T>::digits - 1); } /// Macro compressed bit reversal table for 256 bits. @@ -470,13 +463,11 @@ /// Ex. countLeadingOnes(0xFF0FFF00) == 8. /// Only unsigned integral types are allowed. /// -/// \param ZB the behavior on an input of all ones. Only ZB_Width and -/// ZB_Undefined are valid arguments. -template <typename T> -unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { +/// Returns std::numeric_limits<T>::digits on an input of all ones. +template <typename T> unsigned countLeadingOnes(T Value) { static_assert(std::is_unsigned_v<T>, "Only unsigned integral types are allowed."); - return countLeadingZeros<T>(~Value, ZB); + return countLeadingZeros<T>(~Value); } /// Count the number of ones from the least significant bit to the first @@ -485,13 +476,11 @@ /// Ex. countTrailingOnes(0x00FF00FF) == 8. /// Only unsigned integral types are allowed. /// -/// \param ZB the behavior on an input of all ones. Only ZB_Width and -/// ZB_Undefined are valid arguments. -template <typename T> -unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { +/// Returns std::numeric_limits<T>::digits on an input of all ones. +template <typename T> unsigned countTrailingOnes(T Value) { static_assert(std::is_unsigned_v<T>, "Only unsigned integral types are allowed."); - return countTrailingZeros<T>(~Value, ZB); + return countTrailingZeros<T>(~Value); } /// Count the number of set bits in a value. @@ -622,7 +611,7 @@ /// Essentially, it is a floor operation across the domain of powers of two. inline uint64_t PowerOf2Floor(uint64_t A) { if (!A) return 0; - return 1ull << (63 - countLeadingZeros(A, ZB_Undefined)); + return 1ull << (63 - countLeadingZeros(A)); } /// Returns the power of two which is greater than or equal to the given value.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits