Author: Sarah Spall Date: 2024-11-06T07:31:39-08:00 New Revision: fb90733e196039b0a77f43af98c42c9267a31e07
URL: https://github.com/llvm/llvm-project/commit/fb90733e196039b0a77f43af98c42c9267a31e07 DIFF: https://github.com/llvm/llvm-project/commit/fb90733e196039b0a77f43af98c42c9267a31e07.diff LOG: [HLSL] implement elementwise firstbithigh hlsl builtin (#111082) Implements elementwise firstbithigh hlsl builtin. Implements firstbituhigh intrinsic for spirv and directx, which handles unsigned integers. Implements firstbitshigh intrinsic for spirv and directx, which handles signed integers. Fixes #113486 Closes #99115 Added: clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl llvm/test/CodeGen/DirectX/firstbithigh.ll llvm/test/CodeGen/DirectX/firstbitshigh_error.ll llvm/test/CodeGen/DirectX/firstbituhigh_error.ll llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll Modified: clang/include/clang/Basic/Builtins.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/CodeGen/CGHLSLRuntime.h clang/lib/Headers/hlsl/hlsl_intrinsics.h clang/lib/Sema/SemaHLSL.cpp llvm/include/llvm/IR/IntrinsicsDirectX.td llvm/include/llvm/IR/IntrinsicsSPIRV.td llvm/lib/Target/DirectX/DXIL.td llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index f5d45fa7b90a40..87a798183d6e19 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4798,6 +4798,12 @@ def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> { let Prototype = "int(unsigned int, unsigned int, int)"; } +def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_firstbithigh"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + def HLSLFrac : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_elementwise_frac"]; 
let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 82770a75af23e4..5c3df5124517d6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18773,6 +18773,15 @@ static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) { return RT.getUDotIntrinsic(); } +Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) { + if (QT->hasSignedIntegerRepresentation()) { + return RT.getFirstBitSHighIntrinsic(); + } + + assert(QT->hasUnsignedIntegerRepresentation()); + return RT.getFirstBitUHighIntrinsic(); +} + Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -18872,6 +18881,15 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr, "hlsl.dot4add.i8packed"); } + case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: { + + Value *X = EmitScalarExpr(E->getArg(0)); + + return Builder.CreateIntrinsic( + /*ReturnType=*/ConvertType(E->getType()), + getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()), + ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh"); + } case Builtin::BI__builtin_hlsl_lerp: { Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 5e09ab694a79b8..ff810cc535c087 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -92,6 +92,8 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane) + GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh) + GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh) 
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 6cd93c5e9a592d..12d3aa418449c0 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -1008,6 +1008,78 @@ float3 exp2(float3); _HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2) float4 exp2(float4); +//===----------------------------------------------------------------------===// +// firstbithigh builtins +//===----------------------------------------------------------------------===// + +/// \fn T firstbithigh(T Val) +/// \brief Returns the location of the first set bit starting from the highest +/// order bit and working downward, per component. +/// \param Val the input value. + +#ifdef __HLSL_ENABLE_16_BIT +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(int16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(int16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(int16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(int16_t4); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(uint16_t); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(uint16_t2); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(uint16_t3); +_HLSL_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(uint16_t4); +#endif + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(int); 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(int2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(int3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint firstbithigh(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint2 firstbithigh(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint3 firstbithigh(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh) +uint4 firstbithigh(uint64_t4); + //===----------------------------------------------------------------------===// // floor builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 298b7ad4f9e687..65b0d9cd65637f 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1947,6 +1947,31 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: { + if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall)) + return 
true; + + const Expr *Arg = TheCall->getArg(0); + QualType ArgTy = Arg->getType(); + QualType EltTy = ArgTy; + + QualType ResTy = SemaRef.Context.UnsignedIntTy; + + if (auto *VecTy = EltTy->getAs<VectorType>()) { + EltTy = VecTy->getElementType(); + ResTy = SemaRef.Context.getVectorType(ResTy, VecTy->getNumElements(), + VecTy->getVectorKind()); + } + + if (!EltTy->isIntegerType()) { + Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) + << 1 << /* integer ty */ 6 << ArgTy; + return true; + } + + TheCall->setType(ResTy); + break; + } case Builtin::BI__builtin_hlsl_select: { if (SemaRef.checkArgCount(TheCall, 3)) return true; diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl new file mode 100644 index 00000000000000..debf6b6d3e3f5a --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl @@ -0,0 +1,153 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s -DTARGET=spv + +#ifdef __HLSL_ENABLE_16_BIT +// CHECK-LABEL: test_firstbithigh_ushort +// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16 +uint test_firstbithigh_ushort(uint16_t p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_ushort2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16 +uint2 test_firstbithigh_ushort2(uint16_t2 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_ushort3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16 +uint3 test_firstbithigh_ushort3(uint16_t3 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_ushort4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16 
+uint4 test_firstbithigh_ushort4(uint16_t4 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_short +// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16 +uint test_firstbithigh_short(int16_t p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_short2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16 +uint2 test_firstbithigh_short2(int16_t2 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_short3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16 +uint3 test_firstbithigh_short3(int16_t3 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_short4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16 +uint4 test_firstbithigh_short4(int16_t4 p0) { + return firstbithigh(p0); +} +#endif // __HLSL_ENABLE_16_BIT + +// CHECK-LABEL: test_firstbithigh_uint +// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32 +uint test_firstbithigh_uint(uint p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_uint2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32 +uint2 test_firstbithigh_uint2(uint2 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_uint3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32 +uint3 test_firstbithigh_uint3(uint3 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_uint4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32 +uint4 test_firstbithigh_uint4(uint4 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_ulong +// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64 +uint test_firstbithigh_ulong(uint64_t p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_ulong2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64 +uint2 test_firstbithigh_ulong2(uint64_t2 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_ulong3 +// CHECK: call <3 x i32> 
@llvm.[[TARGET]].firstbituhigh.v3i64 +uint3 test_firstbithigh_ulong3(uint64_t3 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_ulong4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64 +uint4 test_firstbithigh_ulong4(uint64_t4 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_int +// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32 +uint test_firstbithigh_int(int p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_int2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32 +uint2 test_firstbithigh_int2(int2 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_int3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32 +uint3 test_firstbithigh_int3(int3 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_int4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32 +uint4 test_firstbithigh_int4(int4 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_long +// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64 +uint test_firstbithigh_long(int64_t p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_long2 +// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64 +uint2 test_firstbithigh_long2(int64_t2 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_long3 +// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64 +uint3 test_firstbithigh_long3(int64_t3 p0) { + return firstbithigh(p0); +} + +// CHECK-LABEL: test_firstbithigh_long4 +// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64 +uint4 test_firstbithigh_long4(int64_t4 p0) { + return firstbithigh(p0); +} diff --git a/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl new file mode 100644 index 00000000000000..1912ab3ae806b3 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 
-finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected + +int test_too_few_arg() { + return firstbithigh(); + // expected-error@-1 {{no matching function for call to 'firstbithigh'}} +} + +int test_too_many_arg(int p0) { + return firstbithigh(p0, p0); + // expected-error@-1 {{no matching function for call to 'firstbithigh'}} +} + +double test_int_builtin(double p0) { + return firstbithigh(p0); + // expected-error@-1 {{call to 'firstbithigh' is ambiguous}} +} + +double2 test_int_builtin_2(double2 p0) { + return __builtin_hlsl_elementwise_firstbithigh(p0); + // expected-error@-1 {{1st argument must be a vector of integers + // (was 'double2' (aka 'vector<double, 2>'))}} +} + +float test_int_builtin_3(float p0) { + return __builtin_hlsl_elementwise_firstbithigh(p0); + // expected-error@-1 {{1st argument must be a vector of integers + // (was 'float')}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 8cd5ff9006c1b7..c3a935c39ddc30 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -93,4 +93,6 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; +def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; +def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 72fee94908db72..629d6759dd65fd 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ 
b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -99,4 +99,6 @@ let TargetPrefix = "spv" in { [llvm_any_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem]>; + def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; + def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; } diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index aa372b4e31f4b9..aab94e95a40ff9 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -564,6 +564,30 @@ def CountBits : DXILOp<31, unaryBits> { let attributes = [Attributes<DXIL1_0, [ReadNone]>]; } +def FirstbitHi : DXILOp<33, unaryBits> { + let Doc = "Returns the location of the first set bit starting from " + "the highest order bit and working downward."; + let LLVMIntrinsic = int_dx_firstbituhigh; + let arguments = [OverloadTy]; + let result = Int32Ty; + let overloads = + [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>]; + let stages = [Stages<DXIL1_0, [all_stages]>]; + let attributes = [Attributes<DXIL1_0, [ReadNone]>]; +} + +def FirstbitSHi : DXILOp<34, unaryBits> { + let Doc = "Returns the location of the first set bit from " + "the highest order bit based on the sign."; + let LLVMIntrinsic = int_dx_firstbitshigh; + let arguments = [OverloadTy]; + let result = Int32Ty; + let overloads = + [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>]; + let stages = [Stages<DXIL1_0, [all_stages]>]; + let attributes = [Attributes<DXIL1_0, [ReadNone]>]; +} + def FMax : DXILOp<35, binary> { let Doc = "Float maximum. FMax(a,b) = a > b ? 
a : b"; let LLVMIntrinsic = int_maxnum; diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 231afd8ae3eeaf..b0436a39423405 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -32,6 +32,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_rsqrt: case Intrinsic::dx_wave_readlane: case Intrinsic::dx_splitdouble: + case Intrinsic::dx_firstbituhigh: + case Intrinsic::dx_firstbitshigh: return true; default: return false; diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index af0df4d6e5d563..f66506beaa6ed6 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -424,7 +424,7 @@ Register SPIRVGlobalRegistry::getOrCreateCompositeOrNull( LLT LLTy = LLT::scalar(64); Register SpvVecConst = CurMF->getRegInfo().createGenericVirtualRegister(LLTy); - CurMF->getRegInfo().setRegClass(SpvVecConst, &SPIRV::iIDRegClass); + CurMF->getRegInfo().setRegClass(SpvVecConst, getRegClass(SpvType)); assignSPIRVTypeToVReg(SpvType, SpvVecConst, *CurMF); DT.add(CA, CurMF, SpvVecConst); MachineInstrBuilder MIB; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 7aa5f4f2b1a8f1..be38b22f70c583 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -92,9 +92,26 @@ class SPIRVInstructionSelector : public InstructionSelector { bool spvSelect(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectFirstBitHigh(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, bool IsSigned) const; + + bool selectFirstBitHigh16(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, bool IsSigned) const; + + bool 
selectFirstBitHigh32(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, Register SrcReg, + bool IsSigned) const; + + bool selectFirstBitHigh64(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, bool IsSigned) const; + bool selectGlobalValue(Register ResVReg, MachineInstr &I, const MachineInstr *Init = nullptr) const; + bool selectNAryOpWithSrcs(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, std::vector<Register> SrcRegs, + unsigned Opcode) const; + bool selectUnOpWithSrc(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, Register SrcReg, unsigned Opcode) const; @@ -220,6 +237,8 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectPhi(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectExtInst(Register ResVReg, const SPIRVType *RestType, + MachineInstr &I, GL::GLSLExtInst GLInst) const; bool selectExtInst(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, CL::OpenCLExtInst CLInst) const; bool selectExtInst(Register ResVReg, const SPIRVType *ResType, @@ -769,6 +788,14 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg, } } +bool SPIRVInstructionSelector::selectExtInst(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + GL::GLSLExtInst GLInst) const { + return selectExtInst(ResVReg, ResType, I, + {{SPIRV::InstructionSet::GLSL_std_450, GLInst}}); +} + bool SPIRVInstructionSelector::selectExtInst(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, @@ -816,6 +843,20 @@ bool SPIRVInstructionSelector::selectExtInst(Register ResVReg, return false; } +bool SPIRVInstructionSelector::selectNAryOpWithSrcs(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + std::vector<Register> Srcs, + unsigned Opcode) const { + auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)); + for (Register SReg : Srcs) { + MIB.addUse(SReg); + } + return 
MIB.constrainAllUses(TII, TRI, RBI); +} + bool SPIRVInstructionSelector::selectUnOpWithSrc(Register ResVReg, const SPIRVType *ResType, MachineInstr &I, @@ -2666,6 +2707,10 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_firstbituhigh: // There is no CL equivalent of FindUMsb + return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/false); + case Intrinsic::spv_firstbitshigh: // There is no CL equivalent of FindSMsb + return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/true); case Intrinsic::spv_group_memory_barrier_with_group_sync: { Register MemSemReg = buildI32Constant(SPIRV::MemorySemantics::SequentiallyConsistent, I); @@ -2786,6 +2831,160 @@ Register SPIRVInstructionSelector::buildPointerToResource( return AcReg; } +bool SPIRVInstructionSelector::selectFirstBitHigh16(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + bool IsSigned) const { + unsigned Opcode = IsSigned ? SPIRV::OpSConvert : SPIRV::OpUConvert; + // zero or sign extend + Register ExtReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + bool Result = + selectUnOpWithSrc(ExtReg, ResType, I, I.getOperand(2).getReg(), Opcode); + return Result && selectFirstBitHigh32(ResVReg, ResType, I, ExtReg, IsSigned); +} + +bool SPIRVInstructionSelector::selectFirstBitHigh32(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + Register SrcReg, + bool IsSigned) const { + unsigned Opcode = IsSigned ? 
GL::FindSMsb : GL::FindUMsb; + return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450)) + .addImm(Opcode) + .addUse(SrcReg) + .constrainAllUses(TII, TRI, RBI); +} + +bool SPIRVInstructionSelector::selectFirstBitHigh64(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + bool IsSigned) const { + Register OpReg = I.getOperand(2).getReg(); + // 1. split our int64 into 2 pieces using a bitcast + unsigned count = GR.getScalarOrVectorComponentCount(ResType); + SPIRVType *baseType = GR.retrieveScalarOrVectorIntType(ResType); + MachineIRBuilder MIRBuilder(I); + SPIRVType *postCastT = + GR.getOrCreateSPIRVVectorType(baseType, 2 * count, MIRBuilder); + Register bitcastReg = MRI->createVirtualRegister(GR.getRegClass(postCastT)); + bool Result = + selectUnOpWithSrc(bitcastReg, postCastT, I, OpReg, SPIRV::OpBitcast); + + // 2. call firstbithigh + Register FBHReg = MRI->createVirtualRegister(GR.getRegClass(postCastT)); + Result &= selectFirstBitHigh32(FBHReg, postCastT, I, bitcastReg, IsSigned); + + // 3. 
split result vector into high bits and low bits + Register HighReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + Register LowReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + + bool ZeroAsNull = STI.isOpenCLEnv(); + bool isScalarRes = ResType->getOpcode() != SPIRV::OpTypeVector; + if (isScalarRes) { + // if scalar do a vector extract + Result &= selectNAryOpWithSrcs( + HighReg, ResType, I, + {FBHReg, GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull)}, + SPIRV::OpVectorExtractDynamic); + Result &= selectNAryOpWithSrcs( + LowReg, ResType, I, + {FBHReg, GR.getOrCreateConstInt(1, I, ResType, TII, ZeroAsNull)}, + SPIRV::OpVectorExtractDynamic); + } else { // vector case do a shufflevector + auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(SPIRV::OpVectorShuffle)) + .addDef(HighReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(FBHReg) + .addUse(FBHReg); + // ^^ this vector will not be selected from; could be empty + unsigned j; + for (j = 0; j < count * 2; j += 2) { + MIB.addImm(j); + } + Result &= MIB.constrainAllUses(TII, TRI, RBI); + + // get low bits + MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(SPIRV::OpVectorShuffle)) + .addDef(LowReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(FBHReg) + .addUse(FBHReg); + // ^^ this vector will not be selected from; could be empty + for (j = 1; j < count * 2; j += 2) { + MIB.addImm(j); + } + Result &= MIB.constrainAllUses(TII, TRI, RBI); + } + + // 4. 
check if result of each top 32 bits is == -1 + SPIRVType *BoolType = GR.getOrCreateSPIRVBoolType(I, TII); + Register NegOneReg; + Register Reg0; + Register Reg32; + unsigned selectOp; + unsigned addOp; + if (isScalarRes) { + NegOneReg = + GR.getOrCreateConstInt((unsigned)-1, I, ResType, TII, ZeroAsNull); + Reg0 = GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull); + Reg32 = GR.getOrCreateConstInt(32, I, ResType, TII, ZeroAsNull); + selectOp = SPIRV::OpSelectSISCond; + addOp = SPIRV::OpIAddS; + } else { + BoolType = GR.getOrCreateSPIRVVectorType(BoolType, count, MIRBuilder); + NegOneReg = + GR.getOrCreateConstVector((unsigned)-1, I, ResType, TII, ZeroAsNull); + Reg0 = GR.getOrCreateConstVector(0, I, ResType, TII, ZeroAsNull); + Reg32 = GR.getOrCreateConstVector(32, I, ResType, TII, ZeroAsNull); + selectOp = SPIRV::OpSelectVIVCond; + addOp = SPIRV::OpIAddV; + } + + // check if the high bits are == -1; true if -1 + Register BReg = MRI->createVirtualRegister(GR.getRegClass(BoolType)); + Result &= selectNAryOpWithSrcs(BReg, BoolType, I, {HighReg, NegOneReg}, + SPIRV::OpIEqual); + + // Select low bits if true in BReg, otherwise high bits + Register TmpReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + Result &= selectNAryOpWithSrcs(TmpReg, ResType, I, {BReg, LowReg, HighReg}, + selectOp); + + // Add 32 for high bits, 0 for low bits + Register ValReg = MRI->createVirtualRegister(GR.getRegClass(ResType)); + Result &= + selectNAryOpWithSrcs(ValReg, ResType, I, {BReg, Reg0, Reg32}, selectOp); + + return Result && + selectNAryOpWithSrcs(ResVReg, ResType, I, {ValReg, TmpReg}, addOp); +} + +bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + bool IsSigned) const { + // FindUMsb and FindSMsb intrinsics only support 32 bit integers + Register OpReg = I.getOperand(2).getReg(); + SPIRVType *OpType = GR.getSPIRVTypeForVReg(OpReg); + + switch (GR.getScalarOrVectorBitWidth(OpType)) { + case 16: + return 
selectFirstBitHigh16(ResVReg, ResType, I, IsSigned); + case 32: + return selectFirstBitHigh32(ResVReg, ResType, I, OpReg, IsSigned); + case 64: + return selectFirstBitHigh64(ResVReg, ResType, I, IsSigned); + default: + report_fatal_error( + "spv_firstbituhigh and spv_firstbitshigh only support 16,32,64 bits."); + } +} + bool SPIRVInstructionSelector::selectAllocaArray(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { diff --git a/llvm/test/CodeGen/DirectX/firstbithigh.ll b/llvm/test/CodeGen/DirectX/firstbithigh.ll new file mode 100644 index 00000000000000..5584c433fb6f0e --- /dev/null +++ b/llvm/test/CodeGen/DirectX/firstbithigh.ll @@ -0,0 +1,91 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for firstbithigh are generated for all integer types. + +define noundef i32 @test_firstbithigh_ushort(i16 noundef %a) { +entry: +; CHECK: call i32 @dx.op.unaryBits.i16(i32 33, i16 %{{.*}}) + %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i16(i16 %a) + ret i32 %elt.firstbithigh +} + +define noundef i32 @test_firstbithigh_short(i16 noundef %a) { +entry: +; CHECK: call i32 @dx.op.unaryBits.i16(i32 34, i16 %{{.*}}) + %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i16(i16 %a) + ret i32 %elt.firstbithigh +} + +define noundef i32 @test_firstbithigh_uint(i32 noundef %a) { +entry: +; CHECK: call i32 @dx.op.unaryBits.i32(i32 33, i32 %{{.*}}) + %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i32(i32 %a) + ret i32 %elt.firstbithigh +} + +define noundef i32 @test_firstbithigh_int(i32 noundef %a) { +entry: +; CHECK: call i32 @dx.op.unaryBits.i32(i32 34, i32 %{{.*}}) + %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i32(i32 %a) + ret i32 %elt.firstbithigh +} + +define noundef i32 @test_firstbithigh_ulong(i64 noundef %a) { +entry: +; CHECK: call i32 @dx.op.unaryBits.i64(i32 33, i64 %{{.*}}) + %elt.firstbithigh = call i32 @llvm.dx.firstbituhigh.i64(i64 %a) + 
ret i32 %elt.firstbithigh +} + +define noundef i32 @test_firstbithigh_long(i64 noundef %a) { +entry: +; CHECK: call i32 @dx.op.unaryBits.i64(i32 34, i64 %{{.*}}) + %elt.firstbithigh = call i32 @llvm.dx.firstbitshigh.i64(i64 %a) + ret i32 %elt.firstbithigh +} + +define noundef <4 x i32> @test_firstbituhigh_vec4_i32(<4 x i32> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 33, i32 [[ee3]]) + ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + %2 = call <4 x i32> @llvm.dx.firstbituhigh.v4i32(<4 x i32> %a) + ret <4 x i32> %2 +} + +define noundef <4 x i32> @test_firstbitshigh_vec4_i32(<4 x i32> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 34, i32 [[ee3]]) + ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 + ; CHECK: 
insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + %2 = call <4 x i32> @llvm.dx.firstbitshigh.v4i32(<4 x i32> %a) + ret <4 x i32> %2 +} + +declare i32 @llvm.dx.firstbituhigh.i16(i16) +declare i32 @llvm.dx.firstbituhigh.i32(i32) +declare i32 @llvm.dx.firstbituhigh.i64(i64) +declare <4 x i32> @llvm.dx.firstbituhigh.v4i32(<4 x i32>) + +declare i32 @llvm.dx.firstbitshigh.i16(i16) +declare i32 @llvm.dx.firstbitshigh.i32(i32) +declare i32 @llvm.dx.firstbitshigh.i64(i64) +declare <4 x i32> @llvm.dx.firstbitshigh.v4i32(<4 x i32>) diff --git a/llvm/test/CodeGen/DirectX/firstbitshigh_error.ll b/llvm/test/CodeGen/DirectX/firstbitshigh_error.ll new file mode 100644 index 00000000000000..22982a03e47921 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/firstbitshigh_error.ll @@ -0,0 +1,10 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation firstbitshigh does not support double overload type +; CHECK: invalid intrinsic signature + +define noundef double @firstbitshigh_double(double noundef %a) { +entry: + %1 = call double @llvm.dx.firstbitshigh.f64(double %a) + ret double %1 +} diff --git a/llvm/test/CodeGen/DirectX/firstbituhigh_error.ll b/llvm/test/CodeGen/DirectX/firstbituhigh_error.ll new file mode 100644 index 00000000000000..b611a96ffc2f9c --- /dev/null +++ b/llvm/test/CodeGen/DirectX/firstbituhigh_error.ll @@ -0,0 +1,10 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation firstbituhigh does not support double overload type +; CHECK: invalid intrinsic signature + +define noundef double @firstbituhigh_double(double noundef %a) { +entry: + %1 = call double @llvm.dx.firstbituhigh.f64(double %a) + ret double %1 +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll new file mode 100644 index 
00000000000000..3d35e102310f50 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/firstbithigh.ll @@ -0,0 +1,107 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpMemoryModel Logical GLSL450 +; CHECK-DAG: [[Z:%.*]] = OpConstant %[[#]] 0 +; CHECK-DAG: [[X:%.*]] = OpConstant %[[#]] 1 + +define noundef i32 @firstbituhigh_i32(i32 noundef %a) { +entry: +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb %[[#]] + %elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i32(i32 %a) + ret i32 %elt.firstbituhigh +} + +define noundef <2 x i32> @firstbituhigh_2xi32(<2 x i32> noundef %a) { +entry: +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb %[[#]] + %elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i32(<2 x i32> %a) + ret <2 x i32> %elt.firstbituhigh +} + +define noundef i32 @firstbituhigh_i16(i16 noundef %a) { +entry: +; CHECK: [[A:%.*]] = OpUConvert %[[#]] +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb [[A]] + %elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i16(i16 %a) + ret i32 %elt.firstbituhigh +} + +define noundef <2 x i32> @firstbituhigh_v2i16(<2 x i16> noundef %a) { +entry: +; CHECK: [[A:%.*]] = OpUConvert %[[#]] +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindUMsb [[A]] + %elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i16(<2 x i16> %a) + ret <2 x i32> %elt.firstbituhigh +} + +define noundef i32 @firstbituhigh_i64(i64 noundef %a) { +entry: +; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]] +; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindUMsb [[O]] +; CHECK: [[M:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[Z]] +; CHECK: [[L:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[X]] +; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]] +; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]] +; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]] +; CHECK: [[B:%.*]] = OpIAdd 
%[[#]] [[C]] [[H]] + %elt.firstbituhigh = call i32 @llvm.spv.firstbituhigh.i64(i64 %a) + ret i32 %elt.firstbituhigh +} + +define noundef <2 x i32> @firstbituhigh_v2i64(<2 x i64> noundef %a) { +entry: +; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]] +; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindUMsb [[O]] +; CHECK: [[M:%.*]] = OpVectorShuffle %[[#]] [[N]] [[N]] 0 +; CHECK: [[L:%.*]] = OpVectorShuffle %[[#]] [[N]] [[N]] 1 +; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]] +; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]] +; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]] +; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]] +; CHECK: OpReturnValue [[B]] + %elt.firstbituhigh = call <2 x i32> @llvm.spv.firstbituhigh.v2i64(<2 x i64> %a) + ret <2 x i32> %elt.firstbituhigh +} + +define noundef i32 @firstbitshigh_i32(i32 noundef %a) { +entry: +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindSMsb %[[#]] + %elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i32(i32 %a) + ret i32 %elt.firstbitshigh +} + +define noundef i32 @firstbitshigh_i16(i16 noundef %a) { +entry: +; CHECK: [[A:%.*]] = OpSConvert %[[#]] +; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FindSMsb %[[#]] + %elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i16(i16 %a) + ret i32 %elt.firstbitshigh +} + +define noundef i32 @firstbitshigh_i64(i64 noundef %a) { +entry: +; CHECK: [[O:%.*]] = OpBitcast %[[#]] %[[#]] +; CHECK: [[N:%.*]] = OpExtInst %[[#]] %[[#]] FindSMsb [[O]] +; CHECK: [[M:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[Z]] +; CHECK: [[L:%.*]] = OpVectorExtractDynamic %[[#]] [[N]] [[X]] +; CHECK: [[I:%.*]] = OpIEqual %[[#]] [[M]] %[[#]] +; CHECK: [[H:%.*]] = OpSelect %[[#]] [[I]] [[L]] [[M]] +; CHECK: [[C:%.*]] = OpSelect %[[#]] [[I]] %[[#]] %[[#]] +; CHECK: [[B:%.*]] = OpIAdd %[[#]] [[C]] [[H]] + %elt.firstbitshigh = call i32 @llvm.spv.firstbitshigh.i64(i64 %a) + ret i32 %elt.firstbitshigh +} + +;declare i16 @llvm.spv.firstbituhigh.i16(i16) +;declare i32 @llvm.spv.firstbituhigh.i32(i32) 
+;declare i32 @llvm.spv.firstbituhigh.i64(i64) +;declare <2 x i32> @llvm.spv.firstbituhigh.v2i16(<2 x i16>) +;declare <2 x i32> @llvm.spv.firstbituhigh.v2i32(<2 x i32>) +;declare <2 x i32> @llvm.spv.firstbituhigh.v2i64(<2 x i64>) +;declare i32 @llvm.spv.firstbitshigh.i16(i16) +;declare i32 @llvm.spv.firstbitshigh.i32(i32) +;declare i32 @llvm.spv.firstbitshigh.i64(i64) +;declare <2 x i32> @llvm.spv.firstbitshigh.v2i16(<2 x i16>) +;declare <2 x i32> @llvm.spv.firstbitshigh.v2i32(<2 x i32>) +;declare <2 x i32> @llvm.spv.firstbitshigh.v2i64(<2 x i64>) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits