Author: Farzon Lotfi Date: 2025-09-05T14:02:48-04:00 New Revision: 16661b5d6c0555850f95d34e585a0643f7f92fcb
URL: https://github.com/llvm/llvm-project/commit/16661b5d6c0555850f95d34e585a0643f7f92fcb DIFF: https://github.com/llvm/llvm-project/commit/16661b5d6c0555850f95d34e585a0643f7f92fcb.diff LOG: [DirectX] Add isinf f16 emulation for SM6.8 and lower (#156932) fixes #156068 - We needed to add a new sub arch to the target triple so we can test that emulation does not happen when targeting SM6.9 - The HLSL toolchain needed to be updated to handle the conversion of strings to enums for the new sub arch. - The emulation is done in DXILIntrinsicExpansion.cpp and needs to be able to convert both llvm.is.fpclass and llvm.dx.isinf to the proper emulation - test updates in TargetParser/TripleTest.cpp, isinf.ll, is_fpclass.ll, and DXCModeTest.cpp Added: Modified: clang/lib/Driver/ToolChains/HLSL.cpp clang/unittests/Driver/DXCModeTest.cpp llvm/include/llvm/TargetParser/Triple.h llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp llvm/lib/TargetParser/Triple.cpp llvm/test/CodeGen/DirectX/is_fpclass.ll llvm/test/CodeGen/DirectX/isinf.ll llvm/unittests/TargetParser/TripleTest.cpp Removed: ################################################################################ diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp index 660661945d62a..559af32dc3808 100644 --- a/clang/lib/Driver/ToolChains/HLSL.cpp +++ b/clang/lib/Driver/ToolChains/HLSL.cpp @@ -132,6 +132,9 @@ std::optional<std::string> tryParseProfile(StringRef Profile) { case 8: SubArch = llvm::Triple::DXILSubArch_v1_8; break; + case 9: + SubArch = llvm::Triple::DXILSubArch_v1_9; + break; case OfflineLibMinor: // Always consider minor version x as the latest supported DXIL version SubArch = llvm::Triple::LatestDXILSubArch; diff --git a/clang/unittests/Driver/DXCModeTest.cpp b/clang/unittests/Driver/DXCModeTest.cpp index e7d8137144c6c..62274235c53f5 100644 --- a/clang/unittests/Driver/DXCModeTest.cpp +++ b/clang/unittests/Driver/DXCModeTest.cpp @@ -85,7 +85,7 @@ TEST(DxcModeTest, 
TargetProfileValidation) { InMemoryFileSystem, Diags); validateTargetProfile("-Tcs_6_8", "dxilv1.8--shadermodel6.8-compute", InMemoryFileSystem, Diags); - validateTargetProfile("-Tlib_6_x", "dxilv1.8--shadermodel6.15-library", + validateTargetProfile("-Tlib_6_x", "dxilv1.9--shadermodel6.15-library", InMemoryFileSystem, Diags); // Invalid tests. diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index f85984ed4f328..8e12c6852075d 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -180,7 +180,8 @@ class Triple { DXILSubArch_v1_6, DXILSubArch_v1_7, DXILSubArch_v1_8, - LatestDXILSubArch = DXILSubArch_v1_8, + DXILSubArch_v1_9, + LatestDXILSubArch = DXILSubArch_v1_9, }; enum VendorType { UnknownVendor, diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index ee1db54446cb8..c613b351d85b6 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -51,6 +51,41 @@ static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy, return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64); } +static Value *expand16BitIsInf(CallInst *Orig) { + Module *M = Orig->getModule(); + if (M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9)) + return nullptr; + + Value *Val = Orig->getOperand(0); + Type *ValTy = Val->getType(); + if (!ValTy->getScalarType()->isHalfTy()) + return nullptr; + + IRBuilder<> Builder(Orig); + Type *IType = Type::getInt16Ty(M->getContext()); + Constant *PosInf = + ValTy->isVectorTy() + ? ConstantVector::getSplat( + ElementCount::getFixed( + cast<FixedVectorType>(ValTy)->getNumElements()), + ConstantInt::get(IType, 0x7c00)) + : ConstantInt::get(IType, 0x7c00); + + Constant *NegInf = + ValTy->isVectorTy() + ? 
ConstantVector::getSplat( + ElementCount::getFixed( + cast<FixedVectorType>(ValTy)->getNumElements()), + ConstantInt::get(IType, 0xfc00)) + : ConstantInt::get(IType, 0xfc00); + + Value *IVal = Builder.CreateBitCast(Val, PosInf->getType()); + Value *B1 = Builder.CreateICmpEQ(IVal, PosInf); + Value *B2 = Builder.CreateICmpEQ(IVal, NegInf); + Value *B3 = Builder.CreateOr(B1, B2); + return B3; +} + static bool isIntrinsicExpansion(Function &F) { switch (F.getIntrinsicID()) { case Intrinsic::abs: @@ -68,6 +103,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::dx_sclamp: case Intrinsic::dx_nclamp: case Intrinsic::dx_degrees: + case Intrinsic::dx_isinf: case Intrinsic::dx_lerp: case Intrinsic::dx_normalize: case Intrinsic::dx_fdot: @@ -301,9 +337,10 @@ static Value *expandIsFPClass(CallInst *Orig) { auto *TCI = dyn_cast<ConstantInt>(T); // These FPClassTest cases have DXIL opcodes, so they will be handled in - // DXIL Op Lowering instead. + // DXIL Op Lowering instead for all non f16 cases. 
switch (TCI->getZExtValue()) { case FPClassTest::fcInf: + return expand16BitIsInf(Orig); case FPClassTest::fcNan: case FPClassTest::fcNormal: case FPClassTest::fcFinite: @@ -873,6 +910,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::dx_degrees: Result = expandDegreesIntrinsic(Orig); break; + case Intrinsic::dx_isinf: + Result = expand16BitIsInf(Orig); + break; case Intrinsic::dx_lerp: Result = expandLerpIntrinsic(Orig); break; diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index ec15f235b8624..71517e5e9e832 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -158,6 +158,8 @@ StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) { return "dxilv1.7"; case Triple::DXILSubArch_v1_8: return "dxilv1.8"; + case Triple::DXILSubArch_v1_9: + return "dxilv1.9"; default: break; } @@ -650,6 +652,8 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3", "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7", "dxilv1.8", Triple::dxil) + // Note: Cases has max limit of 10. 
+ .Case("dxilv1.9", Triple::dxil) .Case("xtensa", Triple::xtensa) .Default(Triple::UnknownArch); @@ -842,6 +846,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { .EndsWith("v1.6", Triple::DXILSubArch_v1_6) .EndsWith("v1.7", Triple::DXILSubArch_v1_7) .EndsWith("v1.8", Triple::DXILSubArch_v1_8) + .EndsWith("v1.9", Triple::DXILSubArch_v1_9) .Default(Triple::NoSubArch); StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName); @@ -1111,7 +1116,7 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) { VersionTuple Ver = parseVersionFromName(ShaderModelStr.drop_front(strlen("shadermodel"))); // Default DXIL minor version when Shader Model version is anything other - // than 6.[0...8] or 6.x (which translates to latest current SM version) + // than 6.[0...9] or 6.x (which translates to latest current SM version) const unsigned SMMajor = 6; if (!Ver.empty()) { if (Ver.getMajor() == SMMajor) { @@ -1135,6 +1140,8 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) { return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_7); case 8: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_8); + case 9: + return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_9); default: report_fatal_error("Unsupported Shader Model version", false); } diff --git a/llvm/test/CodeGen/DirectX/is_fpclass.ll b/llvm/test/CodeGen/DirectX/is_fpclass.ll index a628096aacd7d..1796e8bd794d8 100644 --- a/llvm/test/CodeGen/DirectX/is_fpclass.ll +++ b/llvm/test/CodeGen/DirectX/is_fpclass.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s --check-prefixes=CHECK,SM69CHECK +; RUN: opt -S 
-dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.8-library %s | FileCheck %s --check-prefixes=CHECK,SMOLDCHECK define noundef i1 @isnegzero(float noundef %a) { @@ -75,6 +75,23 @@ entry: ret i1 %0 } +define noundef i1 @isinfh(half noundef %a) { +; CHECK-LABEL: define noundef i1 @isinfh( +; CHECK-SAME: half noundef [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; SM69CHECK-NEXT: [[ISINF:%.*]] = call i1 @dx.op.isSpecialFloat.f16(i32 9, half [[A]]) #[[ATTR0]] +; SMOLDCHECK-NEXT: [[BITCAST:%.*]] = bitcast half %a to i16 +; SMOLDCHECK-NEXT: [[CMPHIGH:%.*]] = icmp eq i16 [[BITCAST]], 31744 +; SMOLDCHECK-NEXT: [[CMPLOW:%.*]] = icmp eq i16 [[BITCAST]], -1024 +; SMOLDCHECK-NEXT: [[OR:%.*]] = or i1 [[CMPHIGH]], [[CMPLOW]] +; SMOLDCHECK-NEXT: ret i1 [[OR]] +; SM69CHECK-NEXT: ret i1 [[ISINF]] +; +entry: + %0 = call i1 @llvm.is.fpclass.f16(half %a, i32 516) + ret i1 %0 +} + define noundef <2 x i1> @isinfv2(<2 x float> noundef %a) { ; CHECK-LABEL: define noundef <2 x i1> @isinfv2( ; CHECK-SAME: <2 x float> noundef [[A:%.*]]) { diff --git a/llvm/test/CodeGen/DirectX/isinf.ll b/llvm/test/CodeGen/DirectX/isinf.ll index 461553b533ae1..bf31363ee114c 100644 --- a/llvm/test/CodeGen/DirectX/isinf.ll +++ b/llvm/test/CodeGen/DirectX/isinf.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s --check-prefixes=CHECK,SM69CHECK +; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.8-library %s | FileCheck %s --check-prefixes=CHECK,SMOLDCHECK ; Make sure dxil operation function calls for isinf are generated for float and half. 
@@ -11,17 +12,47 @@ entry: define noundef i1 @isinf_half(half noundef %a) { entry: - ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]] + ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]] + ; SMOLDCHECK: [[BITCAST:%.*]] = bitcast half %a to i16 + ; SMOLDCHECK: [[CMPHIGH:%.*]] = icmp eq i16 [[BITCAST]], 31744 + ; SMOLDCHECK: [[CMPLOW:%.*]] = icmp eq i16 [[BITCAST]], -1024 + ; SMOLDCHECK: [[OR:%.*]] = or i1 [[CMPHIGH]], [[CMPLOW]] %dx.isinf = call i1 @llvm.dx.isinf.f16(half %a) ret i1 %dx.isinf } define noundef <4 x i1> @isinf_half4(<4 x half> noundef %p0) { entry: - ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half - ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half - ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half - ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half + ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half + ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half + ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half + ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half + + ; SMOLDCHECK: [[ee0:%.*]] = extractelement <4 x half> %p0, i64 0 + ; SMOLDCHECK: [[BITCAST0:%.*]] = bitcast half [[ee0]] to i16 + ; SMOLDCHECK: [[ee1:%.*]] = extractelement <4 x half> %p0, i64 1 + ; SMOLDCHECK: [[BITCAST1:%.*]] = bitcast half [[ee1]] to i16 + ; SMOLDCHECK:[[ee2:%.*]] = extractelement <4 x half> %p0, i64 2 + ; SMOLDCHECK: [[BITCAST2:%.*]] = bitcast half [[ee2]] to i16 + ; SMOLDCHECK: [[ee3:%.*]] = extractelement <4 x half> %p0, i64 3 + ; SMOLDCHECK: [[BITCAST3:%.*]] = bitcast half [[ee3]] to i16 + ; SMOLDCHECK: [[ICMPHIGH0:%.*]] = icmp eq i16 [[BITCAST0]], 31744 + ; SMOLDCHECK: [[ICMPHIGH1:%.*]] = icmp eq i16 [[BITCAST1]], 31744 + ; SMOLDCHECK: [[ICMPHIGH2:%.*]] = icmp eq i16 [[BITCAST2]], 31744 + ; SMOLDCHECK: [[ICMPHIGH3:%.*]] = icmp eq i16 [[BITCAST3]], 31744 + ; SMOLDCHECK: [[ICMPLOW0:%.*]] = icmp eq i16 [[BITCAST0]], -1024 + ; SMOLDCHECK: [[ICMPLOW1:%.*]] = icmp eq 
i16 [[BITCAST1]], -1024 + ; SMOLDCHECK: [[ICMPLOW2:%.*]] = icmp eq i16 [[BITCAST2]], -1024 + ; SMOLDCHECK: [[ICMPLOW3:%.*]] = icmp eq i16 [[BITCAST3]], -1024 + ; SMOLDCHECK: [[OR0:%.*]] = or i1 [[ICMPHIGH0]], [[ICMPLOW0]] + ; SMOLDCHECK: [[OR1:%.*]] = or i1 [[ICMPHIGH1]], [[ICMPLOW1]] + ; SMOLDCHECK: [[OR2:%.*]] = or i1 [[ICMPHIGH2]], [[ICMPLOW2]] + ; SMOLDCHECK: [[OR3:%.*]] = or i1 [[ICMPHIGH3]], [[ICMPLOW3]] + ; SMOLDCHECK: %.upto019 = insertelement <4 x i1> poison, i1 [[OR0]], i64 0 + ; SMOLDCHECK: %.upto120 = insertelement <4 x i1> %.upto019, i1 [[OR1]], i64 1 + ; SMOLDCHECK: %.upto221 = insertelement <4 x i1> %.upto120, i1 [[OR2]], i64 2 + ; SMOLDCHECK: %0 = insertelement <4 x i1> %.upto221, i1 [[OR3]], i64 3 + %hlsl.isinf = call <4 x i1> @llvm.dx.isinf.v4f16(<4 x half> %p0) ret <4 x i1> %hlsl.isinf } diff --git a/llvm/unittests/TargetParser/TripleTest.cpp b/llvm/unittests/TargetParser/TripleTest.cpp index 7d07615d273d7..d2ca30583fe82 100644 --- a/llvm/unittests/TargetParser/TripleTest.cpp +++ b/llvm/unittests/TargetParser/TripleTest.cpp @@ -553,6 +553,13 @@ TEST(TripleTest, ParsedIDs) { EXPECT_EQ(Triple::ShaderModel, T.getOS()); EXPECT_EQ(VersionTuple(1, 8), T.getDXILVersion()); + T = Triple("dxilv1.9-unknown-shadermodel6.15-library"); + EXPECT_EQ(Triple::dxil, T.getArch()); + EXPECT_EQ(Triple::DXILSubArch_v1_9, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::ShaderModel, T.getOS()); + EXPECT_EQ(VersionTuple(1, 9), T.getDXILVersion()); + T = Triple("x86_64-unknown-fuchsia"); EXPECT_EQ(Triple::x86_64, T.getArch()); EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); @@ -1270,6 +1277,12 @@ TEST(TripleTest, ParsedIDs) { EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); EXPECT_EQ(Triple::UnknownOS, T.getOS()); + T = Triple("dxilv1.9-unknown-unknown"); + EXPECT_EQ(Triple::dxil, T.getArch()); + EXPECT_EQ(Triple::DXILSubArch_v1_9, T.getSubArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, 
T.getOS()); + // Check specification of unknown SubArch results in // unknown architecture. T = Triple("dxilv1.999-unknown-unknown"); @@ -3226,9 +3239,9 @@ TEST(TripleTest, DXILNormaizeWithVersion) { Triple::normalize("dxil--shadermodel6.0")); EXPECT_EQ("dxilv1.1-unknown-shadermodel6.1-library", Triple::normalize("dxil-shadermodel6.1-unknown-library")); - EXPECT_EQ("dxilv1.8-unknown-shadermodel6.x-unknown", + EXPECT_EQ("dxilv1.9-unknown-shadermodel6.x-unknown", Triple::normalize("dxil-unknown-shadermodel6.x-unknown")); - EXPECT_EQ("dxilv1.8-unknown-shadermodel6.x-unknown", + EXPECT_EQ("dxilv1.9-unknown-shadermodel6.x-unknown", Triple::normalize("dxil-unknown-shadermodel6.x-unknown")); EXPECT_EQ("dxil-unknown-unknown-unknown", Triple::normalize("dxil---")); EXPECT_EQ("dxilv1.0-pc-shadermodel5.0-compute", _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits