llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir Author: Juan Manuel Martinez Caamaño (jmmartinez) <details> <summary>Changes</summary> This adds a built-in that maps to `V_CVT_OFF_F32_I4`, which treats its input as a 4-bit signed integer and returns `0.0625f * src`. SWDEV-518861 --- Full diff: https://github.com/llvm/llvm-project/pull/133741.diff 10 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+1) - (added) clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl (+15) - (added) clang/test/SemaOpenCL/builtins-amdgcn-cvt-off-f32-i4-err.cl (+8) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+6) - (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (+9) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+1) - (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+5) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll (+23) - (added) llvm/test/CodeGen/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll (+158) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 44ef404aee72f..f38148cc795dc 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -140,6 +140,7 @@ BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc") +BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fUi", "nc") BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc") BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc") diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl new file mode 100644 index 0000000000000..6dc235f9cc6c7 --- /dev/null +++ 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl @@ -0,0 +1,15 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \ +// RUN: -emit-llvm -o - | FileCheck %s + +// CHECK-LABEL: @test_builtin_amdgcn_cvt_off_f32_i4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store i32 [[N:%.*]], ptr addrspace(5) [[N_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[TMP0]]) +// CHECK-NEXT: ret float [[TMP1]] +// +float test_builtin_amdgcn_cvt_off_f32_i4(unsigned n) { + return __builtin_amdgcn_cvt_off_f32_i4(n); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-cvt-off-f32-i4-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-cvt-off-f32-i4-err.cl new file mode 100644 index 0000000000000..f5b02b80c37da --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-cvt-off-f32-i4-err.cl @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s + +void test_builtin_amdgcn_cvt_off_f32_i4(unsigned n) { + struct A{ unsigned x; } a; + __builtin_amdgcn_cvt_off_f32_i4(n, n); // expected-error {{too many arguments to function call, expected 1, have 2}} + __builtin_amdgcn_cvt_off_f32_i4(); // expected-error {{too few arguments to function call, expected 1, have 0}} + __builtin_amdgcn_cvt_off_f32_i4(a); // expected-error {{passing '__private struct A' to parameter of incompatible type 'unsigned int'}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f53016f62abbe..ebac0f9029791 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -3375,6 +3375,12 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">, [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, 
ImmArg<ArgIndex<3>>]>; +// llvm.amdgcn.cvt.off.fp32.i4 int srcA +def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">, + DefaultAttrsIntrinsic<[llvm_float_ty], + [llvm_i32_ty], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // gfx950 intrinsics //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3246e575ea6a9..533ad349f7500 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -6042,6 +6042,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, // TODO: Handle more intrinsics switch (IntrinsicID) { case Intrinsic::amdgcn_cubeid: + case Intrinsic::amdgcn_cvt_off_f32_i4: return true; case Intrinsic::amdgcn_frexp_mant: { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 7cd97e95b0189..535fda7393bc1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -729,6 +729,15 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { break; } + case Intrinsic::amdgcn_cvt_off_f32_i4: { + ConstantInt *CArg = dyn_cast<ConstantInt>(II.getArgOperand(0)); + if (!CArg) + break; + int CI4BitAsInt = CArg->getValue().trunc(4).getSExtValue(); + float ResVal = 0.0625 * CI4BitAsInt; + Constant *Res = ConstantFP::get(II.getType(), ResVal); + return IC.replaceInstUsesWith(II, Res); + } case Intrinsic::amdgcn_ubfe: case Intrinsic::amdgcn_sbfe: { // Decompose simple cases into standard shifts. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 7df1e634b21ba..1d0e81db5a5db 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4585,6 +4585,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_dot4_f32_bf8_bf8: case Intrinsic::amdgcn_cvt_f32_fp8: case Intrinsic::amdgcn_cvt_f32_bf8: + case Intrinsic::amdgcn_cvt_off_f32_i4: case Intrinsic::amdgcn_cvt_pk_f32_fp8: case Intrinsic::amdgcn_cvt_pk_f32_bf8: case Intrinsic::amdgcn_cvt_pk_fp8_f32: diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index def06c1e9a0d7..1dae2e432eb8c 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -1578,6 +1578,11 @@ class MovDPP8Pattern<Predicate Pred, Instruction Inst, ValueType vt> : GCNPat < let OtherPredicates = [Pred]; } +def : GCNPat < + (f32 (int_amdgcn_cvt_off_f32_i4 i32:$src)), + (V_CVT_OFF_F32_I4_e32 VGPR_32:$src) +>; + foreach vt = Reg32Types.types in { def : MovDPP8Pattern<isGFX10Only, V_MOV_B32_dpp8_gfx10, vt>; def : MovDPP8Pattern<isGFX11Only, V_MOV_B32_dpp8_gfx11, vt>; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll new file mode 100644 index 0000000000000..a25d9c30a2331 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs %s --global-isel -o - | FileCheck %s + +declare float @llvm.amdgcn.cvt.off.f32.i4(i32) + +define amdgpu_cs float @cvt_var(i32 %a) { +; CHECK-LABEL: cvt_var: +; CHECK: ; %bb.0: +; CHECK-NEXT: 
v_cvt_off_f32_i4_e32 v0, v0 +; CHECK-NEXT: ; return to shader part epilog + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 %a) + ret float %ret +} + +define amdgpu_cs float @cvt_imm() { +; CHECK-LABEL: cvt_imm: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_cvt_off_f32_i4_e32 v0, 4 +; CHECK-NEXT: ; return to shader part epilog + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 4) + ret float %ret +} diff --git a/llvm/test/CodeGen/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll b/llvm/test/CodeGen/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll new file mode 100644 index 0000000000000..bac02bd61d0a9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/simplify-amdgcn.cvt.off.f32.i4.ll @@ -0,0 +1,158 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s + +declare float @llvm.amdgcn.cvt.off.f32.i4(i32) + +define float @cvt_var(i32 %a) { +; CHECK-LABEL: define float @cvt_var( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[A]]) +; CHECK-NEXT: ret float [[RET]] +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 %a) + ret float %ret +} + +define float @cvt_imm_0() { +; CHECK-LABEL: define float @cvt_imm_0() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 0) + ret float %ret +} + +define float @cvt_imm_1() { +; CHECK-LABEL: define float @cvt_imm_1() { +; CHECK-NEXT: ret float 6.250000e-02 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 1) + ret float %ret +} + +define float @cvt_imm_2() { +; CHECK-LABEL: define float @cvt_imm_2() { +; CHECK-NEXT: ret float 1.250000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 2) + ret float %ret +} + +define float @cvt_imm_3() { +; CHECK-LABEL: define float @cvt_imm_3() { +; CHECK-NEXT: ret float 1.875000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 3) + ret float %ret +} + +define float 
@cvt_imm_4() { +; CHECK-LABEL: define float @cvt_imm_4() { +; CHECK-NEXT: ret float 2.500000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 4) + ret float %ret +} + +define float @cvt_imm_5() { +; CHECK-LABEL: define float @cvt_imm_5() { +; CHECK-NEXT: ret float 3.125000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 5) + ret float %ret +} + +define float @cvt_imm_6() { +; CHECK-LABEL: define float @cvt_imm_6() { +; CHECK-NEXT: ret float 3.750000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 6) + ret float %ret +} + +define float @cvt_imm_7() { +; CHECK-LABEL: define float @cvt_imm_7() { +; CHECK-NEXT: ret float 4.375000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 7) + ret float %ret +} + +define float @cvt_imm_8() { +; CHECK-LABEL: define float @cvt_imm_8() { +; CHECK-NEXT: ret float -5.000000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 8) + ret float %ret +} + +define float @cvt_imm_9() { +; CHECK-LABEL: define float @cvt_imm_9() { +; CHECK-NEXT: ret float -4.375000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 9) + ret float %ret +} + +define float @cvt_imm_10() { +; CHECK-LABEL: define float @cvt_imm_10() { +; CHECK-NEXT: ret float -3.750000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 10) + ret float %ret +} + +define float @cvt_imm_11() { +; CHECK-LABEL: define float @cvt_imm_11() { +; CHECK-NEXT: ret float -3.125000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 11) + ret float %ret +} + +define float @cvt_imm_12() { +; CHECK-LABEL: define float @cvt_imm_12() { +; CHECK-NEXT: ret float -2.500000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 12) + ret float %ret +} + +define float @cvt_imm_13() { +; CHECK-LABEL: define float @cvt_imm_13() { +; CHECK-NEXT: ret float -1.875000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 13) + ret float %ret +} + +define float @cvt_imm_14() { +; CHECK-LABEL: define float @cvt_imm_14() { +; 
CHECK-NEXT: ret float -1.250000e-01 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 14) + ret float %ret +} + +define float @cvt_imm_15() { +; CHECK-LABEL: define float @cvt_imm_15() { +; CHECK-NEXT: ret float -6.250000e-02 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 15) + ret float %ret +} + +define float @cvt_imm_underflow() { +; CHECK-LABEL: define float @cvt_imm_underflow() { +; CHECK-NEXT: ret float -6.250000e-02 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 -1) + ret float %ret +} + +define float @cvt_imm_overflow() { +; CHECK-LABEL: define float @cvt_imm_overflow() { +; CHECK-NEXT: ret float 0.000000e+00 +; + %ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 16) + ret float %ret +} `````````` </details> https://github.com/llvm/llvm-project/pull/133741 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits