[PATCH] D80727: AMDGPU: Add intrinsic for s_setreg

Matt Arsenault via Phabricator via cfe-commits Thu, 28 May 2020 08:12:53 -0700

arsenm created this revision.
arsenm added reviewers: rampitec, yaxunl, kerbowa.
Herald added subscribers: hiraditya, t-tye, tpr, dstuttard, nhaehnle, wdng, 
jvesely, kzhuravl.
Herald added a project: LLVM.


This will be more useful with fenv access implemented.


https://reviews.llvm.org/D80727

Files:
  clang/include/clang/Basic/BuiltinsAMDGPU.def
  clang/test/CodeGenOpenCL/builtins-amdgcn.cl
  clang/test/SemaOpenCL/builtins-amdgcn-error.cl
  llvm/include/llvm/IR/IntrinsicsAMDGPU.td
  llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/lib/Target/AMDGPU/SIInstrInfo.td
  llvm/lib/Target/AMDGPU/SOPInstructions.td
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll

Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; Set FP32 fp_round to round to zero
+define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() {
+; GCN-LABEL: test_setreg_f32_round_mode_rtz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 3), 3
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 4097, i32 3)
+  ret void
+}
+
+; Set FP64/FP16 fp_round to round to zero
+define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() {
+; GCN-LABEL: test_setreg_f64_round_mode_rtz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 3), 3
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 4225, i32 3)
+  ret void
+}
+
+; Set all fp_round to round to zero
+define amdgpu_kernel void @test_setreg_all_round_mode_rtz() {
+; GCN-LABEL: test_setreg_all_round_mode_rtz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 5), 7
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 8193, i32 7)
+  ret void
+}
+
+; Set FP32 fp_round to dynamic mode
+define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) {
+; GCN-LABEL: test_setreg_roundingmode_var:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s0
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
+  ret void
+}
+
+define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
+; GCN-LABEL: test_setreg_roundingmode_var_vgpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_readfirstlane_b32 s4, v0
+; GCN-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
+  ret void
+}
+
+declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
+
+attributes #0 = { nounwind }
Index: llvm/lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -807,7 +807,7 @@
   "s_setreg_b32",
   (outs), (ins SReg_32:$sdst, hwreg:$simm16),
   "$simm16, $sdst",
-  [(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]
+  [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]
 >;
 
 // FIXME: Not on SI?
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1131,7 +1131,7 @@
 def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
 def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
 
-def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
+def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
 
 def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
 
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7845,32 +7845,32 @@
   const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
                                (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
                                (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
-  const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
+  const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
 
   const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
 
   if (!HasFP32Denormals) {
     SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
 
-    SDValue EnableDenorm;
+    SDNode *EnableDenorm;
     if (Subtarget->hasDenormModeInst()) {
       const SDValue EnableDenormValue =
           getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
 
       EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
-                                 DAG.getEntryNode(), EnableDenormValue);
+                                 DAG.getEntryNode(), EnableDenormValue).getNode();
     } else {
       const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
                                                         SL, MVT::i32);
-      EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
-                                 DAG.getEntryNode(), EnableDenormValue,
-                                 BitField);
+      EnableDenorm =
+          DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
+                             {EnableDenormValue, BitField, DAG.getEntryNode()});
     }
 
     SDValue Ops[3] = {
       NegDivScale0,
-      EnableDenorm.getValue(0),
-      EnableDenorm.getValue(1)
+      SDValue(EnableDenorm, 0),
+      SDValue(EnableDenorm, 1)
     };
 
     NegDivScale0 = DAG.getMergeValues(Ops, SL);
@@ -7894,25 +7894,25 @@
                              NumeratorScaled, Fma3);
 
   if (!HasFP32Denormals) {
-    SDValue DisableDenorm;
+    SDNode *DisableDenorm;
     if (Subtarget->hasDenormModeInst()) {
       const SDValue DisableDenormValue =
           getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
 
       DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
                                   Fma4.getValue(1), DisableDenormValue,
-                                  Fma4.getValue(2));
+                                  Fma4.getValue(2)).getNode();
     } else {
       const SDValue DisableDenormValue =
           DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
 
-      DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
-                                  Fma4.getValue(1), DisableDenormValue,
-                                  BitField, Fma4.getValue(2));
+      DisableDenorm = DAG.getMachineNode(
+          AMDGPU::S_SETREG_B32, SL, MVT::Other,
+          {DisableDenormValue, BitField, Fma4.getValue(1), Fma4.getValue(2)});
     }
 
     SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
-                                      DisableDenorm, DAG.getRoot());
+                                      SDValue(DisableDenorm, 0), DAG.getRoot());
     DAG.setRoot(OutputChain);
   }
 
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2799,6 +2799,10 @@
       constrainOpWithReadfirstlane(MI, MRI, 2); // M0
       return;
     }
+    case Intrinsic::amdgcn_s_setreg: {
+      constrainOpWithReadfirstlane(MI, MRI, 2);
+      return;
+    }
     default: {
       if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
               AMDGPU::lookupRsrcIntrinsic(IntrID)) {
@@ -3942,6 +3946,13 @@
       OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
       break;
     }
+    case Intrinsic::amdgcn_s_setreg: {
+      // This must be an SGPR, but accept a VGPR.
+      unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+                                   AMDGPU::SGPRRegBankID);
+      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
+      break;
+    }
     case Intrinsic::amdgcn_end_cf:
     case Intrinsic::amdgcn_init_exec: {
       unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -202,13 +202,6 @@
 
 def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
 
-def AMDGPUSetRegOp :  SDTypeProfile<0, 2, [
-  SDTCisInt<0>, SDTCisInt<1>
-]>;
-
-def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
-  SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
-
 def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
    SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1207,6 +1207,16 @@
   [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<0>]
 >;
 
+// Note this can be used to set FP environment properties that are unsafe to
+// change in non-stricfp functions. The register properties available
+// (and value required to access them) may differ per subtarget.
+// llvm.amdgcn.s.setreg(hwmode, value)
+def int_amdgcn_s_setreg :
+  GCCBuiltin<"__builtin_amdgcn_s_setreg">,
+  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
+  [IntrInaccessibleMemOnly, IntrNoMem, IntrHasSideEffects, ImmArg<0>]
+>;
+
 // int_amdgcn_s_getpc is provided to allow a specific style of position
 // independent code to determine the high part of its address when it is
 // known (through convention) that the code and any data of interest does
Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===================================================================
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -139,3 +139,8 @@
   const char ptr[] = "workgroup";
   __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
 }
+
+void test_s_setreg(int x, int y) {
+  __builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
+  __builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
+}
Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl
===================================================================
--- clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -715,6 +715,12 @@
   *out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2);
 }
 
+// CHECK-LABEL: test_s_setreg(
+// CHECK: call void @llvm.amdgcn.s.setreg(i32 8193, i32 %val)
+kernel void test_s_setreg(uint val) {
+  __builtin_amdgcn_s_setreg(8193, val);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -44,6 +44,7 @@
 // Instruction builtins.
 //===----------------------------------------------------------------------===//
 BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n")
+BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n")
 BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n")
 BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80727: AMDGPU: Add intrinsic for s_setreg

Reply via email to