Hi, the attached three patches, along with this one, should fix VSELECT on SI as well.
-Tom On Tue, Jul 16, 2013 at 05:12:40PM -0500, Aaron Watry wrote: > Looks good to me. > > I've tested on Cedar (HD5400) with no OpenCL regressions, but cannot > test on SI because SETCC still causes issues (see > https://bugs.freedesktop.org/show_bug.cgi?id=66175). Once SETCC is > fixed for SI, we should probably add SI-CHECK lines to vselect.ll > > --Aaron > > On Tue, Jul 16, 2013 at 2:15 PM, Tom Stellard <t...@stellard.net> wrote: > > From: Tom Stellard <thomas.stell...@amd.com> > > > > --- > > lib/Target/R600/AMDGPUISelLowering.cpp | 3 +++ > > lib/Target/R600/R600ISelLowering.cpp | 3 --- > > test/CodeGen/R600/vselect.ll | 30 ++++++++++++++++++++++++++++++ > > 3 files changed, 33 insertions(+), 3 deletions(-) > > > > diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp > > b/lib/Target/R600/AMDGPUISelLowering.cpp > > index 9891ad3..e93ddc4 100644 > > --- a/lib/Target/R600/AMDGPUISelLowering.cpp > > +++ b/lib/Target/R600/AMDGPUISelLowering.cpp > > @@ -77,6 +77,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine > > &TM) : > > setOperationAction(ISD::UDIV, MVT::i32, Expand); > > setOperationAction(ISD::UDIVREM, MVT::i32, Custom); > > setOperationAction(ISD::UREM, MVT::i32, Expand); > > + setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); > > + setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); > > > > int types[] = { > > (int)MVT::v2i32, > > @@ -97,6 +99,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine > > &TM) : > > setOperationAction(ISD::SUB, VT, Expand); > > setOperationAction(ISD::UDIV, VT, Expand); > > setOperationAction(ISD::UREM, VT, Expand); > > + setOperationAction(ISD::VSELECT, VT, Expand); > > setOperationAction(ISD::XOR, VT, Expand); > > } > > } > > diff --git a/lib/Target/R600/R600ISelLowering.cpp > > b/lib/Target/R600/R600ISelLowering.cpp > > index 7aef08a..1067b38 100644 > > --- a/lib/Target/R600/R600ISelLowering.cpp > > +++ b/lib/Target/R600/R600ISelLowering.cpp > > @@ -67,9 +67,6 @@ 
R600TargetLowering::R600TargetLowering(TargetMachine &TM) > > : > > setOperationAction(ISD::SELECT, MVT::i32, Custom); > > setOperationAction(ISD::SELECT, MVT::f32, Custom); > > > > - setOperationAction(ISD::VSELECT, MVT::v4i32, Expand); > > - setOperationAction(ISD::VSELECT, MVT::v2i32, Expand); > > - > > // Legalize loads and stores to the private address space. > > setOperationAction(ISD::LOAD, MVT::i32, Custom); > > setOperationAction(ISD::LOAD, MVT::v2i32, Expand); > > diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll > > index 3f08cec..79d896b 100644 > > --- a/test/CodeGen/R600/vselect.ll > > +++ b/test/CodeGen/R600/vselect.ll > > @@ -14,6 +14,20 @@ entry: > > ret void > > } > > > > +;EG-CHECK: @test_select_v2f32 > > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], > > T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} > > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], > > T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} > > + > > +define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> > > addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) { > > +entry: > > + %0 = load <2 x float> addrspace(1)* %in0 > > + %1 = load <2 x float> addrspace(1)* %in1 > > + %cmp = fcmp one <2 x float> %0, %1 > > + %result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1 > > + store <2 x float> %result, <2 x float> addrspace(1)* %out > > + ret void > > +} > > + > > ;EG-CHECK: @test_select_v4i32 > > ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], > > T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} > > ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], > > T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} > > @@ -29,3 +43,19 @@ entry: > > store <4 x i32> %result, <4 x i32> addrspace(1)* %out > > ret void > > } > > + > > +;EG-CHECK: @test_select_v4f32 > > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], > > T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} > > +;EG-CHECK: CNDE_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], PV\.[XYZW], > > T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} > > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], > > T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} > > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], > > T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} > > + > > +define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> > > addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) { > > +entry: > > + %0 = load <4 x float> addrspace(1)* %in0 > > + %1 = load <4 x float> addrspace(1)* %in1 > > + %cmp = fcmp one <4 x float> %0, %1 > > + %result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1 > > + store <4 x float> %result, <4 x float> addrspace(1)* %out > > + ret void > > +} > > -- > > 1.7.11.4 > > > > _______________________________________________ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>From 762ec2680973282127737a8b5797edf9ff2ad87d Mon Sep 17 00:00:00 2001 From: Tom Stellard <thomas.stell...@amd.com> Date: Tue, 16 Jul 2013 18:13:03 -0700 Subject: [PATCH 1/3] R600/SI: Add support for v2f32 stores --- lib/Target/R600/AMDGPUISelLowering.cpp | 3 +++ lib/Target/R600/SIInstructions.td | 3 +++ test/CodeGen/R600/store.ll | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index e93ddc4..04d379e 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -57,6 +57,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::f32, Promote); AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); + setOperationAction(ISD::STORE, MVT::v2f32, Promote); + AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); + setOperationAction(ISD::STORE, MVT::v4f32, Promote); AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index ffa45c5..a74efcc 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1482,6 +1482,9 @@ def : BitConvert <i64, f64, VReg_64>; def : BitConvert <f64, i64, VReg_64>; +def : BitConvert <v2f32, v2i32, VReg_64>; +def : BitConvert <v2i32, v2f32, VReg_64>; + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll index f8c6f84..d233c73 100644 --- a/test/CodeGen/R600/store.ll +++ b/test/CodeGen/R600/store.ll @@ -15,6 +15,24 @@ define void @store_f32(float addrspace(1)* %out, float %in) { ret void } +; vec2 floating-point stores +; EG-CHECK: @store_v2f32 +; EG-CHECK: RAT_WRITE_CACHELESS_32_eg +; EG-CHECK-NEXT: RAT_WRITE_CACHELESS_32_eg +; CM-CHECK: @store_v2f32 +; CM-CHECK: EXPORT_RAT_INST_STORE_DWORD +; 
CM-CHECK-NEXT: EXPORT_RAT_INST_STORE_DWORD +; SI-CHECK: @store_v2f32 +; SI-CHECK: BUFFER_STORE_DWORDX2 + +define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) { +entry: + %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0 + %1 = insertelement <2 x float> %0, float %b, i32 0 + store <2 x float> %1, <2 x float> addrspace(1)* %out + ret void +} + ; The stores in this function are combined by the optimizer to create a ; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer ; should not try to split the 64-bit store back into 2 32-bit stores. -- 1.7.11.4
>From b75ca32d36800c3bf99928a98847068283f97a3d Mon Sep 17 00:00:00 2001 From: Tom Stellard <thomas.stell...@amd.com> Date: Tue, 16 Jul 2013 18:27:21 -0700 Subject: [PATCH 2/3] R600/SI: Add support for v2f32 loads --- lib/Target/R600/AMDGPUISelLowering.cpp | 3 +++ lib/Target/R600/SIInstructions.td | 1 + lib/Target/R600/SIRegisterInfo.td | 2 +- test/CodeGen/R600/load.ll | 14 ++++++++++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 04d379e..9250f0a 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -69,6 +69,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::f32, Promote); AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); + setOperationAction(ISD::LOAD, MVT::v2f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); + setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index a74efcc..48add71 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1705,6 +1705,7 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> { defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>; defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>; +defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>; defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>; defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 244d4c0..292b9d2 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -153,7 +153,7 @@ def SReg_32 : RegisterClass<"AMDGPU", 
[f32, i32], 32, (add SGPR_32, M0Reg) >; -def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64, +def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, (add SGPR_64, VCCReg, EXECReg) >; diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index d1ebaa3..60f7f86 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -41,6 +41,20 @@ entry: ret void } +; load a v2f32 value from the global address space +; R600-CHECK: @load_v2f32 +; R600-CHECK: VTX_READ_32 +; R600-CHECK: VTX_READ_32 + +; SI-CHECK: @load_v2f32 +; SI-CHECK: BUFFER_LOAD_DWORDX2 +define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { +entry: + %0 = load <2 x float> addrspace(1)* %in + store <2 x float> %0, <2 x float> addrspace(1)* %out + ret void +} + ; Load an i32 value from the constant address space. ; R600-CHECK: @load_const_addrspace_i32 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 -- 1.7.11.4
>From 31b245e0948ecdb0c2b7a2efa54f4683aba7653e Mon Sep 17 00:00:00 2001 From: Tom Stellard <thomas.stell...@amd.com> Date: Tue, 16 Jul 2013 18:32:24 -0700 Subject: [PATCH 3/3] R600/SI: Fix crash with VSELECT https://bugs.freedesktop.org/show_bug.cgi?id=66175 --- lib/Target/R600/SIISelLowering.cpp | 11 ++++++++++- lib/Target/R600/SIInstructions.td | 3 +++ test/CodeGen/R600/vselect.ll | 15 +++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 336bfbf..520b0e4 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -34,6 +34,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass); addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass); @@ -72,6 +75,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SETCC, MVT::v2i1, Expand); + setOperationAction(ISD::SETCC, MVT::v4i1, Expand); + setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -316,7 +322,10 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( } EVT SITargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - return MVT::i1; + if (!VT.isVector()) { + return MVT::i1; + } + return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); } MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 48add71..68cf692 100644 --- a/lib/Target/R600/SIInstructions.td +++ 
b/lib/Target/R600/SIInstructions.td @@ -1485,6 +1485,9 @@ def : BitConvert <f64, i64, VReg_64>; def : BitConvert <v2f32, v2i32, VReg_64>; def : BitConvert <v2i32, v2f32, VReg_64>; +def : BitConvert <v4f32, v4i32, VReg_128>; +def : BitConvert <v4i32, v4f32, VReg_128>; + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll index 79d896b..72a9084 100644 --- a/test/CodeGen/R600/vselect.ll +++ b/test/CodeGen/R600/vselect.ll @@ -1,9 +1,14 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @test_select_v2i32 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;SI-CHECK: @test_select_v2i32 +;SI-CHECK: V_CNDMASK_B32_e64 +;SI-CHECK: V_CNDMASK_B32_e64 + define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) { entry: %0 = load <2 x i32> addrspace(1)* %in0 @@ -18,6 +23,10 @@ entry: ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;SI-CHECK: @test_select_v2f32 +;SI-CHECK: V_CNDMASK_B32_e64 +;SI-CHECK: V_CNDMASK_B32_e64 + define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) { entry: %0 = load <2 x float> addrspace(1)* %in0 @@ -34,6 +43,12 @@ entry: ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;SI-CHECK: @test_select_v4i32 +;SI-CHECK: V_CNDMASK_B32_e64 +;SI-CHECK: V_CNDMASK_B32_e64 +;SI-CHECK: V_CNDMASK_B32_e64 +;SI-CHECK: V_CNDMASK_B32_e64 + define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) { entry: %0 = load <4 x i32> addrspace(1)* %in0 -- 1.7.11.4
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev