On Tue, Jul 02, 2013 at 10:44:10AM +0200, Niels Ole Salscheider wrote: > Hi, > > the attached patches add initial support for double precision operations on > Southern Islands cards. > > Some expressions containing multiple double precision kernel arguments cause > llvm to run until all memory is used - but I do not (yet) know why. > It works fine as long as I pass pointers to double values. >
I may have an idea about why this is happening. Could you file a bug report and attach an LLVM IR test case? All these patches look good to me, but #2 and #6 should have a test case with them. If you resubmit these patches with test cases, I will push the entire series. Nice work! -Tom > Regards, > > Ole > From 4224b314cf2d97cdf2ac99564d6155fa04fbb971 Mon Sep 17 00:00:00 2001 > From: Niels Ole Salscheider <niels_...@salscheider-online.de> > Date: Sat, 1 Jun 2013 16:48:56 +0200 > Subject: [PATCH 1/6] R600/SI: Add initial double precision support for SI > > --- > lib/Target/R600/AMDGPUISelLowering.cpp | 6 ++++++ > lib/Target/R600/SIISelLowering.cpp | 1 + > lib/Target/R600/SIInstructions.td | 30 +++++++++++++++++++++++++++++- > test/CodeGen/R600/fadd64.ll | 13 +++++++++++++ > test/CodeGen/R600/fdiv64.ll | 14 ++++++++++++++ > test/CodeGen/R600/fmul64.ll | 13 +++++++++++++ > test/CodeGen/R600/load64.ll | 20 ++++++++++++++++++++ > 7 Dateien geändert, 96 Zeilen hinzugefügt(+), 1 Zeile entfernt(-) > create mode 100644 test/CodeGen/R600/fadd64.ll > create mode 100644 test/CodeGen/R600/fdiv64.ll > create mode 100644 test/CodeGen/R600/fmul64.ll > create mode 100644 test/CodeGen/R600/load64.ll > > diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp > b/lib/Target/R600/AMDGPUISelLowering.cpp > index 4019a1f..5f3d496 100644 > --- a/lib/Target/R600/AMDGPUISelLowering.cpp > +++ b/lib/Target/R600/AMDGPUISelLowering.cpp > @@ -60,12 +60,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine > &TM) : > setOperationAction(ISD::STORE, MVT::v4f32, Promote); > AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); > > + setOperationAction(ISD::STORE, MVT::f64, Promote); > + AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); > + > setOperationAction(ISD::LOAD, MVT::f32, Promote); > AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); > > setOperationAction(ISD::LOAD, MVT::v4f32, Promote); > AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); > > + setOperationAction(ISD::LOAD, MVT::f64, Promote); > + AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); > + > setOperationAction(ISD::MUL, MVT::i64, Expand); > > setOperationAction(ISD::UDIV, MVT::i32, Expand); > diff --git a/lib/Target/R600/SIISelLowering.cpp > b/lib/Target/R600/SIISelLowering.cpp > index 9d4cfef..0d17a12 100644 > --- a/lib/Target/R600/SIISelLowering.cpp > +++ b/lib/Target/R600/SIISelLowering.cpp > @@ -45,6 +45,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : > > addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass); > addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass); > + addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass); > > addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); > addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); > diff --git a/lib/Target/R600/SIInstructions.td > b/lib/Target/R600/SIInstructions.td > index 9c96c08..b956387 100644 > --- a/lib/Target/R600/SIInstructions.td > +++ b/lib/Target/R600/SIInstructions.td > @@ -660,7 +660,9 @@ defm V_RSQ_LEGACY_F32 : VOP1_32 < > [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] > >; > defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; > -defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; > +defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", > + [(set f64:$dst, (fdiv FP_ONE, f64:$src0))] > +>; > defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; > defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; > defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; > @@ -996,10 +998,25 @@ def V_LSHR_B64 : VOP3_64_Shift <0x00000162, > "V_LSHR_B64", > >; > def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", []>; > > +let isCommutable = 1 in { > + > def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>; > def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; > def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; > def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; > + > +} // isCommutable = 1 > + > +def : Pat < > + (fadd f64:$src0, f64:$src1), > + (V_ADD_F64 $src0, $src1, (i64 0)) > +>; > + > +def : Pat < > + (fmul f64:$src0, f64:$src1), > + (V_MUL_F64 $src0, $src1, (i64 0)) > +>; > + > def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; > > let isCommutable = 1 in { > @@ -1417,6 +1434,10 @@ def : BitConvert <i32, f32, VReg_32>; > def : BitConvert <f32, i32, SReg_32>; > def : BitConvert <f32, i32, VReg_32>; > > +def : BitConvert <i64, f64, VReg_64>; > + > +def : BitConvert <f64, i64, VReg_64>; > + > /********** =================== **********/ > /********** Src & Dst modifiers **********/ > /********** =================== **********/ > @@ -1505,6 +1526,11 @@ def : Pat< > (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1)) > >; > > +def : Pat< > + (fdiv f64:$src0, f64:$src1), > + (V_MUL_F64 $src0, (V_RCP_F64_e32 $src1), (i64 0)) > +>; > + > def : Pat < > (fcos f32:$src0), > (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) > @@ -1634,6 +1660,8 @@ multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, > ValueType vt, > >; > } > > +defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64, > + global_load, constant_load>; > defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, > global_load, constant_load>; > defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, > diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll > new file mode 100644 > index 0000000..130302f > --- /dev/null > +++ b/test/CodeGen/R600/fadd64.ll > @@ -0,0 +1,13 @@ > +; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s > + > +; CHECK: @fadd_f64 > +; CHECK: V_ADD_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}} > + > +define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > + double addrspace(1)* %in2) { > + %r0 = load double addrspace(1)* %in1 > + %r1 = load double addrspace(1)* %in2 > + %r2 = fadd double %r0, %r1 > + store double %r2, double addrspace(1)* %out > + ret void > +} > diff --git a/test/CodeGen/R600/fdiv64.ll b/test/CodeGen/R600/fdiv64.ll > new file mode 100644 > index 0000000..76c5ca3 > --- /dev/null > +++ b/test/CodeGen/R600/fdiv64.ll > @@ -0,0 +1,14 @@ > +; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s > + > +; CHECK: @fdiv_f64 > +; CHECK: V_RCP_F64_e32 {{VGPR[0-9]+_VGPR[0-9]+}} > +; CHECK: V_MUL_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}} > + > +define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > + double addrspace(1)* %in2) { > + %r0 = load double addrspace(1)* %in1 > + %r1 = load double addrspace(1)* %in2 > + %r2 = fdiv double %r0, %r1 > + store double %r2, double addrspace(1)* %out > + ret void > +} > diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll > new file mode 100644 > index 0000000..8a57d4a > --- /dev/null > +++ b/test/CodeGen/R600/fmul64.ll > @@ -0,0 +1,13 @@ > +; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s > + > +; CHECK: @fmul_f64 > +; CHECK: V_MUL_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}} > + > +define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > + double addrspace(1)* %in2) { > + %r0 = load double addrspace(1)* %in1 > + %r1 = load double addrspace(1)* %in2 > + %r2 = fmul double %r0, %r1 > + store double %r2, double addrspace(1)* %out > + ret void > +} > diff --git a/test/CodeGen/R600/load64.ll b/test/CodeGen/R600/load64.ll > new file mode 100644 > index 0000000..3b4a8f8 > --- /dev/null > +++ b/test/CodeGen/R600/load64.ll > @@ -0,0 +1,20 @@ > +; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s > + > +; load a f64 value from the global address space. > +; CHECK: @load_f64 > +; CHECK: BUFFER_LOAD_DWORDX2 VGPR{{[0-9]+}} > +define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) { > +entry: > + %0 = load double addrspace(1)* %in > + store double %0, double addrspace(1)* %out > + ret void > +} > + > +; Load a f64 value from the constant address space. > +; CHECK: @load_const_addrspace_f64 > +; CHECK: S_LOAD_DWORDX2 SGPR{{[0-9]+}} > +define void @load_const_addrspace_f64(double addrspace(1)* %out, double > addrspace(2)* %in) { > + %1 = load double addrspace(2)* %in > + store double %1, double addrspace(1)* %out > + ret void > +} > -- > 1.7.11.7 > > From da82e334a976619497e232cbfd657b88137970a9 Mon Sep 17 00:00:00 2001 > From: Niels Ole Salscheider <niels_...@salscheider-online.de> > Date: Sat, 15 Jun 2013 00:11:50 +0200 > Subject: [PATCH 2/6] R600/SI: SI support for 64bit ConstantFP > > --- > lib/Target/R600/SIInstrInfo.td | 12 ++++++++++++ > lib/Target/R600/SIInstructions.td | 7 +++++++ > 2 Dateien geändert, 19 Zeilen hinzugefügt(+) > > diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td > index 36812ca..a13168c 100644 > --- a/lib/Target/R600/SIInstrInfo.td > +++ b/lib/Target/R600/SIInstrInfo.td > @@ -21,11 +21,23 @@ def LO32 : SDNodeXForm<imm, [{ > return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32); > }]>; > > +def LO32f : SDNodeXForm<fpimm, [{ > + uint64_t val = N->getValueAPF().bitcastToAPInt().getZExtValue() & > 0xffffffff; > + float *fval = reinterpret_cast<float *>(&val); > + return CurDAG->getTargetConstantFP(*fval, MVT::f32); > +}]>; > + > // Transformation function, extract the upper 32bit of a 64bit immediate > def HI32 : SDNodeXForm<imm, [{ > return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32); > }]>; > > +def HI32f : SDNodeXForm<fpimm, [{ > + uint64_t val = N->getValueAPF().bitcastToAPInt().getZExtValue() >> 32; > + float *fval = reinterpret_cast<float *>(&val); > + return CurDAG->getTargetConstantFP(*fval, MVT::f32); > +}]>; > + > def IMM8bitDWORD : ImmLeaf < > i32, [{ > return (Imm & ~0x3FC) == 0; > diff --git a/lib/Target/R600/SIInstructions.td > b/lib/Target/R600/SIInstructions.td > index b956387..0c62ac2 100644 > --- a/lib/Target/R600/SIInstructions.td > +++ b/lib/Target/R600/SIInstructions.td > @@ -1492,6 +1492,13 @@ def : Pat < > (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1) > >; > > +def : Pat < > + (f64 fpimm:$imm), > + (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), > + (S_MOV_B32 (f32 (LO32f fpimm:$imm))), sub0), > + (S_MOV_B32 (f32 (HI32f fpimm:$imm))), sub1) > +>; > + > /********** ===================== **********/ > /********** Interpolation Paterns **********/ > /********** ===================== **********/ > -- > 1.7.11.7 > > From bd9d8bb95be28bd9df19526a1e83fa23b6cf5086 Mon Sep 17 00:00:00 2001 > From: Niels Ole Salscheider <niels_...@salscheider-online.de> > Date: Sat, 15 Jun 2013 16:20:29 +0200 > Subject: [PATCH 3/6] R600/SI: Add double precision fsub pattern for SI > > --- > lib/Target/R600/SIISelLowering.cpp | 15 +++++++++++++++ > lib/Target/R600/SIInstructions.td | 17 ++++++++++++++--- > test/CodeGen/R600/{fadd64.ll => fsub64.ll} | 8 ++++---- > 3 Dateien geändert, 33 Zeilen hinzugefügt(+), 7 Zeilen entfernt(-) > copy test/CodeGen/R600/{fadd64.ll => fsub64.ll} (67%) > > diff --git a/lib/Target/R600/SIISelLowering.cpp > b/lib/Target/R600/SIISelLowering.cpp > index 0d17a12..629ccc6 100644 > --- a/lib/Target/R600/SIISelLowering.cpp > +++ b/lib/Target/R600/SIISelLowering.cpp > @@ -294,6 +294,21 @@ MachineBasicBlock * > SITargetLowering::EmitInstrWithCustomInserter( > MI->eraseFromParent(); > break; > } > + case AMDGPU::V_SUB_F64: { > + const SIInstrInfo *TII = > + static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); > + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), > + MI->getOperand(0).getReg()) > + .addReg(MI->getOperand(1).getReg()) > + .addReg(MI->getOperand(2).getReg()) > + .addImm(0) /* src2 */ > + .addImm(0) /* ABS */ > + .addImm(0) /* CLAMP */ > + .addImm(0) /* OMOD */ > + .addImm(2); /* NEG */ > + MI->eraseFromParent(); > + break; > + } > } > return BB; > } > diff --git a/lib/Target/R600/SIInstructions.td > b/lib/Target/R600/SIInstructions.td > index 0c62ac2..6be54ba 100644 > --- a/lib/Target/R600/SIInstructions.td > +++ b/lib/Target/R600/SIInstructions.td > @@ -1220,17 +1220,23 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>; > > } // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] > > -// This psuedo instruction takes a pointer as input and outputs a resource > -// constant that can be used with the ADDR64 MUBUF instructions. > - > let usesCustomInserter = 1 in { > > +// This psuedo instruction takes a pointer as input and outputs a resource > +// constant that can be used with the ADDR64 MUBUF instructions. > def SI_ADDR64_RSRC : InstSI < > (outs SReg_128:$srsrc), > (ins SReg_64:$ptr), > "", [] > >; > > +def V_SUB_F64 : InstSI < > + (outs VReg_64:$dst), > + (ins VReg_64:$src0, VReg_64:$src1), > + "V_SUB_F64 $dst, $src0, $src1", > + [] > +>; > + > } // end usesCustomInserter > > } // end IsCodeGenOnly, isPseudo > @@ -1259,6 +1265,11 @@ def : Pat < > $src0, $src1, $src2, $src3) > >; > > +def : Pat < > + (f64 (fsub f64:$src0, f64:$src1)), > + (V_SUB_F64 $src0, $src1) > +>; > + > /********** ======================= **********/ > /********** Image sampling patterns **********/ > /********** ======================= **********/ > diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fsub64.ll > similarity index 67% > copy from test/CodeGen/R600/fadd64.ll > copy to test/CodeGen/R600/fsub64.ll > index 130302f..fa59dcc 100644 > --- a/test/CodeGen/R600/fadd64.ll > +++ b/test/CodeGen/R600/fsub64.ll > @@ -1,13 +1,13 @@ > ; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s > > -; CHECK: @fadd_f64 > -; CHECK: V_ADD_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}} > +; CHECK: @fsub_f64 > +; CHECK: V_ADD_F64 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}}, 0, 0, 0, 0, 2 > > -define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > +define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > double addrspace(1)* %in2) { > %r0 = load double addrspace(1)* %in1 > %r1 = load double addrspace(1)* %in2 > - %r2 = fadd double %r0, %r1 > + %r2 = fsub double %r0, %r1 > store double %r2, double addrspace(1)* %out > ret void > } > -- > 1.7.11.7 > > From 79bb46b5e5932960a39c2fd57007a82060ec0a47 Mon Sep 17 00:00:00 2001 > From: Niels Ole Salscheider <niels_...@salscheider-online.de> > Date: Sat, 15 Jun 2013 16:27:26 +0200 > Subject: [PATCH 4/6] R600/SI: Add fsqrt pattern for SI > > --- > lib/Target/R600/SIInstructions.td | 8 ++++++-- > test/CodeGen/R600/fsqrt.ll | 24 ++++++++++++++++++++++++ > 2 Dateien geändert, 30 Zeilen hinzugefügt(+), 2 Zeilen entfernt(-) > create mode 100644 test/CodeGen/R600/fsqrt.ll > > diff --git a/lib/Target/R600/SIInstructions.td > b/lib/Target/R600/SIInstructions.td > index 6be54ba..0ee270f 100644 > --- a/lib/Target/R600/SIInstructions.td > +++ b/lib/Target/R600/SIInstructions.td > @@ -666,8 +666,12 @@ defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", > defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; > defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; > defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; > -defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>; > -defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>; > +defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", > + [(set f32:$dst, (fsqrt f32:$src0))] > +>; > +defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", > + [(set f64:$dst, (fsqrt f64:$src0))] > +>; > defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>; > defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>; > defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>; > diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll > new file mode 100644 > index 0000000..2613805 > --- /dev/null > +++ b/test/CodeGen/R600/fsqrt.ll > @@ -0,0 +1,24 @@ > +; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s > + > +; CHECK: @fsqrt_f32 > +; CHECK: V_SQRT_F32_e32 {{VGPR[0-9]+, VGPR[0-9]+}} > + > +define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) { > + %r0 = load float addrspace(1)* %in > + %r1 = call float @llvm.sqrt.f32(float %r0) > + store float %r1, float addrspace(1)* %out > + ret void > +} > + > +; CHECK: @fsqrt_f64 > +; CHECK: V_SQRT_F64_e32 {{VGPR[0-9]+_VGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+}} > + > +define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) { > + %r0 = load double addrspace(1)* %in > + %r1 = call double @llvm.sqrt.f64(double %r0) > + store double %r1, double addrspace(1)* %out > + ret void > +} > + > +declare float @llvm.sqrt.f32(float %Val) > +declare double @llvm.sqrt.f64(double %Val) > -- > 1.7.11.7 > > From 7f5b1d6a4fe3b1b8ff7ae21c68310285113ce665 Mon Sep 17 00:00:00 2001 > From: Niels Ole Salscheider <niels_...@salscheider-online.de> > Date: Thu, 20 Jun 2013 17:29:20 +0200 > Subject: [PATCH 6/6] R600/SI: Add support for f64 kernel arguments > > --- > lib/Target/R600/AMDGPUCallingConv.td | 2 +- > 1 Datei geändert, 1 Zeile hinzugefügt(+), 1 Zeile entfernt(-) > > diff --git a/lib/Target/R600/AMDGPUCallingConv.td > b/lib/Target/R600/AMDGPUCallingConv.td > index 826932b..29a0326 100644 > --- a/lib/Target/R600/AMDGPUCallingConv.td > +++ b/lib/Target/R600/AMDGPUCallingConv.td > @@ -39,7 +39,7 @@ def CC_SI : CallingConv<[ > // Calling convention for SI compute kernels > def CC_SI_Kernel : CallingConv<[ > CCIfType<[v4i32, v4f32], CCAssignToStack <16, 4>>, > - CCIfType<[i64], CCAssignToStack < 8, 4>>, > + CCIfType<[i64, f64], CCAssignToStack < 8, 4>>, > CCIfType<[i32, f32], CCAssignToStack < 4, 4>>, > CCIfType<[i16], CCAssignToStack < 2, 4>>, > CCIfType<[i8], CCAssignToStack < 1, 4>> > -- > 1.7.11.7 > > From aca3cf3f010e597d70950526a946bafbc5421515 Mon Sep 17 00:00:00 2001 > From: Niels Ole Salscheider <niels_...@salscheider-online.de> > Date: Sat, 15 Jun 2013 01:10:48 +0200 > Subject: [PATCH 5/6] R600/SI: Implement select and compares for SI > > --- > lib/Target/R600/SIInstructions.td | 24 +++++++++--- > test/CodeGen/R600/fcmp64.ll | 79 > +++++++++++++++++++++++++++++++++++++++ > 2 Dateien geändert, 97 Zeilen hinzugefügt(+), 6 Zeilen entfernt(-) > create mode 100644 test/CodeGen/R600/fcmp64.ll > > diff --git a/lib/Target/R600/SIInstructions.td > b/lib/Target/R600/SIInstructions.td > index 0ee270f..15715ce 100644 > --- a/lib/Target/R600/SIInstructions.td > +++ b/lib/Target/R600/SIInstructions.td > @@ -176,19 +176,19 @@ defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, > "V_CMPX_TRU_F32">; > } // End hasSideEffects = 1, Defs = [EXEC] > > defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">; > -defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64">; > -defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64">; > -defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64">; > -defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64">; > +defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_LT>; > +defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", f64, COND_EQ>; > +defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", f64, COND_LE>; > +defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", f64, COND_GT>; > defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">; > -defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64">; > +defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", f64, COND_GE>; > defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">; > defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">; > defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">; > defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">; > defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">; > defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">; > -defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64">; > +defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_NE>; > defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">; > defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">; > > @@ -815,6 +815,18 @@ def : Pat < > (V_CNDMASK_B32_e64 $src0, $src1, $src2) > >; > > +//use two V_CNDMASK_B32_e64 instructions for f64 > +def : Pat < > + (f64 (select i1:$src2, f64:$src1, f64:$src0)), > + (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), > + (V_CNDMASK_B32_e64 (EXTRACT_SUBREG $src0, sub0), > + (EXTRACT_SUBREG $src1, sub0), > + $src2), sub0), > + (V_CNDMASK_B32_e64 (EXTRACT_SUBREG $src0, sub1), > + (EXTRACT_SUBREG $src1, sub1), > + $src2), sub1) > +>; > + > defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; > defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; > > diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll > new file mode 100644 > index 0000000..8f2513b > --- /dev/null > +++ b/test/CodeGen/R600/fcmp64.ll > @@ -0,0 +1,79 @@ > +; RUN: llc < %s -march=r600 -mcpu=tahiti | FileCheck %s > + > +; CHECK: @flt_f64 > +; CHECK: V_CMP_LT_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}} > + > +define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > + double addrspace(1)* %in2) { > + %r0 = load double addrspace(1)* %in1 > + %r1 = load double addrspace(1)* %in2 > + %r2 = fcmp ult double %r0, %r1 > + %r3 = select i1 %r2, double %r0, double %r1 > + store double %r3, double addrspace(1)* %out > + ret void > +} > + > +; CHECK: @fle_f64 > +; CHECK: V_CMP_LE_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}} > + > +define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > + double addrspace(1)* %in2) { > + %r0 = load double addrspace(1)* %in1 > + %r1 = load double addrspace(1)* %in2 > + %r2 = fcmp ule double %r0, %r1 > + %r3 = select i1 %r2, double %r0, double %r1 > + store double %r3, double addrspace(1)* %out > + ret void > +} > + > +; CHECK: @fgt_f64 > +; CHECK: V_CMP_GT_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}} > + > +define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > + double addrspace(1)* %in2) { > + %r0 = load double addrspace(1)* %in1 > + %r1 = load double addrspace(1)* %in2 > + %r2 = fcmp ugt double %r0, %r1 > + %r3 = select i1 %r2, double %r0, double %r1 > + store double %r3, double addrspace(1)* %out > + ret void > +} > + > +; CHECK: @fge_f64 > +; CHECK: V_CMP_GE_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}} > + > +define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > + double addrspace(1)* %in2) { > + %r0 = load double addrspace(1)* %in1 > + %r1 = load double addrspace(1)* %in2 > + %r2 = fcmp uge double %r0, %r1 > + %r3 = select i1 %r2, double %r0, double %r1 > + store double %r3, double addrspace(1)* %out > + ret void > +} > + > +; CHECK: @fne_f64 > +; CHECK: V_CMP_NEQ_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}} > + > +define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > + double addrspace(1)* %in2) { > + %r0 = load double addrspace(1)* %in1 > + %r1 = load double addrspace(1)* %in2 > + %r2 = fcmp une double %r0, %r1 > + %r3 = select i1 %r2, double %r0, double %r1 > + store double %r3, double addrspace(1)* %out > + ret void > +} > + > +; CHECK: @feq_f64 > +; CHECK: V_CMP_EQ_F64_e64 {{SGPR[0-9]+_SGPR[0-9]+, VGPR[0-9]+_VGPR[0-9]+, > VGPR[0-9]+_VGPR[0-9]+}} > + > +define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1, > + double addrspace(1)* %in2) { > + %r0 = load double addrspace(1)* %in1 > + %r1 = load double addrspace(1)* %in2 > + %r2 = fcmp ueq double %r0, %r1 > + %r3 = select i1 %r2, double %r0, double %r1 > + store double %r3, double addrspace(1)* %out > + ret void > +} > -- > 1.7.11.7 > > _______________________________________________ > llvm-commits mailing list > llvm-comm...@cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev