From: Marek Olšák <marek.ol...@amd.com> This adds a new type of intrinsic and SDNode: SampleRaw. All fields of the MIMG opcodes are exposed and can be set by Mesa, even DMASK. All GATHER4 variants are added and there are a lot of them.
v2: document DMASK behavior --- lib/Target/R600/AMDGPUISelLowering.cpp | 24 +++++++++ lib/Target/R600/AMDGPUISelLowering.h | 31 +++++++++++ lib/Target/R600/SIISelLowering.cpp | 72 +++++++++++++++++++++++++ lib/Target/R600/SIISelLowering.h | 2 + lib/Target/R600/SIInstrInfo.td | 91 ++++++++++++++++++++++++++++++++ lib/Target/R600/SIInstructions.td | 96 +++++++++++++++++++++++++--------- lib/Target/R600/SIIntrinsics.td | 48 +++++++++++++++++ 7 files changed, 340 insertions(+), 24 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 849f169..359161c 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1542,6 +1542,30 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SAMPLEB) NODE_NAME_CASE(SAMPLED) NODE_NAME_CASE(SAMPLEL) + NODE_NAME_CASE(GATHER4) + NODE_NAME_CASE(GATHER4_CL) + NODE_NAME_CASE(GATHER4_L) + NODE_NAME_CASE(GATHER4_B) + NODE_NAME_CASE(GATHER4_B_CL) + NODE_NAME_CASE(GATHER4_LZ) + NODE_NAME_CASE(GATHER4_C) + NODE_NAME_CASE(GATHER4_C_CL) + NODE_NAME_CASE(GATHER4_C_L) + NODE_NAME_CASE(GATHER4_C_B) + NODE_NAME_CASE(GATHER4_C_B_CL) + NODE_NAME_CASE(GATHER4_C_LZ) + NODE_NAME_CASE(GATHER4_O) + NODE_NAME_CASE(GATHER4_CL_O) + NODE_NAME_CASE(GATHER4_L_O) + NODE_NAME_CASE(GATHER4_B_O) + NODE_NAME_CASE(GATHER4_B_CL_O) + NODE_NAME_CASE(GATHER4_LZ_O) + NODE_NAME_CASE(GATHER4_C_O) + NODE_NAME_CASE(GATHER4_C_CL_O) + NODE_NAME_CASE(GATHER4_C_L_O) + NODE_NAME_CASE(GATHER4_C_B_O) + NODE_NAME_CASE(GATHER4_C_B_CL_O) + NODE_NAME_CASE(GATHER4_C_LZ_O) NODE_NAME_CASE(STORE_MSKOR) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index d5d821d..a9af195 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -203,6 +203,37 @@ enum { SAMPLEB, SAMPLED, SAMPLEL, + + // Gather4 opcodes + GATHER4, + GATHER4_CL, + GATHER4_L, 
+ GATHER4_B, + GATHER4_B_CL, + GATHER4_LZ, + + GATHER4_C, + GATHER4_C_CL, + GATHER4_C_L, + GATHER4_C_B, + GATHER4_C_B_CL, + GATHER4_C_LZ, + + GATHER4_O, + GATHER4_CL_O, + GATHER4_L_O, + GATHER4_B_O, + GATHER4_B_CL_O, + GATHER4_LZ_O, + + GATHER4_C_O, + GATHER4_C_CL_O, + GATHER4_C_L_O, + GATHER4_C_B_O, + GATHER4_C_B_CL_O, + GATHER4_C_LZ_O, + + // Memory opcodes FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 1a861d4..909255d 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -688,6 +688,59 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + // Gather4 intrinsics + case AMDGPUIntrinsic::SI_gather4: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_cl: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_l: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_b: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_b_cl: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_lz: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ, Op, DAG); + + case AMDGPUIntrinsic::SI_gather4_c: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_c_cl: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_c_l: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_c_b: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_c_b_cl: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL, Op, DAG); + case 
AMDGPUIntrinsic::SI_gather4_c_lz: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ, Op, DAG); + + case AMDGPUIntrinsic::SI_gather4_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_O, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_cl_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL_O, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_l_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L_O, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_b_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_O, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_b_cl_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL_O, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_lz_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ_O, Op, DAG); + + case AMDGPUIntrinsic::SI_gather4_c_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_O, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_c_cl_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL_O, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_c_l_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L_O, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_c_b_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_O, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_c_b_cl_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL_O, Op, DAG); + case AMDGPUIntrinsic::SI_gather4_c_lz_o: + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ_O, Op, DAG); } } @@ -876,6 +929,25 @@ SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode, Op.getOperand(4)); } +SDValue SITargetLowering::LowerSampleRawIntrinsic(unsigned Opcode, + const SDValue &Op, + SelectionDAG &DAG) const { + SDValue Ops[] = { + Op.getOperand(1), + Op.getOperand(2), + Op.getOperand(3), + Op.getOperand(4), + Op.getOperand(5), + Op.getOperand(6), + Op.getOperand(7), + Op.getOperand(8), + Op.getOperand(9), + Op.getOperand(10), + Op.getOperand(11) + }; + return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Ops); +} + SDValue 
SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType() != MVT::i64) return SDValue(); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index c6eaa81..b48da3b 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -25,6 +25,8 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue Chain, unsigned Offset, bool Signed) const; SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SelectionDAG &DAG) const; + SDValue LowerSampleRawIntrinsic(unsigned Opcode, const SDValue &Op, + SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 3368d49..23a7ca3 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -57,6 +57,50 @@ def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">; def SIsampled : SDSample<"AMDGPUISD::SAMPLED">; def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">; +class SDSampleRaw<string opcode> : SDNode <opcode, + SDTypeProfile<1, 11, + [SDTCisVT<0, v4f32>, // vdata(VGPR) + SDTCisVT<2, v32i8>, // rsrc(SGPR) + SDTCisVT<3, v4i32>, // sampler(SGPR) + SDTCisVT<4, i32>, // dmask(imm) + SDTCisVT<5, i32>, // unorm(imm) + SDTCisVT<6, i32>, // r128(imm) + SDTCisVT<7, i32>, // da(imm) + SDTCisVT<8, i32>, // glc(imm) + SDTCisVT<9, i32>, // slc(imm) + SDTCisVT<10, i32>, // tfe(imm) + SDTCisVT<11, i32> // lwe(imm) + ]> +>; + +def SIgather4 : SDSampleRaw<"AMDGPUISD::GATHER4">; +def SIgather4_cl : SDSampleRaw<"AMDGPUISD::GATHER4_CL">; +def SIgather4_l : SDSampleRaw<"AMDGPUISD::GATHER4_L">; +def SIgather4_b : SDSampleRaw<"AMDGPUISD::GATHER4_B">; +def SIgather4_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL">; +def SIgather4_lz : SDSampleRaw<"AMDGPUISD::GATHER4_LZ">; + +def SIgather4_c : 
SDSampleRaw<"AMDGPUISD::GATHER4_C">; +def SIgather4_c_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL">; +def SIgather4_c_l : SDSampleRaw<"AMDGPUISD::GATHER4_C_L">; +def SIgather4_c_b : SDSampleRaw<"AMDGPUISD::GATHER4_C_B">; +def SIgather4_c_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL">; +def SIgather4_c_lz : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ">; + +def SIgather4_o : SDSampleRaw<"AMDGPUISD::GATHER4_O">; +def SIgather4_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_CL_O">; +def SIgather4_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_L_O">; +def SIgather4_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_O">; +def SIgather4_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL_O">; +def SIgather4_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_LZ_O">; + +def SIgather4_c_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_O">; +def SIgather4_c_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL_O">; +def SIgather4_c_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_L_O">; +def SIgather4_c_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_O">; +def SIgather4_c_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL_O">; +def SIgather4_c_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ_O">; + // Transformation function, extract the lower 32bit of a 64bit immediate def LO32 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32); @@ -658,6 +702,53 @@ multiclass MIMG_Sampler <bits<7> op, string asm> { defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>; } +class MIMG_Gather_Helper <bits<7> op, string asm, + RegisterClass dst_rc, + RegisterClass src_rc> : MIMG < + op, + (outs dst_rc:$vdata), + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr, + SReg_256:$srsrc, SReg_128:$ssamp), + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," + #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", + []> { + let mayLoad = 1; + let mayStore = 0; + + // DMASK was repurposed for GATHER4. 
4 components are always + // returned and DMASK works like a swizzle - it selects + // the component to fetch. The only useful DMASK values are + // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns + // (red,red,red,red) etc.) The ISA document doesn't mention + // this. + // Therefore, disable all code which updates DMASK by setting these two: + let MIMG = 0; + let hasPostISelHook = 0; +} + +multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm, + RegisterClass dst_rc, + int channels> { + def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_32>, + MIMG_Mask<asm#"_V1", channels>; + def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>, + MIMG_Mask<asm#"_V2", channels>; + def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>, + MIMG_Mask<asm#"_V4", channels>; + def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>, + MIMG_Mask<asm#"_V8", channels>; + def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>, + MIMG_Mask<asm#"_V16", channels>; +} + +multiclass MIMG_Gather <bits<7> op, string asm> { + defm _V1 : MIMG_Gather_Src_Helper<op, asm, VReg_32, 1>; + defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>; + defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>; + defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>; +} + //===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index d4a7c5c..d65d88b 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -887,30 +887,30 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>; //def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>; //def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>; -//def 
IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>; -//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>; -//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>; -//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>; -//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>; -//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>; -//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>; -//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>; -//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>; -//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>; -//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>; -//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>; -//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>; -//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>; -//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>; -//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>; -//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>; -//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>; -//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>; -//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>; -//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>; -//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>; -//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>; -//def IMAGE_GATHER4_C_LZ_O : 
MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>; +defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">; +defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">; +defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">; +defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">; +defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">; +defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">; +defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">; +defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">; +defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">; +defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">; +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">; +defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">; +defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">; +defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">; +defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">; +defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">; +defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">; +defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">; +defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">; +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">; +defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">; +defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">; +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">; +defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">; //def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>; //def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>; //def 
IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>; @@ -1655,6 +1655,54 @@ def : SextInReg <i16, 16>; /********** Image sampling patterns **********/ /********** ======================= **********/ +class SampleRawPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm, + i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe), + (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da), + (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc), + $addr, $rsrc, $sampler) +>; + +// Gather4 patterns. Only the variants which make sense are defined. +def : SampleRawPattern<SIgather4, IMAGE_GATHER4_V4_V2, v2i32>; +def : SampleRawPattern<SIgather4, IMAGE_GATHER4_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>; +def : SampleRawPattern<SIgather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>; + +def : SampleRawPattern<SIgather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>; + +def : SampleRawPattern<SIgather4_o, 
IMAGE_GATHER4_O_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>; + +def : SampleRawPattern<SIgather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>; +def : SampleRawPattern<SIgather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>; +def : SampleRawPattern<SIgather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>; + /* SIsample for simple 1D texture lookup */ def : Pat < (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm), diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 00e32c0..9d85f17 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -56,11 +56,59 @@ let TargetPrefix = "SI", isTarget = 1 in { class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; + // Fully-flexible SAMPLE instruction. 
+ class SampleRaw : Intrinsic < + [llvm_v4f32_ty], // vdata(VGPR) + [llvm_anyvector_ty, // vaddr(VGPR) + llvm_v32i8_ty, // rsrc(SGPR) + llvm_v16i8_ty, // sampler(SGPR) + llvm_i32_ty, // dmask(imm) + llvm_i32_ty, // unorm(imm) + llvm_i32_ty, // r128(imm) + llvm_i32_ty, // da(imm) + llvm_i32_ty, // glc(imm) + llvm_i32_ty, // slc(imm) + llvm_i32_ty, // tfe(imm) + llvm_i32_ty], // lwe(imm) + [IntrNoMem]>; + def int_SI_sample : Sample; def int_SI_sampleb : Sample; def int_SI_sampled : Sample; def int_SI_samplel : Sample; + // Basic gather4 + def int_SI_gather4 : SampleRaw; + def int_SI_gather4_cl : SampleRaw; + def int_SI_gather4_l : SampleRaw; + def int_SI_gather4_b : SampleRaw; + def int_SI_gather4_b_cl : SampleRaw; + def int_SI_gather4_lz : SampleRaw; + + // Gather4 with comparison + def int_SI_gather4_c : SampleRaw; + def int_SI_gather4_c_cl : SampleRaw; + def int_SI_gather4_c_l : SampleRaw; + def int_SI_gather4_c_b : SampleRaw; + def int_SI_gather4_c_b_cl : SampleRaw; + def int_SI_gather4_c_lz : SampleRaw; + + // Gather4 with offsets + def int_SI_gather4_o : SampleRaw; + def int_SI_gather4_cl_o : SampleRaw; + def int_SI_gather4_l_o : SampleRaw; + def int_SI_gather4_b_o : SampleRaw; + def int_SI_gather4_b_cl_o : SampleRaw; + def int_SI_gather4_lz_o : SampleRaw; + + // Gather4 with comparison and offsets + def int_SI_gather4_c_o : SampleRaw; + def int_SI_gather4_c_cl_o : SampleRaw; + def int_SI_gather4_c_l_o : SampleRaw; + def int_SI_gather4_c_b_o : SampleRaw; + def int_SI_gather4_c_b_cl_o : SampleRaw; + def int_SI_gather4_c_lz_o : SampleRaw; + def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; -- 1.9.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev