Why are there SDNodes for the other "sample" intrinsics then? Marek
On Mon, Jun 16, 2014 at 5:45 PM, Tom Stellard <t...@stellard.net> wrote: > On Thu, Jun 12, 2014 at 02:11:10AM +0200, Marek Olšák wrote: >> From: Marek Olšák <marek.ol...@amd.com> >> >> This adds a new type of intrinsic and SDNode: SampleRaw. >> All fields of the MIMG opcodes are exposed and can be set by Mesa, >> even DMASK. All GATHER4 variants are added and there are a lot of them. >> >> v2: document DMASK behavior >> --- >> lib/Target/R600/AMDGPUISelLowering.cpp | 24 +++++++++ >> lib/Target/R600/AMDGPUISelLowering.h | 31 +++++++++++ >> lib/Target/R600/SIISelLowering.cpp | 72 +++++++++++++++++++++++++ >> lib/Target/R600/SIISelLowering.h | 2 + >> lib/Target/R600/SIInstrInfo.td | 91 ++++++++++++++++++++++++++++++++ >> lib/Target/R600/SIInstructions.td | 96 >> +++++++++++++++++++++++++--------- >> lib/Target/R600/SIIntrinsics.td | 48 +++++++++++++++++ >> 7 files changed, 340 insertions(+), 24 deletions(-) >> >> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp >> b/lib/Target/R600/AMDGPUISelLowering.cpp >> index 849f169..359161c 100644 >> --- a/lib/Target/R600/AMDGPUISelLowering.cpp >> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp >> @@ -1542,6 +1542,30 @@ const char* >> AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { >> NODE_NAME_CASE(SAMPLEB) >> NODE_NAME_CASE(SAMPLED) >> NODE_NAME_CASE(SAMPLEL) >> + NODE_NAME_CASE(GATHER4) >> + NODE_NAME_CASE(GATHER4_CL) >> + NODE_NAME_CASE(GATHER4_L) >> + NODE_NAME_CASE(GATHER4_B) >> + NODE_NAME_CASE(GATHER4_B_CL) >> + NODE_NAME_CASE(GATHER4_LZ) >> + NODE_NAME_CASE(GATHER4_C) >> + NODE_NAME_CASE(GATHER4_C_CL) >> + NODE_NAME_CASE(GATHER4_C_L) >> + NODE_NAME_CASE(GATHER4_C_B) >> + NODE_NAME_CASE(GATHER4_C_B_CL) >> + NODE_NAME_CASE(GATHER4_C_LZ) >> + NODE_NAME_CASE(GATHER4_O) >> + NODE_NAME_CASE(GATHER4_CL_O) >> + NODE_NAME_CASE(GATHER4_L_O) >> + NODE_NAME_CASE(GATHER4_B_O) >> + NODE_NAME_CASE(GATHER4_B_CL_O) >> + NODE_NAME_CASE(GATHER4_LZ_O) >> + NODE_NAME_CASE(GATHER4_C_O) >> + NODE_NAME_CASE(GATHER4_C_CL_O) >> 
+ NODE_NAME_CASE(GATHER4_C_L_O) >> + NODE_NAME_CASE(GATHER4_C_B_O) >> + NODE_NAME_CASE(GATHER4_C_B_CL_O) >> + NODE_NAME_CASE(GATHER4_C_LZ_O) > > You don't need to add new SDNodes for all these instructions, you can just use > the intrinsic directly in the pattern. > > The only reason to add SDNodes, is if there are optimizations / special > lowering > we can do for these instructions. > >> NODE_NAME_CASE(STORE_MSKOR) >> NODE_NAME_CASE(TBUFFER_STORE_FORMAT) >> } >> diff --git a/lib/Target/R600/AMDGPUISelLowering.h >> b/lib/Target/R600/AMDGPUISelLowering.h >> index d5d821d..a9af195 100644 >> --- a/lib/Target/R600/AMDGPUISelLowering.h >> +++ b/lib/Target/R600/AMDGPUISelLowering.h >> @@ -203,6 +203,37 @@ enum { >> SAMPLEB, >> SAMPLED, >> SAMPLEL, >> + >> + // Gather4 opcodes >> + GATHER4, >> + GATHER4_CL, >> + GATHER4_L, >> + GATHER4_B, >> + GATHER4_B_CL, >> + GATHER4_LZ, >> + >> + GATHER4_C, >> + GATHER4_C_CL, >> + GATHER4_C_L, >> + GATHER4_C_B, >> + GATHER4_C_B_CL, >> + GATHER4_C_LZ, >> + >> + GATHER4_O, >> + GATHER4_CL_O, >> + GATHER4_L_O, >> + GATHER4_B_O, >> + GATHER4_B_CL_O, >> + GATHER4_LZ_O, >> + >> + GATHER4_C_O, >> + GATHER4_C_CL_O, >> + GATHER4_C_L_O, >> + GATHER4_C_B_O, >> + GATHER4_C_B_CL_O, >> + GATHER4_C_LZ_O, >> + >> + // Memory opcodes >> FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, >> STORE_MSKOR, >> LOAD_CONSTANT, >> diff --git a/lib/Target/R600/SIISelLowering.cpp >> b/lib/Target/R600/SIISelLowering.cpp >> index 1a861d4..909255d 100644 >> --- a/lib/Target/R600/SIISelLowering.cpp >> +++ b/lib/Target/R600/SIISelLowering.cpp >> @@ -688,6 +688,59 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, >> SelectionDAG &DAG) const { >> Op.getOperand(1), >> Op.getOperand(2), >> Op.getOperand(3)); >> + >> + // Gather4 intrinsics >> + case AMDGPUIntrinsic::SI_gather4: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_cl: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL, Op, DAG); >> 
+ case AMDGPUIntrinsic::SI_gather4_l: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_b: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_b_cl: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_lz: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ, Op, DAG); >> + >> + case AMDGPUIntrinsic::SI_gather4_c: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_c_cl: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_c_l: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_c_b: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_c_b_cl: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_c_lz: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ, Op, DAG); >> + >> + case AMDGPUIntrinsic::SI_gather4_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_O, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_cl_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_CL_O, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_l_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_L_O, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_b_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_O, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_b_cl_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_B_CL_O, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_lz_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_LZ_O, Op, DAG); >> + >> + case AMDGPUIntrinsic::SI_gather4_c_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_O, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_c_cl_o: >> + return 
LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_CL_O, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_c_l_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_L_O, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_c_b_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_O, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_c_b_cl_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_B_CL_O, Op, DAG); >> + case AMDGPUIntrinsic::SI_gather4_c_lz_o: >> + return LowerSampleRawIntrinsic(AMDGPUISD::GATHER4_C_LZ_O, Op, DAG); >> } >> } >> >> @@ -876,6 +929,25 @@ SDValue SITargetLowering::LowerSampleIntrinsic(unsigned >> Opcode, >> Op.getOperand(4)); >> } >> >> +SDValue SITargetLowering::LowerSampleRawIntrinsic(unsigned Opcode, >> + const SDValue &Op, >> + SelectionDAG &DAG) const { >> + SDValue Ops[] = { >> + Op.getOperand(1), >> + Op.getOperand(2), >> + Op.getOperand(3), >> + Op.getOperand(4), >> + Op.getOperand(5), >> + Op.getOperand(6), >> + Op.getOperand(7), >> + Op.getOperand(8), >> + Op.getOperand(9), >> + Op.getOperand(10), >> + Op.getOperand(11) >> + }; >> + return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Ops); >> +} >> + >> SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { >> if (Op.getValueType() != MVT::i64) >> return SDValue(); >> diff --git a/lib/Target/R600/SIISelLowering.h >> b/lib/Target/R600/SIISelLowering.h >> index c6eaa81..b48da3b 100644 >> --- a/lib/Target/R600/SIISelLowering.h >> +++ b/lib/Target/R600/SIISelLowering.h >> @@ -25,6 +25,8 @@ class SITargetLowering : public AMDGPUTargetLowering { >> SDValue Chain, unsigned Offset, bool Signed) const; >> SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, >> SelectionDAG &DAG) const; >> + SDValue LowerSampleRawIntrinsic(unsigned Opcode, const SDValue &Op, >> + SelectionDAG &DAG) const; >> SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; >> SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; >> SDValue LowerSELECT_CC(SDValue Op, 
SelectionDAG &DAG) const; >> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td >> index 3368d49..23a7ca3 100644 >> --- a/lib/Target/R600/SIInstrInfo.td >> +++ b/lib/Target/R600/SIInstrInfo.td >> @@ -57,6 +57,50 @@ def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">; >> def SIsampled : SDSample<"AMDGPUISD::SAMPLED">; >> def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">; >> >> +class SDSampleRaw<string opcode> : SDNode <opcode, >> + SDTypeProfile<1, 11, >> + [SDTCisVT<0, v4f32>, // vdata(VGPR) >> + SDTCisVT<2, v32i8>, // rsrc(SGPR) >> + SDTCisVT<3, v4i32>, // sampler(SGPR) >> + SDTCisVT<4, i32>, // dmask(imm) >> + SDTCisVT<5, i32>, // unorm(imm) >> + SDTCisVT<6, i32>, // r128(imm) >> + SDTCisVT<7, i32>, // da(imm) >> + SDTCisVT<8, i32>, // glc(imm) >> + SDTCisVT<9, i32>, // slc(imm) >> + SDTCisVT<10, i32>, // tfe(imm) >> + SDTCisVT<11, i32> // lwe(imm) >> + ]> >> +>; >> + >> +def SIgather4 : SDSampleRaw<"AMDGPUISD::GATHER4">; >> +def SIgather4_cl : SDSampleRaw<"AMDGPUISD::GATHER4_CL">; >> +def SIgather4_l : SDSampleRaw<"AMDGPUISD::GATHER4_L">; >> +def SIgather4_b : SDSampleRaw<"AMDGPUISD::GATHER4_B">; >> +def SIgather4_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL">; >> +def SIgather4_lz : SDSampleRaw<"AMDGPUISD::GATHER4_LZ">; >> + >> +def SIgather4_c : SDSampleRaw<"AMDGPUISD::GATHER4_C">; >> +def SIgather4_c_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL">; >> +def SIgather4_c_l : SDSampleRaw<"AMDGPUISD::GATHER4_C_L">; >> +def SIgather4_c_b : SDSampleRaw<"AMDGPUISD::GATHER4_C_B">; >> +def SIgather4_c_b_cl : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL">; >> +def SIgather4_c_lz : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ">; >> + >> +def SIgather4_o : SDSampleRaw<"AMDGPUISD::GATHER4_O">; >> +def SIgather4_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_CL_O">; >> +def SIgather4_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_L_O">; >> +def SIgather4_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_O">; >> +def SIgather4_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_B_CL_O">; >> +def SIgather4_lz_o : 
SDSampleRaw<"AMDGPUISD::GATHER4_LZ_O">; >> + >> +def SIgather4_c_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_O">; >> +def SIgather4_c_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_CL_O">; >> +def SIgather4_c_l_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_L_O">; >> +def SIgather4_c_b_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_O">; >> +def SIgather4_c_b_cl_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_B_CL_O">; >> +def SIgather4_c_lz_o : SDSampleRaw<"AMDGPUISD::GATHER4_C_LZ_O">; >> + >> // Transformation function, extract the lower 32bit of a 64bit immediate >> def LO32 : SDNodeXForm<imm, [{ >> return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, >> MVT::i32); >> @@ -658,6 +702,53 @@ multiclass MIMG_Sampler <bits<7> op, string asm> { >> defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>; >> } >> >> +class MIMG_Gather_Helper <bits<7> op, string asm, >> + RegisterClass dst_rc, >> + RegisterClass src_rc> : MIMG < >> + op, >> + (outs dst_rc:$vdata), >> + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, >> + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr, >> + SReg_256:$srsrc, SReg_128:$ssamp), >> + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," >> + #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", >> + []> { >> + let mayLoad = 1; >> + let mayStore = 0; >> + >> + // DMASK was repurposed for GATHER4. 4 components are always >> + // returned and DMASK works like a swizzle - it selects >> + // the component to fetch. The only useful DMASK values are >> + // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns >> + // (red,red,red,red) etc.) The ISA document doesn't mention >> + // this. 
>> + // Therefore, disable all code which updates DMASK by setting these two: >> + let MIMG = 0; >> + let hasPostISelHook = 0; >> +} >> + >> +multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm, >> + RegisterClass dst_rc, >> + int channels> { >> + def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_32>, >> + MIMG_Mask<asm#"_V1", channels>; >> + def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>, >> + MIMG_Mask<asm#"_V2", channels>; >> + def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>, >> + MIMG_Mask<asm#"_V4", channels>; >> + def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>, >> + MIMG_Mask<asm#"_V8", channels>; >> + def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>, >> + MIMG_Mask<asm#"_V16", channels>; >> +} >> + >> +multiclass MIMG_Gather <bits<7> op, string asm> { >> + defm _V1 : MIMG_Gather_Src_Helper<op, asm, VReg_32, 1>; >> + defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>; >> + defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>; >> + defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>; >> +} >> + >> >> //===----------------------------------------------------------------------===// >> // Vector instruction mappings >> >> //===----------------------------------------------------------------------===// >> diff --git a/lib/Target/R600/SIInstructions.td >> b/lib/Target/R600/SIInstructions.td >> index d4a7c5c..d65d88b 100644 >> --- a/lib/Target/R600/SIInstructions.td >> +++ b/lib/Target/R600/SIInstructions.td >> @@ -887,30 +887,30 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, >> "IMAGE_SAMPLE_C_B">; >> //def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", >> 0x0000003d>; >> //def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", >> 0x0000003e>; >> //def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", >> 0x0000003f>; >> -//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>; >> -//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 
<"IMAGE_GATHER4_CL", >> 0x00000041>; >> -//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", >> 0x00000044>; >> -//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", >> 0x00000045>; >> -//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", >> 0x00000046>; >> -//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", >> 0x00000047>; >> -//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", >> 0x00000048>; >> -//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", >> 0x00000049>; >> -//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", >> 0x0000004c>; >> -//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", >> 0x0000004d>; >> -//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 >> <"IMAGE_GATHER4_C_B_CL", 0x0000004e>; >> -//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", >> 0x0000004f>; >> -//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", >> 0x00000050>; >> -//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", >> 0x00000051>; >> -//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", >> 0x00000054>; >> -//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", >> 0x00000055>; >> -//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 >> <"IMAGE_GATHER4_B_CL_O", 0x00000056>; >> -//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", >> 0x00000057>; >> -//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", >> 0x00000058>; >> -//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 >> <"IMAGE_GATHER4_C_CL_O", 0x00000059>; >> -//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", >> 0x0000005c>; >> -//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", >> 0x0000005d>; >> -//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 >> <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>; >> -//def 
IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 >> <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>; >> +defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">; >> +defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">; >> +defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">; >> +defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">; >> +defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, >> "IMAGE_GATHER4_B_CL">; >> +defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">; >> +defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">; >> +defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, >> "IMAGE_GATHER4_C_CL">; >> +defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">; >> +defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">; >> +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, >> "IMAGE_GATHER4_C_B_CL">; >> +defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, >> "IMAGE_GATHER4_C_LZ">; >> +defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">; >> +defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, >> "IMAGE_GATHER4_CL_O">; >> +defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">; >> +defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">; >> +defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, >> "IMAGE_GATHER4_B_CL_O">; >> +defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, >> "IMAGE_GATHER4_LZ_O">; >> +defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">; >> +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, >> "IMAGE_GATHER4_C_CL_O">; >> +defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, >> "IMAGE_GATHER4_C_L_O">; >> +defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, >> "IMAGE_GATHER4_C_B_O">; >> +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, >> "IMAGE_GATHER4_C_B_CL_O">; >> +defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, >> "IMAGE_GATHER4_C_LZ_O">; >> //def IMAGE_GET_LOD : 
MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>; >> //def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>; >> //def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", >> 0x00000069>; >> @@ -1655,6 +1655,54 @@ def : SextInReg <i16, 16>; >> /********** Image sampling patterns **********/ >> /********** ======================= **********/ >> >> +class SampleRawPattern<SDNode name, MIMG opcode, ValueType vt> : Pat < >> + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm, >> + i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe), >> + (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm >> $da), >> + (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm >> $slc), >> + $addr, $rsrc, $sampler) >> +>; >> + >> +// Gather4 patterns. Only the variants which make sense are defined. >> +def : SampleRawPattern<SIgather4, IMAGE_GATHER4_V4_V2, >> v2i32>; >> +def : SampleRawPattern<SIgather4, IMAGE_GATHER4_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_cl, IMAGE_GATHER4_CL_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_l, IMAGE_GATHER4_L_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_b, IMAGE_GATHER4_B_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_lz, IMAGE_GATHER4_LZ_V4_V2, >> v2i32>; >> +def : SampleRawPattern<SIgather4_lz, IMAGE_GATHER4_LZ_V4_V4, >> v4i32>; >> + >> +def : SampleRawPattern<SIgather4_c, IMAGE_GATHER4_C_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_c_l, IMAGE_GATHER4_C_L_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_c_l, IMAGE_GATHER4_C_L_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_c_b, IMAGE_GATHER4_C_B_V4_V4, 
>> v4i32>; >> +def : SampleRawPattern<SIgather4_c_b, IMAGE_GATHER4_C_B_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, >> v4i32>; >> + >> +def : SampleRawPattern<SIgather4_o, IMAGE_GATHER4_O_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_l_o, IMAGE_GATHER4_L_O_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_l_o, IMAGE_GATHER4_L_O_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_b_o, IMAGE_GATHER4_B_O_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_b_o, IMAGE_GATHER4_B_O_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, >> v4i32>; >> + >> +def : SampleRawPattern<SIgather4_c_o, IMAGE_GATHER4_C_O_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_c_o, IMAGE_GATHER4_C_O_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, >> v8i32>; >> +def : SampleRawPattern<SIgather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, >> v4i32>; >> +def : SampleRawPattern<SIgather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, >> v8i32>; >> + >> /* SIsample for simple 1D texture lookup */ >> def : Pat < >> (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm), >> diff --git a/lib/Target/R600/SIIntrinsics.td >> b/lib/Target/R600/SIIntrinsics.td >> index 00e32c0..9d85f17 100644 >> --- a/lib/Target/R600/SIIntrinsics.td >> +++ b/lib/Target/R600/SIIntrinsics.td >> @@ -56,11 +56,59 @@ let TargetPrefix 
= "SI", isTarget = 1 in { >> >> class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, >> llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; >> >> + // Fully-flexible SAMPLE instruction. >> + class SampleRaw : Intrinsic < >> + [llvm_v4f32_ty], // vdata(VGPR) >> + [llvm_anyvector_ty, // vaddr(VGPR) >> + llvm_v32i8_ty, // rsrc(SGPR) >> + llvm_v16i8_ty, // sampler(SGPR) >> + llvm_i32_ty, // dmask(imm) >> + llvm_i32_ty, // unorm(imm) >> + llvm_i32_ty, // r128(imm) >> + llvm_i32_ty, // da(imm) >> + llvm_i32_ty, // glc(imm) >> + llvm_i32_ty, // slc(imm) >> + llvm_i32_ty, // tfe(imm) >> + llvm_i32_ty], // lwe(imm) >> + [IntrNoMem]>; >> + >> def int_SI_sample : Sample; >> def int_SI_sampleb : Sample; >> def int_SI_sampled : Sample; >> def int_SI_samplel : Sample; >> >> + // Basic gather4 >> + def int_SI_gather4 : SampleRaw; >> + def int_SI_gather4_cl : SampleRaw; >> + def int_SI_gather4_l : SampleRaw; >> + def int_SI_gather4_b : SampleRaw; >> + def int_SI_gather4_b_cl : SampleRaw; >> + def int_SI_gather4_lz : SampleRaw; >> + >> + // Gather4 with comparison >> + def int_SI_gather4_c : SampleRaw; >> + def int_SI_gather4_c_cl : SampleRaw; >> + def int_SI_gather4_c_l : SampleRaw; >> + def int_SI_gather4_c_b : SampleRaw; >> + def int_SI_gather4_c_b_cl : SampleRaw; >> + def int_SI_gather4_c_lz : SampleRaw; >> + >> + // Gather4 with offsets >> + def int_SI_gather4_o : SampleRaw; >> + def int_SI_gather4_cl_o : SampleRaw; >> + def int_SI_gather4_l_o : SampleRaw; >> + def int_SI_gather4_b_o : SampleRaw; >> + def int_SI_gather4_b_cl_o : SampleRaw; >> + def int_SI_gather4_lz_o : SampleRaw; >> + >> + // Gather4 with comparison and offsets >> + def int_SI_gather4_c_o : SampleRaw; >> + def int_SI_gather4_c_cl_o : SampleRaw; >> + def int_SI_gather4_c_l_o : SampleRaw; >> + def int_SI_gather4_c_b_o : SampleRaw; >> + def int_SI_gather4_c_b_cl_o : SampleRaw; >> + def int_SI_gather4_c_lz_o : SampleRaw; >> + >> def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], 
[llvm_anyvector_ty, >> llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; >> >> def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, >> llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; >> -- >> 1.9.1 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev