No, the code is unchanged; I just added the test. Marek
On Wed, Oct 9, 2013 at 4:28 PM, Christian König <deathsim...@vodafone.de> wrote: > Are there any changes to the original patch you've sent out? > > Anyway this version is: Reviewed-by: Christian König > <christian.koe...@amd.com> > > Am 09.10.2013 15:33, schrieb Marek Olšák: > >> From: Marek Olšák <marek.ol...@amd.com> >> >> This fixes piglit: >> - shaders/glsl-fs-texture2d-masked >> - shaders/glsl-fs-texture2d-masked-4 >> >> Signed-off-by: Marek Olšák <marek.ol...@amd.com> >> --- >> lib/Target/R600/SIISelLowering.cpp | 27 +++++++-- >> test/CodeGen/R600/llvm.SI.sample-masked.ll | 93 >> ++++++++++++++++++++++++++++++ >> 2 files changed, 114 insertions(+), 6 deletions(-) >> create mode 100644 test/CodeGen/R600/llvm.SI.sample-masked.ll >> >> diff --git a/lib/Target/R600/SIISelLowering.cpp >> b/lib/Target/R600/SIISelLowering.cpp >> index 2174753..891a51b 100644 >> --- a/lib/Target/R600/SIISelLowering.cpp >> +++ b/lib/Target/R600/SIISelLowering.cpp >> @@ -1065,7 +1065,9 @@ static unsigned SubIdx2Lane(unsigned Idx) { >> void SITargetLowering::adjustWritemask(MachineSDNode *&Node, >> SelectionDAG &DAG) const { >> SDNode *Users[4] = { }; >> - unsigned Writemask = 0, Lane = 0; >> + unsigned Lane = 0; >> + unsigned OldDmask = Node->getConstantOperandVal(0); >> + unsigned NewDmask = 0; >> // Try to figure out the used register components >> for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end(); >> @@ -1076,29 +1078,42 @@ void >> SITargetLowering::adjustWritemask(MachineSDNode *&Node, >> I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) >> return; >> + /* Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used. >> + * Note that subregs are packed, i.e. Lane==0 is the first bit set >> + * in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second >> bit >> + * set, etc. */ >> Lane = SubIdx2Lane(I->getConstantOperandVal(1)); >> + // Set which texture component corresponds to the lane. 
>> + unsigned Comp; >> + for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) { >> + assert(Dmask); >> + Comp = ffs(Dmask)-1; >> + Dmask &= ~(1 << Comp); >> + } >> + >> // Abort if we have more than one user per component >> if (Users[Lane]) >> return; >> Users[Lane] = *I; >> - Writemask |= 1 << Lane; >> + NewDmask |= 1 << Comp; >> } >> - // Abort if all components are used >> - if (Writemask == 0xf) >> + // Abort if there's no change >> + if (NewDmask == OldDmask) >> return; >> // Adjust the writemask in the node >> std::vector<SDValue> Ops; >> - Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32)); >> + Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32)); >> for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) >> Ops.push_back(Node->getOperand(i)); >> Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), >> Ops.size()); >> // If we only got one lane, replace it with a copy >> - if (Writemask == (1U << Lane)) { >> + // (if NewDmask has only one bit set...) 
>> + if (NewDmask && (NewDmask & (NewDmask-1)) == 0) { >> SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, >> MVT::i32); >> SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, >> SDLoc(), >> Users[Lane]->getValueType(0), >> diff --git a/test/CodeGen/R600/llvm.SI.sample-masked.ll >> b/test/CodeGen/R600/llvm.SI.sample-masked.ll >> new file mode 100644 >> index 0000000..1b4cc4e >> --- /dev/null >> +++ b/test/CodeGen/R600/llvm.SI.sample-masked.ll >> @@ -0,0 +1,93 @@ >> +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s >> + >> +; CHECK: @v1 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 13 >> +define void @v1(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 0 >> + %3 = extractelement <4 x float> %1, i32 2 >> + %4 = extractelement <4 x float> %1, i32 3 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float >> %2, float %3, float %4, float %4) >> + ret void >> +} >> + >> +; CHECK: @v2 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 11 >> +define void @v2(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 0 >> + %3 = extractelement <4 x float> %1, i32 1 >> + %4 = extractelement <4 x float> %1, i32 3 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float >> %2, float %3, float %4, float %4) >> + ret void >> +} >> + >> +; CHECK: @v3 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 14 >> +define void @v3(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> 
+ %2 = extractelement <4 x float> %1, i32 1 >> + %3 = extractelement <4 x float> %1, i32 2 >> + %4 = extractelement <4 x float> %1, i32 3 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float >> %2, float %3, float %4, float %4) >> + ret void >> +} >> + >> +; CHECK: @v4 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 7 >> +define void @v4(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 0 >> + %3 = extractelement <4 x float> %1, i32 1 >> + %4 = extractelement <4 x float> %1, i32 2 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float >> %2, float %3, float %4, float %4) >> + ret void >> +} >> + >> +; CHECK: @v5 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 10 >> +define void @v5(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 1 >> + %3 = extractelement <4 x float> %1, i32 3 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float >> %2, float %3, float %3, float %3) >> + ret void >> +} >> + >> +; CHECK: @v6 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 6 >> +define void @v6(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 1 >> + %3 = extractelement <4 x float> %1, i32 2 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float >> %2, float %3, float %3, float %3) >> + ret void >> +} >> + >> +; CHECK: @v7 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 9 >> +define void @v7(i32 %a1) { >> +entry: >> + %0 = 
insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 0 >> + %3 = extractelement <4 x float> %1, i32 3 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float >> %2, float %3, float %3, float %3) >> + ret void >> +} >> + >> +declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x >> i8>, i32) readnone >> + >> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, >> float, float) > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev