Pushed, thanks. -Tom
On Tue, Oct 22, 2013 at 02:15:01AM +0200, Marek Olšák wrote: > From: Marek Olšák <marek.ol...@amd.com> > > This fixes piglit: > - shaders/glsl-fs-texture2d-masked > - shaders/glsl-fs-texture2d-masked-4 > > Signed-off-by: Marek Olšák <marek.ol...@amd.com> > Reviewed-by: Tom Stellard <thomas.stell...@amd.com> > --- > lib/Target/R600/SIISelLowering.cpp | 27 +++++++-- > test/CodeGen/R600/llvm.SI.sample-masked.ll | 93 > ++++++++++++++++++++++++++++++ > 2 files changed, 114 insertions(+), 6 deletions(-) > create mode 100644 test/CodeGen/R600/llvm.SI.sample-masked.ll > > diff --git a/lib/Target/R600/SIISelLowering.cpp > b/lib/Target/R600/SIISelLowering.cpp > index 2c9270e..bfc9e8d 100644 > --- a/lib/Target/R600/SIISelLowering.cpp > +++ b/lib/Target/R600/SIISelLowering.cpp > @@ -1065,7 +1065,9 @@ static unsigned SubIdx2Lane(unsigned Idx) { > void SITargetLowering::adjustWritemask(MachineSDNode *&Node, > SelectionDAG &DAG) const { > SDNode *Users[4] = { }; > - unsigned Writemask = 0, Lane = 0; > + unsigned Lane = 0; > + unsigned OldDmask = Node->getConstantOperandVal(0); > + unsigned NewDmask = 0; > > // Try to figure out the used register components > for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end(); > @@ -1076,29 +1078,42 @@ void SITargetLowering::adjustWritemask(MachineSDNode > *&Node, > I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) > return; > > + // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used. > + // Note that subregs are packed, i.e. Lane==0 is the first bit set > + // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit > + // set, etc. > Lane = SubIdx2Lane(I->getConstantOperandVal(1)); > > + // Set which texture component corresponds to the lane. 
> + unsigned Comp; > + for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) { > + assert(Dmask); > + Comp = ffs(Dmask)-1; > + Dmask &= ~(1 << Comp); > + } > + > // Abort if we have more than one user per component > if (Users[Lane]) > return; > > Users[Lane] = *I; > - Writemask |= 1 << Lane; > + NewDmask |= 1 << Comp; > } > > - // Abort if all components are used > - if (Writemask == 0xf) > + // Abort if there's no change > + if (NewDmask == OldDmask) > return; > > // Adjust the writemask in the node > std::vector<SDValue> Ops; > - Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32)); > + Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32)); > for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) > Ops.push_back(Node->getOperand(i)); > Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), > Ops.size()); > > // If we only got one lane, replace it with a copy > - if (Writemask == (1U << Lane)) { > + // (if NewDmask has only one bit set...) > + if (NewDmask && (NewDmask & (NewDmask-1)) == 0) { > SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32); > SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, > SDLoc(), Users[Lane]->getValueType(0), > diff --git a/test/CodeGen/R600/llvm.SI.sample-masked.ll > b/test/CodeGen/R600/llvm.SI.sample-masked.ll > new file mode 100644 > index 0000000..454e48b > --- /dev/null > +++ b/test/CodeGen/R600/llvm.SI.sample-masked.ll > @@ -0,0 +1,93 @@ > +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s > + > +; CHECK-LABEL: @v1 > +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 13 > +define void @v1(i32 %a1) { > +entry: > + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 > + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, > <16 x i8> undef, i32 0) > + %2 = extractelement <4 x float> %1, i32 0 > + %3 = extractelement <4 x float> %1, i32 2 > + %4 = extractelement <4 x float> %1, i32 3 > + call void @llvm.SI.export(i32 15, i32 0, i32 
1, i32 12, i32 0, float %2, > float %3, float %4, float %4) > + ret void > +} > + > +; CHECK-LABEL: @v2 > +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 11 > +define void @v2(i32 %a1) { > +entry: > + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 > + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, > <16 x i8> undef, i32 0) > + %2 = extractelement <4 x float> %1, i32 0 > + %3 = extractelement <4 x float> %1, i32 1 > + %4 = extractelement <4 x float> %1, i32 3 > + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, > float %3, float %4, float %4) > + ret void > +} > + > +; CHECK-LABEL: @v3 > +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 14 > +define void @v3(i32 %a1) { > +entry: > + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 > + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, > <16 x i8> undef, i32 0) > + %2 = extractelement <4 x float> %1, i32 1 > + %3 = extractelement <4 x float> %1, i32 2 > + %4 = extractelement <4 x float> %1, i32 3 > + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, > float %3, float %4, float %4) > + ret void > +} > + > +; CHECK-LABEL: @v4 > +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 7 > +define void @v4(i32 %a1) { > +entry: > + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 > + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, > <16 x i8> undef, i32 0) > + %2 = extractelement <4 x float> %1, i32 0 > + %3 = extractelement <4 x float> %1, i32 1 > + %4 = extractelement <4 x float> %1, i32 2 > + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, > float %3, float %4, float %4) > + ret void > +} > + > +; CHECK-LABEL: @v5 > +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 10 > +define void @v5(i32 %a1) { > +entry: > + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 > + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x 
i32> %0, <32 x i8> undef, > <16 x i8> undef, i32 0) > + %2 = extractelement <4 x float> %1, i32 1 > + %3 = extractelement <4 x float> %1, i32 3 > + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, > float %3, float %3, float %3) > + ret void > +} > + > +; CHECK-LABEL: @v6 > +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 6 > +define void @v6(i32 %a1) { > +entry: > + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 > + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, > <16 x i8> undef, i32 0) > + %2 = extractelement <4 x float> %1, i32 1 > + %3 = extractelement <4 x float> %1, i32 2 > + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, > float %3, float %3, float %3) > + ret void > +} > + > +; CHECK-LABEL: @v7 > +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 9 > +define void @v7(i32 %a1) { > +entry: > + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 > + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, > <16 x i8> undef, i32 0) > + %2 = extractelement <4 x float> %1, i32 0 > + %3 = extractelement <4 x float> %1, i32 3 > + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, > float %3, float %3, float %3) > + ret void > +} > + > +declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, > i32) readnone > + > +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, > float) > -- > 1.8.1.2 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev