No, I don't have commit access. I haven't even asked for it yet. Marek
On Wed, Oct 9, 2013 at 8:01 PM, Tom Stellard <t...@stellard.net> wrote: > On Wed, Oct 09, 2013 at 03:33:37PM +0200, Marek Olšák wrote: >> From: Marek Olšák <marek.ol...@amd.com> >> >> This fixes piglit: >> - shaders/glsl-fs-texture2d-masked >> - shaders/glsl-fs-texture2d-masked-4 >> >> Signed-off-by: Marek Olšák <marek.ol...@amd.com> >> --- >> lib/Target/R600/SIISelLowering.cpp | 27 +++++++-- >> test/CodeGen/R600/llvm.SI.sample-masked.ll | 93 >> ++++++++++++++++++++++++++++++ >> 2 files changed, 114 insertions(+), 6 deletions(-) >> create mode 100644 test/CodeGen/R600/llvm.SI.sample-masked.ll >> >> diff --git a/lib/Target/R600/SIISelLowering.cpp >> b/lib/Target/R600/SIISelLowering.cpp >> index 2174753..891a51b 100644 >> --- a/lib/Target/R600/SIISelLowering.cpp >> +++ b/lib/Target/R600/SIISelLowering.cpp >> @@ -1065,7 +1065,9 @@ static unsigned SubIdx2Lane(unsigned Idx) { >> void SITargetLowering::adjustWritemask(MachineSDNode *&Node, >> SelectionDAG &DAG) const { >> SDNode *Users[4] = { }; >> - unsigned Writemask = 0, Lane = 0; >> + unsigned Lane = 0; >> + unsigned OldDmask = Node->getConstantOperandVal(0); >> + unsigned NewDmask = 0; >> >> // Try to figure out the used register components >> for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end(); >> @@ -1076,29 +1078,42 @@ void SITargetLowering::adjustWritemask(MachineSDNode >> *&Node, >> I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) >> return; >> >> + /* Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used. >> + * Note that subregs are packed, i.e. Lane==0 is the first bit set >> + * in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit >> + * set, etc. */ >> Lane = SubIdx2Lane(I->getConstantOperandVal(1)); >> >> + // Set which texture component corresponds to the lane. 
>> + unsigned Comp; >> + for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) { >> + assert(Dmask); >> + Comp = ffs(Dmask)-1; >> + Dmask &= ~(1 << Comp); >> + } >> + >> // Abort if we have more than one user per component >> if (Users[Lane]) >> return; >> >> Users[Lane] = *I; >> - Writemask |= 1 << Lane; >> + NewDmask |= 1 << Comp; >> } >> >> - // Abort if all components are used >> - if (Writemask == 0xf) >> + // Abort if there's no change >> + if (NewDmask == OldDmask) >> return; >> >> // Adjust the writemask in the node >> std::vector<SDValue> Ops; >> - Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32)); >> + Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32)); >> for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) >> Ops.push_back(Node->getOperand(i)); >> Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), >> Ops.size()); >> >> // If we only got one lane, replace it with a copy >> - if (Writemask == (1U << Lane)) { >> + // (if NewDmask has only one bit set...) >> + if (NewDmask && (NewDmask & (NewDmask-1)) == 0) { >> SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32); >> SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, >> SDLoc(), Users[Lane]->getValueType(0), >> diff --git a/test/CodeGen/R600/llvm.SI.sample-masked.ll >> b/test/CodeGen/R600/llvm.SI.sample-masked.ll >> new file mode 100644 >> index 0000000..1b4cc4e >> --- /dev/null >> +++ b/test/CodeGen/R600/llvm.SI.sample-masked.ll >> @@ -0,0 +1,93 @@ >> +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s >> + >> +; CHECK: @v1 > > I always forget to mention this, but we should use CHECK-LABEL: when > checking the function names. CHECK-LABEL: tells lit that a new test is > starting, so that if it fails, lit will recover and restart at the next > CHECK-LABEL. > > With that change the patch is: > > Reviewed-by: Tom Stellard <thomas.stell...@amd.com> > > Do you have commit access yet? 
> > -Tom > >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 13 >> +define void @v1(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 0 >> + %3 = extractelement <4 x float> %1, i32 2 >> + %4 = extractelement <4 x float> %1, i32 3 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, >> float %3, float %4, float %4) >> + ret void >> +} >> + >> +; CHECK: @v2 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 11 >> +define void @v2(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 0 >> + %3 = extractelement <4 x float> %1, i32 1 >> + %4 = extractelement <4 x float> %1, i32 3 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, >> float %3, float %4, float %4) >> + ret void >> +} >> + >> +; CHECK: @v3 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 14 >> +define void @v3(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 1 >> + %3 = extractelement <4 x float> %1, i32 2 >> + %4 = extractelement <4 x float> %1, i32 3 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, >> float %3, float %4, float %4) >> + ret void >> +} >> + >> +; CHECK: @v4 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 7 >> +define void @v4(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, 
i32 0) >> + %2 = extractelement <4 x float> %1, i32 0 >> + %3 = extractelement <4 x float> %1, i32 1 >> + %4 = extractelement <4 x float> %1, i32 2 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, >> float %3, float %4, float %4) >> + ret void >> +} >> + >> +; CHECK: @v5 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 10 >> +define void @v5(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 1 >> + %3 = extractelement <4 x float> %1, i32 3 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, >> float %3, float %3, float %3) >> + ret void >> +} >> + >> +; CHECK: @v6 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 6 >> +define void @v6(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 1 >> + %3 = extractelement <4 x float> %1, i32 2 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, >> float %3, float %3, float %3) >> + ret void >> +} >> + >> +; CHECK: @v7 >> +; CHECK: IMAGE_SAMPLE VGPR{{[[0-9]}}_VGPR{{[0-9]}}, 9 >> +define void @v7(i32 %a1) { >> +entry: >> + %0 = insertelement <1 x i32> undef, i32 %a1, i32 0 >> + %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> >> undef, <16 x i8> undef, i32 0) >> + %2 = extractelement <4 x float> %1, i32 0 >> + %3 = extractelement <4 x float> %1, i32 3 >> + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, >> float %3, float %3, float %3) >> + ret void >> +} >> + >> +declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, >> i32) readnone >> + >> +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, >> 
float) >> -- >> 1.8.1.2 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev