--- lib/Target/AMDGPU/R600ISelLowering.cpp | 51 +++++++++++++++++++++++---- lib/Target/AMDGPU/R600Instructions.td | 5 +++ lib/Target/AMDGPU/R600Intrinsics.td | 4 +++ lib/Target/AMDGPU/R600MachineFunctionInfo.cpp | 1 + lib/Target/AMDGPU/R600MachineFunctionInfo.h | 1 + 5 files changed, 56 insertions(+), 6 deletions(-)
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 6c594cc..5b9545e 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -350,8 +350,27 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( } case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportSwz: { + // Instruction is left unmodified if it is not the last one of its type + bool isLastInstructionOfItsType; + { + isLastInstructionOfItsType = true; + unsigned InstExportType = MI->getOperand(1).getImm(); + for (MachineBasicBlock::iterator NextExportInst = llvm::next(I), + EndBlock = BB->end(); NextExportInst != EndBlock; + NextExportInst = llvm::next(NextExportInst)) { + if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz || + NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) { + unsigned CurrentInstExportType = NextExportInst->getOperand(1) + .getImm(); + if (CurrentInstExportType == InstExportType) { + isLastInstructionOfItsType = false; + break; + } + } + } + } bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0; - if (!EOP) + if (!EOP && !isLastInstructionOfItsType) return BB; unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 
84 : 40; BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) @@ -363,7 +382,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( .addOperand(MI->getOperand(5)) .addOperand(MI->getOperand(6)) .addImm(CfInst) - .addImm(1); + .addImm(EOP); break; } } @@ -382,7 +401,7 @@ using namespace llvm::AMDGPUIntrinsic; static SDValue InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap, unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type, - SDValue Scalar, SDValue Chain) { + unsigned ArrayBaseOffset, SDValue Scalar, SDValue Chain) { if (!ExportMap[Slot]) { SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4f32, @@ -420,7 +439,7 @@ InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap, const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector, DAG.getConstant(Inst, MVT::i32), DAG.getConstant(Type, MVT::i32), - DAG.getConstant(Slot, MVT::i32), + DAG.getConstant(Slot + ArrayBaseOffset, MVT::i32), DAG.getConstant(Mask, MVT::i32)}; DAG.UpdateNodeOperands(ExportInstruction, @@ -464,7 +483,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const SDNode **OutputsMap = MFI->Outputs; return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, - RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2), + RegIndex / 4, RegIndex % 4, 0, 0, 0, Op.getOperand(2), Chain); } @@ -498,7 +517,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const } return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, - RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2), + RegIndex / 4, RegIndex % 4, Inst, 0, 0, Op.getOperand(2), + Chain); + } + case AMDGPUIntrinsic::R600_store_vertex_position: { + MachineFunction &MF = DAG.getMachineFunction(); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + + SDNode **OutputsMap = 
MFI->Outputs; + return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, + RegIndex / 4, RegIndex % 4, 0, 1, 60, Op.getOperand(2), + Chain); + } + case AMDGPUIntrinsic::R600_store_vertex_param: { + MachineFunction &MF = DAG.getMachineFunction(); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + + SDNode **OutputsMap = MFI->VertexParamOutputs; + return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, + RegIndex / 4, RegIndex % 4, 0, 2, 0, Op.getOperand(2), Chain); } // default for switch(IntrinsicID) diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index c3ffe97..3834df5 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -545,6 +545,11 @@ multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, 0, 1, 2, 3, cf_inst, 0) >; + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 0, 1, 2, 3, cf_inst, 0) + >; } multiclass SteamOutputExportPattern<Instruction ExportInst, diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td index 0186f9d..069fa07 100644 --- a/lib/Target/AMDGPU/R600Intrinsics.td +++ b/lib/Target/AMDGPU/R600Intrinsics.td @@ -21,6 +21,10 @@ let TargetPrefix = "R600", isTarget = 1 in { Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; def int_R600_clipvertex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_R600_store_vertex_position : + Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; + def int_R600_store_vertex_param : + Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; def int_R600_store_stream_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>; def 
int_R600_store_pixel_color : diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp index 93b4608..ecc98ec 100644 --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp @@ -19,6 +19,7 @@ R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) HasPerspectiveInterpolation(false) { memset(Outputs, 0, sizeof(Outputs)); memset(StreamOutputs, 0, sizeof(StreamOutputs)); + memset(VertexParamOutputs, 0, sizeof(VertexParamOutputs)); } unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const { diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h index 6cc875f..f7cede3 100644 --- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h @@ -26,6 +26,7 @@ public: R600MachineFunctionInfo(const MachineFunction &MF); std::vector<unsigned> ReservedRegs; SDNode *Outputs[16]; + SDNode *VertexParamOutputs[16]; SDNode *StreamOutputs[64][4]; BitVector IndirectChannels; bool HasLinearInterpolation; -- 1.8.0.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev