Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.350 -> 1.351 --- Log message: rearrange code --- Diffs of the changes: (+441 -449) X86ISelLowering.cpp | 890 +++++++++++++++++++++++++--------------------------- 1 files changed, 441 insertions(+), 449 deletions(-) Index: llvm/lib/Target/X86/X86ISelLowering.cpp diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.350 llvm/lib/Target/X86/X86ISelLowering.cpp:1.351 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.350 Wed Feb 28 00:05:16 2007 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Wed Feb 28 00:10:12 2007 @@ -912,211 +912,283 @@ //===----------------------------------------------------------------------===// -// X86-64 C Calling Convention implementation +// FastCall Calling Convention implementation //===----------------------------------------------------------------------===// - - - +// +// The X86 'fastcall' calling convention passes up to two integer arguments in +// registers (an appropriate portion of ECX/EDX), passes arguments in C order, +// and requires that the callee pop its arguments off the stack (allowing proper +// tail calls), and has the same return value conventions as C calling convs. +// +// This calling convention always arranges for the callee pop value to be 8n+4 +// bytes, which is needed for tail recursion elimination and stack alignment +// reasons. SDOperand -X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { - unsigned NumArgs = Op.Val->getNumValues() - 1; +X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { + unsigned NumArgs = Op.Val->getNumValues()-1; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); SDOperand Root = Op.getOperand(0); - bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; + SmallVector<SDOperand, 8> ArgValues; + + // Add DAG nodes to load the arguments... 
On entry to a function the stack + // frame looks like this: + // + // [ESP] -- return address + // [ESP + 4] -- first nonreg argument (leftmost lexically) + // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size + // ... + unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot + + // Keep track of the number of integer regs passed so far. This can be either + // 0 (neither EAX/ECX or EDX used), 1 (EAX/ECX is used) or 2 (EAX/ECX and EDX + // are both used). + unsigned NumIntRegs = 0; + unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. - static const unsigned GPR64ArgRegs[] = { - X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 - }; static const unsigned XMMArgRegs[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, - X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 + X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 }; - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(), - ArgLocs); - - for (unsigned i = 0; i != NumArgs; ++i) { - MVT::ValueType ArgVT = Op.getValue(i).getValueType(); - unsigned ArgFlags = cast<ConstantSDNode>(Op.getOperand(3+i))->getValue(); - if (CC_X86_64_C(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) - assert(0 && "Unhandled argument type!"); - } - - SmallVector<SDOperand, 8> ArgValues; - unsigned LastVal = ~0U; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - // TODO: If an arg is passed in two places (e.g. reg and stack), skip later - // places. 
- assert(VA.getValNo() != LastVal && - "Don't support value assigned to multiple locs yet"); - LastVal = VA.getValNo(); + static const unsigned GPRArgRegs[][2] = { + { X86::CL, X86::DL }, + { X86::CX, X86::DX }, + { X86::ECX, X86::EDX } + }; + + static const TargetRegisterClass* GPRClasses[3] = { + X86::GR8RegisterClass, X86::GR16RegisterClass, X86::GR32RegisterClass + }; + + for (unsigned i = 0; i < NumArgs; ++i) { + MVT::ValueType ObjectVT = Op.getValue(i).getValueType(); + unsigned ArgIncrement = 4; + unsigned ObjSize = 0; + unsigned ObjXMMRegs = 0; + unsigned ObjIntRegs = 0; + unsigned Reg = 0; + SDOperand ArgValue; + + HowToPassCallArgument(ObjectVT, + true, // Use as much registers as possible + NumIntRegs, NumXMMRegs, 2, + ObjSize, ObjIntRegs, ObjXMMRegs); - if (VA.isRegLoc()) { - MVT::ValueType RegVT = VA.getLocVT(); - TargetRegisterClass *RC; - if (RegVT == MVT::i32) - RC = X86::GR32RegisterClass; - else if (RegVT == MVT::i64) - RC = X86::GR64RegisterClass; - else if (RegVT == MVT::f32) - RC = X86::FR32RegisterClass; - else if (RegVT == MVT::f64) - RC = X86::FR64RegisterClass; - else { - assert(MVT::isVector(RegVT)); - RC = X86::VR128RegisterClass; + if (ObjSize > 4) + ArgIncrement = ObjSize; + + if (ObjIntRegs || ObjXMMRegs) { + switch (ObjectVT) { + default: assert(0 && "Unhandled argument type!"); + case MVT::i8: + case MVT::i16: + case MVT::i32: { + unsigned RegToUse = GPRArgRegs[ObjectVT-MVT::i8][NumIntRegs]; + Reg = AddLiveIn(MF, RegToUse, GPRClasses[ObjectVT-MVT::i8]); + ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); + break; } - - SDOperand ArgValue = DAG.getCopyFromReg(Root, VA.getLocReg(), RegVT); - AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); - - // If this is an 8 or 16-bit value, it is really passed promoted to 32 - // bits. Insert an assert[sz]ext to capture this, then truncate to the - // right size. 
- if (VA.getLocInfo() == CCValAssign::SExt) - ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - else if (VA.getLocInfo() == CCValAssign::ZExt) - ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - - if (VA.getLocInfo() != CCValAssign::Full) - ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); - - ArgValues.push_back(ArgValue); - } else { - assert(VA.isMemLoc()); - - // Create the nodes corresponding to a load from this parameter slot. - int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8, - VA.getLocMemOffset()); - SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); - ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0)); + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: { + Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass); + ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); + break; + } + } + NumIntRegs += ObjIntRegs; + NumXMMRegs += ObjXMMRegs; } - } - - unsigned StackSize = CCInfo.getNextStackOffset(); - - // If the function takes variable number of arguments, make a frame index for - // the start of the first vararg value... for expansion of llvm.va_start. - if (isVarArg) { - unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6); - unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); - - // For X86-64, if there are vararg parameters that are passed via - // registers, then we must store them to their spots on the stack so they - // may be loaded by deferencing the result of va_next. - VarArgsGPOffset = NumIntRegs * 8; - VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16; - VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); - RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16); - - // Store the integer parameter registers. 
- SmallVector<SDOperand, 8> MemOps; - SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, - DAG.getConstant(VarArgsGPOffset, getPointerTy())); - for (; NumIntRegs != 6; ++NumIntRegs) { - unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs], - X86::GR64RegisterClass); - SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64); - SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, - DAG.getConstant(8, getPointerTy())); + if (ObjSize) { + // XMM arguments have to be aligned on 16-byte boundary. + if (ObjSize == 16) + ArgOffset = ((ArgOffset + 15) / 16) * 16; + // Create the SelectionDAG nodes corresponding to a load from this + // parameter. + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); + SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0); + + ArgOffset += ArgIncrement; // Move on to the next argument. } - // Now store the XMM (fp + vector) parameter registers. - FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, - DAG.getConstant(VarArgsFPOffset, getPointerTy())); - for (; NumXMMRegs != 8; ++NumXMMRegs) { - unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], - X86::VR128RegisterClass); - SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32); - SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, - DAG.getConstant(16, getPointerTy())); - } - if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor, MVT::Other, - &MemOps[0], MemOps.size()); + ArgValues.push_back(ArgValue); } ArgValues.push_back(Root); - ReturnAddrIndex = 0; // No return address slot generated yet. - BytesToPopOnReturn = 0; // Callee pops nothing. 
- BytesCallerReserves = StackSize; + // Make sure the instruction takes 8n+4 bytes to make sure the start of the + // arguments and the arguments after the retaddr has been pushed are aligned. + if ((ArgOffset & 7) == 0) + ArgOffset += 4; + + VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. + RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. + ReturnAddrIndex = 0; // No return address slot generated yet. + BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. + BytesCallerReserves = 0; + + MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn); + + // Finally, inform the code generator which regs we return values in. + switch (getValueType(MF.getFunction()->getReturnType())) { + default: assert(0 && "Unknown type!"); + case MVT::isVoid: break; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + MF.addLiveOut(X86::EAX); + break; + case MVT::i64: + MF.addLiveOut(X86::EAX); + MF.addLiveOut(X86::EDX); + break; + case MVT::f32: + case MVT::f64: + MF.addLiveOut(X86::ST0); + break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + MF.addLiveOut(X86::XMM0); + break; + } // Return the new list of results. return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), &ArgValues[0], ArgValues.size()).getValue(Op.ResNo); } -SDOperand -X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, - unsigned CC) { +SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, + unsigned CC) { SDOperand Chain = Op.getOperand(0); - bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; SDOperand Callee = Op.getOperand(4); unsigned NumOps = (Op.getNumOperands() - 5) / 2; - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, getTargetMachine(), ArgLocs); + // Count how many bytes are to be pushed on the stack. 
+ unsigned NumBytes = 0; + + // Keep track of the number of integer regs passed so far. This can be either + // 0 (neither EAX/ECX or EDX used), 1 (EAX/ECX is used) or 2 (EAX/ECX and EDX + // are both used). + unsigned NumIntRegs = 0; + unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. + + static const unsigned GPRArgRegs[][2] = { + { X86::CL, X86::DL }, + { X86::CX, X86::DX }, + { X86::ECX, X86::EDX } + }; + static const unsigned XMMArgRegs[] = { + X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 + }; for (unsigned i = 0; i != NumOps; ++i) { - MVT::ValueType ArgVT = Op.getOperand(5+2*i).getValueType(); - unsigned ArgFlags =cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue(); - if (CC_X86_64_C(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) - assert(0 && "Unhandled argument type!"); + SDOperand Arg = Op.getOperand(5+2*i); + + switch (Arg.getValueType()) { + default: assert(0 && "Unknown value type!"); + case MVT::i8: + case MVT::i16: + case MVT::i32: + if (NumIntRegs < 2) { + ++NumIntRegs; + break; + } // Fall through + case MVT::f32: + NumBytes += 4; + break; + case MVT::f64: + NumBytes += 8; + break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + if (NumXMMRegs < 4) + NumXMMRegs++; + else { + // XMM arguments have to be aligned on 16-byte boundary. + NumBytes = ((NumBytes + 15) / 16) * 16; + NumBytes += 16; + } + break; + } } - - // Get a count of how many bytes are to be pushed on the stack. - unsigned NumBytes = CCInfo.getNextStackOffset(); + + // Make sure the instruction takes 8n+4 bytes to make sure the start of the + // arguments and the arguments after the retaddr has been pushed are aligned. + if ((NumBytes & 7) == 0) + NumBytes += 4; + Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); + // Arguments go on the stack in reverse order, as specified by the ABI. 
+ unsigned ArgOffset = 0; + NumIntRegs = 0; SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; SmallVector<SDOperand, 8> MemOpChains; + SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); + for (unsigned i = 0; i != NumOps; ++i) { + SDOperand Arg = Op.getOperand(5+2*i); - SDOperand StackPtr; - - // Walk the register/memloc assignments, inserting copies/loads. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); - - // Promote the value if needed. - switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); - break; - case CCValAssign::ZExt: - Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); - break; - case CCValAssign::AExt: - Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); + switch (Arg.getValueType()) { + default: assert(0 && "Unexpected ValueType for argument!"); + case MVT::i8: + case MVT::i16: + case MVT::i32: + if (NumIntRegs < 2) { + unsigned RegToUse = + GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs]; + RegsToPass.push_back(std::make_pair(RegToUse, Arg)); + ++NumIntRegs; + break; + } // Fall through + case MVT::f32: { + SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); + PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + ArgOffset += 4; break; } - - if (VA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - assert(VA.isMemLoc()); - if (StackPtr.Val == 0) - StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); - SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy()); + case MVT::f64: { + SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); 
MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + ArgOffset += 8; + break; + } + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v4f32: + case MVT::v2f64: + if (NumXMMRegs < 4) { + RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); + NumXMMRegs++; + } else { + // XMM arguments have to be aligned on 16-byte boundary. + ArgOffset = ((ArgOffset + 15) / 16) * 16; + SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); + PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + ArgOffset += 16; + } + break; } } - + if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOpChains[0], MemOpChains.size()); @@ -1130,27 +1202,6 @@ InFlag = Chain.getValue(1); } - if (isVarArg) { - // From AMD64 ABI document: - // For calls that may call functions that use varargs or stdargs - // (prototype-less calls or calls to functions containing ellipsis (...) in - // the declaration) %al is used as hidden argument to specify the number - // of SSE registers used. The contents of %al do not need to match exactly - // the number of registers, but must be an ubound on the number of SSE - // registers used and is in the range 0 - 8 inclusive. - - // Count the number of XMM registers allocated. - static const unsigned XMMArgRegs[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, - X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 - }; - unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); - - Chain = DAG.getCopyToReg(Chain, X86::AL, - DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); - InFlag = Chain.getValue(1); - } - // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { @@ -1162,6 +1213,16 @@ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + // ELF / PIC requires GOT in the EBX register before function calls via PLT + // GOT pointer. + if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()) { + Chain = DAG.getCopyToReg(Chain, X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), + InFlag); + InFlag = Chain.getValue(1); + } + // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); SmallVector<SDOperand, 8> Ops; @@ -1174,6 +1235,11 @@ Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Add an implicit use GOT pointer in EBX. + if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()) + Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); + if (InFlag.Val) Ops.push_back(InFlag); @@ -1187,307 +1253,221 @@ Ops.clear(); Ops.push_back(Chain); Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); - Ops.push_back(DAG.getConstant(0, getPointerTy())); + Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); Ops.push_back(InFlag); Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); - + // Handle result values, copying them out of physregs into vregs that we // return. 
return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); } + //===----------------------------------------------------------------------===// -// Fast & FastCall Calling Convention implementation -//===----------------------------------------------------------------------===// -// -// The X86 'fast' calling convention passes up to two integer arguments in -// registers (an appropriate portion of EAX/EDX), passes arguments in C order, -// and requires that the callee pop its arguments off the stack (allowing proper -// tail calls), and has the same return value conventions as C calling convs. -// -// This calling convention always arranges for the callee pop value to be 8n+4 -// bytes, which is needed for tail recursion elimination and stack alignment -// reasons. -// -// Note that this can be enhanced in the future to pass fp vals in registers -// (when we have a global fp allocator) and do other tricks. -// +// X86-64 C Calling Convention implementation //===----------------------------------------------------------------------===// -// The X86 'fastcall' calling convention passes up to two integer arguments in -// registers (an appropriate portion of ECX/EDX), passes arguments in C order, -// and requires that the callee pop its arguments off the stack (allowing proper -// tail calls), and has the same return value conventions as C calling convs. -// -// This calling convention always arranges for the callee pop value to be 8n+4 -// bytes, which is needed for tail recursion elimination and stack alignment -// reasons. 
+ SDOperand -X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { - unsigned NumArgs = Op.Val->getNumValues()-1; +X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { + unsigned NumArgs = Op.Val->getNumValues() - 1; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); SDOperand Root = Op.getOperand(0); - SmallVector<SDOperand, 8> ArgValues; - - // Add DAG nodes to load the arguments... On entry to a function the stack - // frame looks like this: - // - // [ESP] -- return address - // [ESP + 4] -- first nonreg argument (leftmost lexically) - // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size - // ... - unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot - - // Keep track of the number of integer regs passed so far. This can be either - // 0 (neither EAX/ECX or EDX used), 1 (EAX/ECX is used) or 2 (EAX/ECX and EDX - // are both used). - unsigned NumIntRegs = 0; - unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 
- - static const unsigned XMMArgRegs[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 - }; + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; - static const unsigned GPRArgRegs[][2] = { - { X86::CL, X86::DL }, - { X86::CX, X86::DX }, - { X86::ECX, X86::EDX } + static const unsigned GPR64ArgRegs[] = { + X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 }; - - static const TargetRegisterClass* GPRClasses[3] = { - X86::GR8RegisterClass, X86::GR16RegisterClass, X86::GR32RegisterClass + static const unsigned XMMArgRegs[] = { + X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, + X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; - for (unsigned i = 0; i < NumArgs; ++i) { - MVT::ValueType ObjectVT = Op.getValue(i).getValueType(); - unsigned ArgIncrement = 4; - unsigned ObjSize = 0; - unsigned ObjXMMRegs = 0; - unsigned ObjIntRegs = 0; - unsigned Reg = 0; - SDOperand ArgValue; - - HowToPassCallArgument(ObjectVT, - true, // Use as much registers as possible - NumIntRegs, NumXMMRegs, 2, - ObjSize, ObjIntRegs, ObjXMMRegs); + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(), + ArgLocs); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT::ValueType ArgVT = Op.getValue(i).getValueType(); + unsigned ArgFlags = cast<ConstantSDNode>(Op.getOperand(3+i))->getValue(); + if (CC_X86_64_C(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) + assert(0 && "Unhandled argument type!"); + } + + SmallVector<SDOperand, 8> ArgValues; + unsigned LastVal = ~0U; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + // TODO: If an arg is passed in two places (e.g. reg and stack), skip later + // places. 
+ assert(VA.getValNo() != LastVal && + "Don't support value assigned to multiple locs yet"); + LastVal = VA.getValNo(); - if (ObjSize > 4) - ArgIncrement = ObjSize; - - if (ObjIntRegs || ObjXMMRegs) { - switch (ObjectVT) { - default: assert(0 && "Unhandled argument type!"); - case MVT::i8: - case MVT::i16: - case MVT::i32: { - unsigned RegToUse = GPRArgRegs[ObjectVT-MVT::i8][NumIntRegs]; - Reg = AddLiveIn(MF, RegToUse, GPRClasses[ObjectVT-MVT::i8]); - ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); - break; - } - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: { - Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass); - ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); - break; - } + if (VA.isRegLoc()) { + MVT::ValueType RegVT = VA.getLocVT(); + TargetRegisterClass *RC; + if (RegVT == MVT::i32) + RC = X86::GR32RegisterClass; + else if (RegVT == MVT::i64) + RC = X86::GR64RegisterClass; + else if (RegVT == MVT::f32) + RC = X86::FR32RegisterClass; + else if (RegVT == MVT::f64) + RC = X86::FR64RegisterClass; + else { + assert(MVT::isVector(RegVT)); + RC = X86::VR128RegisterClass; } - NumIntRegs += ObjIntRegs; - NumXMMRegs += ObjXMMRegs; - } - if (ObjSize) { - // XMM arguments have to be aligned on 16-byte boundary. - if (ObjSize == 16) - ArgOffset = ((ArgOffset + 15) / 16) * 16; - // Create the SelectionDAG nodes corresponding to a load from this - // parameter. - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); - SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); - ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0); - ArgOffset += ArgIncrement; // Move on to the next argument. + SDOperand ArgValue = DAG.getCopyFromReg(Root, VA.getLocReg(), RegVT); + AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); + + // If this is an 8 or 16-bit value, it is really passed promoted to 32 + // bits. 
Insert an assert[sz]ext to capture this, then truncate to the + // right size. + if (VA.getLocInfo() == CCValAssign::SExt) + ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + else if (VA.getLocInfo() == CCValAssign::ZExt) + ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + + if (VA.getLocInfo() != CCValAssign::Full) + ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); + + ArgValues.push_back(ArgValue); + } else { + assert(VA.isMemLoc()); + + // Create the nodes corresponding to a load from this parameter slot. + int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8, + VA.getLocMemOffset()); + SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0)); + } + } + + unsigned StackSize = CCInfo.getNextStackOffset(); + + // If the function takes variable number of arguments, make a frame index for + // the start of the first vararg value... for expansion of llvm.va_start. + if (isVarArg) { + unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6); + unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); + + // For X86-64, if there are vararg parameters that are passed via + // registers, then we must store them to their spots on the stack so they + // may be loaded by deferencing the result of va_next. + VarArgsGPOffset = NumIntRegs * 8; + VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16; + VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); + RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16); + + // Store the integer parameter registers. 
+ SmallVector<SDOperand, 8> MemOps; + SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); + SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, + DAG.getConstant(VarArgsGPOffset, getPointerTy())); + for (; NumIntRegs != 6; ++NumIntRegs) { + unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs], + X86::GR64RegisterClass); + SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64); + SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, + DAG.getConstant(8, getPointerTy())); } - ArgValues.push_back(ArgValue); + // Now store the XMM (fp + vector) parameter registers. + FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN, + DAG.getConstant(VarArgsFPOffset, getPointerTy())); + for (; NumXMMRegs != 8; ++NumXMMRegs) { + unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], + X86::VR128RegisterClass); + SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32); + SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, + DAG.getConstant(16, getPointerTy())); + } + if (!MemOps.empty()) + Root = DAG.getNode(ISD::TokenFactor, MVT::Other, + &MemOps[0], MemOps.size()); } ArgValues.push_back(Root); - // Make sure the instruction takes 8n+4 bytes to make sure the start of the - // arguments and the arguments after the retaddr has been pushed are aligned. - if ((ArgOffset & 7) == 0) - ArgOffset += 4; - - VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. - RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. - ReturnAddrIndex = 0; // No return address slot generated yet. - BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. - BytesCallerReserves = 0; - - MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn); - - // Finally, inform the code generator which regs we return values in. 
- switch (getValueType(MF.getFunction()->getReturnType())) { - default: assert(0 && "Unknown type!"); - case MVT::isVoid: break; - case MVT::i1: - case MVT::i8: - case MVT::i16: - case MVT::i32: - MF.addLiveOut(X86::EAX); - break; - case MVT::i64: - MF.addLiveOut(X86::EAX); - MF.addLiveOut(X86::EDX); - break; - case MVT::f32: - case MVT::f64: - MF.addLiveOut(X86::ST0); - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - MF.addLiveOut(X86::XMM0); - break; - } + ReturnAddrIndex = 0; // No return address slot generated yet. + BytesToPopOnReturn = 0; // Callee pops nothing. + BytesCallerReserves = StackSize; // Return the new list of results. return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), &ArgValues[0], ArgValues.size()).getValue(Op.ResNo); } -SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, - unsigned CC) { +SDOperand +X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, + unsigned CC) { SDOperand Chain = Op.getOperand(0); + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0; SDOperand Callee = Op.getOperand(4); unsigned NumOps = (Op.getNumOperands() - 5) / 2; - // Count how many bytes are to be pushed on the stack. - unsigned NumBytes = 0; - - // Keep track of the number of integer regs passed so far. This can be either - // 0 (neither EAX/ECX or EDX used), 1 (EAX/ECX is used) or 2 (EAX/ECX and EDX - // are both used). - unsigned NumIntRegs = 0; - unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. 
- - static const unsigned GPRArgRegs[][2] = { - { X86::CL, X86::DL }, - { X86::CX, X86::DX }, - { X86::ECX, X86::EDX } - }; - static const unsigned XMMArgRegs[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 - }; + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, getTargetMachine(), ArgLocs); for (unsigned i = 0; i != NumOps; ++i) { - SDOperand Arg = Op.getOperand(5+2*i); - - switch (Arg.getValueType()) { - default: assert(0 && "Unknown value type!"); - case MVT::i8: - case MVT::i16: - case MVT::i32: - if (NumIntRegs < 2) { - ++NumIntRegs; - break; - } // Fall through - case MVT::f32: - NumBytes += 4; - break; - case MVT::f64: - NumBytes += 8; - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - if (NumXMMRegs < 4) - NumXMMRegs++; - else { - // XMM arguments have to be aligned on 16-byte boundary. - NumBytes = ((NumBytes + 15) / 16) * 16; - NumBytes += 16; - } - break; - } + MVT::ValueType ArgVT = Op.getOperand(5+2*i).getValueType(); + unsigned ArgFlags =cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue(); + if (CC_X86_64_C(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) + assert(0 && "Unhandled argument type!"); } - - // Make sure the instruction takes 8n+4 bytes to make sure the start of the - // arguments and the arguments after the retaddr has been pushed are aligned. - if ((NumBytes & 7) == 0) - NumBytes += 4; - + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); - // Arguments go on the stack in reverse order, as specified by the ABI. 
- unsigned ArgOffset = 0; - NumIntRegs = 0; SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass; SmallVector<SDOperand, 8> MemOpChains; - SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); - for (unsigned i = 0; i != NumOps; ++i) { - SDOperand Arg = Op.getOperand(5+2*i); - switch (Arg.getValueType()) { - default: assert(0 && "Unexpected ValueType for argument!"); - case MVT::i8: - case MVT::i16: - case MVT::i32: - if (NumIntRegs < 2) { - unsigned RegToUse = - GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs]; - RegsToPass.push_back(std::make_pair(RegToUse, Arg)); - ++NumIntRegs; - break; - } // Fall through - case MVT::f32: { - SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); - PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - ArgOffset += 4; + SDOperand StackPtr; + + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); + + // Promote the value if needed. 
+ switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); break; } - case MVT::f64: { - SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); + + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + assert(VA.isMemLoc()); + if (StackPtr.Val == 0) + StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); + SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy()); PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - ArgOffset += 8; - break; - } - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - if (NumXMMRegs < 4) { - RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); - NumXMMRegs++; - } else { - // XMM arguments have to be aligned on 16-byte boundary. - ArgOffset = ((ArgOffset + 15) / 16) * 16; - SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); - PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - ArgOffset += 16; - } - break; } } - + if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOpChains[0], MemOpChains.size()); @@ -1501,6 +1481,27 @@ InFlag = Chain.getValue(1); } + if (isVarArg) { + // From AMD64 ABI document: + // For calls that may call functions that use varargs or stdargs + // (prototype-less calls or calls to functions containing ellipsis (...) in + // the declaration) %al is used as hidden argument to specify the number + // of SSE registers used.
The contents of %al do not need to match exactly + // the number of registers, but must be an ubound on the number of SSE + // registers used and is in the range 0 - 8 inclusive. + + // Count the number of XMM registers allocated. + static const unsigned XMMArgRegs[] = { + X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, + X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 + }; + unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); + + Chain = DAG.getCopyToReg(Chain, X86::AL, + DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); + InFlag = Chain.getValue(1); + } + // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { @@ -1512,16 +1513,6 @@ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); - // ELF / PIC requires GOT in the EBX register before function calls via PLT - // GOT pointer. - if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT()) { - Chain = DAG.getCopyToReg(Chain, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), - InFlag); - InFlag = Chain.getValue(1); - } - // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); SmallVector<SDOperand, 8> Ops; @@ -1534,11 +1525,6 @@ Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - // Add an implicit use GOT pointer in EBX.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT()) - Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); - if (InFlag.Val) Ops.push_back(InFlag); @@ -1552,16 +1538,22 @@ Ops.clear(); Ops.push_back(Chain); Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); - Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); + Ops.push_back(DAG.getConstant(0, getPointerTy())); Ops.push_back(InFlag); Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); - + // Handle result values, copying them out of physregs into vregs that we // return. return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo); } + +//===----------------------------------------------------------------------===// +// Other Lowering Hooks +//===----------------------------------------------------------------------===// + + SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits