Changes in directory llvm/lib/Target/X86:
X86ISelLowering.cpp updated: 1.100 -> 1.101 --- Log message: MEMSET / MEMCPY lowering bugs: we can't issue a single WORD / DWORD version of rep/stos and rep/movs if the count is not a constant. We could do rep/stosl; and $count, 3; rep/stosb. For now, I will lower them to memset / memcpy calls. We will revisit this after a little experimentation. We also need to take care of the trailing bytes even if the count is a constant. Since the maximum number of trailing bytes is 3, we will simply issue loads / stores. --- Diffs of the changes: (+83 -28) X86ISelLowering.cpp | 111 ++++++++++++++++++++++++++++++++++++++-------------- 1 files changed, 83 insertions(+), 28 deletions(-) Index: llvm/lib/Target/X86/X86ISelLowering.cpp diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.100 llvm/lib/Target/X86/X86ISelLowering.cpp:1.101 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.100 Fri Mar 3 19:12:00 2006 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Fri Mar 3 20:48:56 2006 @@ -1696,7 +1696,7 @@ Op.getOperand(0), Op.getOperand(2), CC, Cond); } case ISD::MEMSET: { - SDOperand InFlag; + SDOperand InFlag(0, 0); SDOperand Chain = Op.getOperand(0); unsigned Align = (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); @@ -1705,7 +1705,7 @@ ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); // If not DWORD aligned, call memset if size is less than the threshold. // It knows how to align to the right boundary first. 
- if ((Align & 3) != 0 && + if ((Align & 3) != 0 || !(I && I->getValue() >= Subtarget->getMinRepStrSizeThreshold())) { MVT::ValueType IntPtr = getPointerTy(); const Type *IntPtrTy = getTargetData().getIntPtrType(); @@ -1723,7 +1723,9 @@ MVT::ValueType AVT; SDOperand Count; - if (ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2))) { + ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + unsigned BytesLeft = 0; + if (ValC) { unsigned ValReg; unsigned Val = ValC->getValue() & 255; @@ -1731,21 +1733,15 @@ switch (Align & 3) { case 2: // WORD aligned AVT = MVT::i16; - if (I) - Count = DAG.getConstant(I->getValue() / 2, MVT::i32); - else - Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), - DAG.getConstant(1, MVT::i8)); + Count = DAG.getConstant(I->getValue() / 2, MVT::i32); + BytesLeft = I->getValue() % 2; Val = (Val << 8) | Val; ValReg = X86::AX; break; case 0: // DWORD aligned AVT = MVT::i32; - if (I) - Count = DAG.getConstant(I->getValue() / 4, MVT::i32); - else - Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), - DAG.getConstant(2, MVT::i8)); + Count = DAG.getConstant(I->getValue() / 4, MVT::i32); + BytesLeft = I->getValue() % 4; Val = (Val << 8) | Val; Val = (Val << 16) | Val; ValReg = X86::EAX; @@ -1771,9 +1767,36 @@ InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); InFlag = Chain.getValue(1); + Chain = DAG.getNode(X86ISD::REP_STOS, MVT::Other, Chain, + DAG.getValueType(AVT), InFlag); + + if (BytesLeft) { + // Issue stores for the last 1 - 3 bytes. 
+ SDOperand Value; + unsigned Val = ValC->getValue() & 255; + unsigned Offset = I->getValue() - BytesLeft; + SDOperand DstAddr = Op.getOperand(1); + MVT::ValueType AddrVT = DstAddr.getValueType(); + if (BytesLeft >= 2) { + Value = DAG.getConstant((Val << 8) | Val, MVT::i16); + Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, + DAG.getNode(ISD::ADD, AddrVT, DstAddr, + DAG.getConstant(Offset, AddrVT)), + DAG.getSrcValue(NULL)); + BytesLeft -= 2; + Offset += 2; + } + + if (BytesLeft == 1) { + Value = DAG.getConstant(Val, MVT::i8); + Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, + DAG.getNode(ISD::ADD, AddrVT, DstAddr, + DAG.getConstant(Offset, AddrVT)), + DAG.getSrcValue(NULL)); + } + } - return DAG.getNode(X86ISD::REP_STOS, MVT::Other, Chain, - DAG.getValueType(AVT), InFlag); + return Chain; } case ISD::MEMCPY: { SDOperand Chain = Op.getOperand(0); @@ -1784,7 +1807,7 @@ ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)); // If not DWORD aligned, call memcpy if size is less than the threshold. // It knows how to align to the right boundary first. 
- if ((Align & 3) != 0 && + if ((Align & 3) != 0 || !(I && I->getValue() >= Subtarget->getMinRepStrSizeThreshold())) { MVT::ValueType IntPtr = getPointerTy(); const Type *IntPtrTy = getTargetData().getIntPtrType(); @@ -1800,21 +1823,17 @@ MVT::ValueType AVT; SDOperand Count; + unsigned BytesLeft = 0; switch (Align & 3) { case 2: // WORD aligned AVT = MVT::i16; - if (I) - Count = DAG.getConstant(I->getValue() / 2, MVT::i32); - else - Count = DAG.getConstant(I->getValue() / 2, MVT::i32); + Count = DAG.getConstant(I->getValue() / 2, MVT::i32); + BytesLeft = I->getValue() % 2; break; case 0: // DWORD aligned AVT = MVT::i32; - if (I) - Count = DAG.getConstant(I->getValue() / 4, MVT::i32); - else - Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3), - DAG.getConstant(2, MVT::i8)); + Count = DAG.getConstant(I->getValue() / 4, MVT::i32); + BytesLeft = I->getValue() % 4; break; default: // Byte aligned AVT = MVT::i8; @@ -1822,16 +1841,52 @@ break; } - SDOperand InFlag; + SDOperand InFlag(0, 0); Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag); InFlag = Chain.getValue(1); + Chain = DAG.getNode(X86ISD::REP_MOVS, MVT::Other, Chain, + DAG.getValueType(AVT), InFlag); + + if (BytesLeft) { + // Issue loads and stores for the last 1 - 3 bytes. 
+ unsigned Offset = I->getValue() - BytesLeft; + SDOperand DstAddr = Op.getOperand(1); + MVT::ValueType DstVT = DstAddr.getValueType(); + SDOperand SrcAddr = Op.getOperand(2); + MVT::ValueType SrcVT = SrcAddr.getValueType(); + SDOperand Value; + if (BytesLeft >= 2) { + Value = DAG.getLoad(MVT::i16, Chain, + DAG.getNode(ISD::ADD, SrcVT, SrcAddr, + DAG.getConstant(Offset, SrcVT)), + DAG.getSrcValue(NULL)); + Chain = Value.getValue(1); + Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, + DAG.getNode(ISD::ADD, DstVT, DstAddr, + DAG.getConstant(Offset, DstVT)), + DAG.getSrcValue(NULL)); + BytesLeft -= 2; + Offset += 2; + } + + if (BytesLeft == 1) { + Value = DAG.getLoad(MVT::i8, Chain, + DAG.getNode(ISD::ADD, SrcVT, SrcAddr, + DAG.getConstant(Offset, SrcVT)), + DAG.getSrcValue(NULL)); + Chain = Value.getValue(1); + Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, + DAG.getNode(ISD::ADD, DstVT, DstAddr, + DAG.getConstant(Offset, DstVT)), + DAG.getSrcValue(NULL)); + } + } - return DAG.getNode(X86ISD::REP_MOVS, MVT::Other, Chain, - DAG.getValueType(AVT), InFlag); + return Chain; } // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits