vec_shuffle.ll

Evan Cheng Thu, 26 Jul 2007 18:38:39 -0700

Hi Dan,

I am going to revert the patch for now. Please re-commit once you
have fixed it.


Thanks,

Evan

On Jul 26, 2007, at 5:05 PM, Evan Cheng wrote:

> Hi Dan,
>
> This is breaking oggenc (at least on Mac OS X / x86). Can you look
> into it?
>
> Thanks,
>
> Evan
>
> On Jul 25, 2007, at 5:31 PM, Dan Gohman wrote:
>
>> Author: djg
>> Date: Wed Jul 25 19:31:09 2007
>> New Revision: 40504
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=40504&view=rev
>> Log:
>> Remove X86ISD::LOAD_PACK and X86ISD::LOAD_UA and associated code
>> from the
>> x86 target, replacing them with the new alignment attributes on  
>> memory
>> references.
>>
>> Modified:
>>     llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>>     llvm/trunk/lib/Target/X86/X86ISelLowering.h
>>     llvm/trunk/lib/Target/X86/X86InstrSSE.td
>>     llvm/trunk/test/CodeGen/X86/vec_shuffle.ll
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/
>> X86ISelLowering.cpp?rev=40504&r1=40503&r2=40504&view=diff
>>
>> ===================================================================== 
>> =
>> ========
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jul 25
>> 19:31:09 2007
>> @@ -3367,14 +3367,10 @@
>>      CV.push_back(C);
>>      CV.push_back(C);
>>    }
>> -  Constant *CS = ConstantStruct::get(CV);
>> -  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
>> -  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
>> -  SmallVector<SDOperand, 3> Ops;
>> -  Ops.push_back(DAG.getEntryNode());
>> -  Ops.push_back(CPIdx);
>> -  Ops.push_back(DAG.getSrcValue(NULL));
>> -  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0],
>> Ops.size());
>> +  Constant *C = ConstantVector::get(CV);
>> +  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
>> +  SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
>> NULL, 0,
>> +                               false, 16);
>>    return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
>>  }
>>
>> @@ -3399,21 +3395,16 @@
>>      CV.push_back(C);
>>      CV.push_back(C);
>>    }
>> -  Constant *CS = ConstantStruct::get(CV);
>> -  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
>> +  Constant *C = ConstantVector::get(CV);
>> +  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
>> +  SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
>> NULL, 0,
>> +                               false, 16);
>>    if (MVT::isVector(VT)) {
>> -    SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
>> NULL, 0);
>>      return DAG.getNode(ISD::BIT_CONVERT, VT,
>>                         DAG.getNode(ISD::XOR, MVT::v2i64,
>>                      DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64,
>> Op.getOperand(0)),
>>                      DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64,
>> Mask)));
>>    } else {
>> -    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
>> -    SmallVector<SDOperand, 3> Ops;
>> -    Ops.push_back(DAG.getEntryNode());
>> -    Ops.push_back(CPIdx);
>> -    Ops.push_back(DAG.getSrcValue(NULL));
>> -    SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0],
>> Ops.size());
>>      return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
>>    }
>>  }
>> @@ -3442,14 +3433,10 @@
>>      CV.push_back(ConstantFP::get(SrcTy, 0.0));
>>      CV.push_back(ConstantFP::get(SrcTy, 0.0));
>>    }
>> -  Constant *CS = ConstantStruct::get(CV);
>> -  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
>> -  SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other);
>> -  SmallVector<SDOperand, 3> Ops;
>> -  Ops.push_back(DAG.getEntryNode());
>> -  Ops.push_back(CPIdx);
>> -  Ops.push_back(DAG.getSrcValue(NULL));
>> -  SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0],
>> Ops.size());
>> +  Constant *C = ConstantVector::get(CV);
>> +  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
>> +  SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx,
>> NULL, 0,
>> +                                false, 16);
>>    SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
>>
>>    // Shift sign bit right or left if the two operands have
>> different types.
>> @@ -3474,14 +3461,10 @@
>>      CV.push_back(ConstantFP::get(SrcTy, 0.0));
>>      CV.push_back(ConstantFP::get(SrcTy, 0.0));
>>    }
>> -  CS = ConstantStruct::get(CV);
>> -  CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
>> -  Tys = DAG.getVTList(VT, MVT::Other);
>> -  Ops.clear();
>> -  Ops.push_back(DAG.getEntryNode());
>> -  Ops.push_back(CPIdx);
>> -  Ops.push_back(DAG.getSrcValue(NULL));
>> -  SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0],
>> Ops.size());
>> +  C = ConstantVector::get(CV);
>> +  CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
>> +  SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
>> NULL, 0,
>> +                                false, 16);
>>    SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
>>
>>    // Or the value with the sign bit.
>> @@ -4357,8 +4340,6 @@
>>    case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
>>    case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
>>    case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
>> -  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
>> -  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
>>    case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
>>    case X86ISD::Wrapper:            return "X86ISD::Wrapper";
>>    case X86ISD::S2VEC:              return "X86ISD::S2VEC";
>> @@ -4756,19 +4737,14 @@
>>    }
>>
>>    bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI,
>> Subtarget);
>> +  LoadSDNode *LD = cast<LoadSDNode>(Base);
>>    if (isAlign16) {
>> -    LoadSDNode *LD = cast<LoadSDNode>(Base);
>>      return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
>> -                       LD->getSrcValueOffset());
>> +                       LD->getSrcValueOffset(), LD->isVolatile());
>>    } else {
>> -    // Just use movups, it's shorter.
>> -    SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
>> -    SmallVector<SDOperand, 3> Ops;
>> -    Ops.push_back(Base->getOperand(0));
>> -    Ops.push_back(Base->getOperand(1));
>> -    Ops.push_back(Base->getOperand(2));
>> -    return DAG.getNode(ISD::BIT_CONVERT, VT,
>> -                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0],
>> Ops.size()));
>> +    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
>> +                       LD->getSrcValueOffset(), LD->isVolatile(),
>> +                       LD->getAlignment());
>>    }
>>  }
>>
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/
>> X86ISelLowering.h?rev=40504&r1=40503&r2=40504&view=diff
>>
>> ===================================================================== 
>> =
>> ========
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Jul 25 19:31:09
>> 2007
>> @@ -143,14 +143,6 @@
>>        /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
>>        REP_MOVS,
>>
>> -      /// LOAD_PACK Load a 128-bit packed float / double value. It
>> has the same
>> -      /// operands as a normal load.
>> -      LOAD_PACK,
>> -
>> -      /// LOAD_UA Load an unaligned 128-bit value. It has the same
>> operands as
>> -      /// a normal load.
>> -      LOAD_UA,
>> -
>>        /// GlobalBaseReg - On Darwin, this node represents the
>> result of the popl
>>        /// at function entry, used for PIC code.
>>        GlobalBaseReg,
>>
>> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/
>> X86InstrSSE.td?rev=40504&r1=40503&r2=40504&view=diff
>>
>> ===================================================================== 
>> =
>> ========
>> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Jul 25 19:31:09 2007
>> @@ -21,8 +21,6 @@
>>  def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
>>                                              SDTCisFP<0>,
>> SDTCisInt<2> ]>;
>>
>> -def X86loadp   : SDNode<"X86ISD::LOAD_PACK", SDTLoad,
>> [SDNPHasChain]>;
>> -def X86loadu   : SDNode<"X86ISD::LOAD_UA",   SDTLoad,
>> [SDNPHasChain]>;
>>  def X86fmin    : SDNode<"X86ISD::FMIN",      SDTFPBinOp>;
>>  def X86fmax    : SDNode<"X86ISD::FMAX",      SDTFPBinOp>;
>>  def X86fand    : SDNode<"X86ISD::FAND",      SDTFPBinOp,
>> @@ -82,9 +80,6 @@
>>  // SSE pattern fragments
>>  //
>> ===------------------------------------------------------------------ 
>> -
>> ---===//
>>
>> -def X86loadpf32  : PatFrag<(ops node:$ptr), (f32   (X86loadp node:
>> $ptr))>;
>> -def X86loadpf64  : PatFrag<(ops node:$ptr), (f64   (X86loadp node:
>> $ptr))>;
>> -
>>  def loadv4f32    : PatFrag<(ops node:$ptr), (v4f32 (load node:
>> $ptr))>;
>>  def loadv2f64    : PatFrag<(ops node:$ptr), (v2f64 (load node:
>> $ptr))>;
>>  def loadv4i32    : PatFrag<(ops node:$ptr), (v4i32 (load node:
>> $ptr))>;
>> @@ -109,6 +104,8 @@
>>    return false;
>>  }]>;
>>
>> +def alignedloadf32   : PatFrag<(ops node:$ptr), (f32
>> (alignedload node:$ptr))>;
>> +def alignedloadf64   : PatFrag<(ops node:$ptr), (f64
>> (alignedload node:$ptr))>;
>>  def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32
>> (alignedload node:$ptr))>;
>>  def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64
>> (alignedload node:$ptr))>;
>>  def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32
>> (alignedload node:$ptr))>;
>> @@ -411,7 +408,7 @@
>>  // disregarded.
>>  def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins
>> f128mem:$src),
>>                       "movaps {$src, $dst|$dst, $src}",
>> -                     [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
>> +                     [(set FR32:$dst, (alignedloadf32 addr:$src))]>;
>>
>>  // Alias bitwise logical operations using SSE logical ops on
>> packed FP values.
>>  let isTwoAddress = 1 in {
>> @@ -430,15 +427,15 @@
>>  def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst), (ins FR32:
>> $src1, f128mem:$src2),
>>                      "andps {$src2, $dst|$dst, $src2}",
>>                      [(set FR32:$dst, (X86fand FR32:$src1,
>> -                                      (X86loadpf32 addr:$src2)))]>;
>> +                                      (alignedloadf32 addr:
>> $src2)))]>;
>>  def FsORPSrm  : PSI<0x56, MRMSrcMem, (outs FR32:$dst), (ins FR32:
>> $src1, f128mem:$src2),
>>                      "orps {$src2, $dst|$dst, $src2}",
>>                      [(set FR32:$dst, (X86for FR32:$src1,
>> -                                      (X86loadpf32 addr:$src2)))]>;
>> +                                      (alignedloadf32 addr:
>> $src2)))]>;
>>  def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst), (ins FR32:
>> $src1, f128mem:$src2),
>>                      "xorps {$src2, $dst|$dst, $src2}",
>>                      [(set FR32:$dst, (X86fxor FR32:$src1,
>> -                                      (X86loadpf32 addr:$src2)))]>;
>> +                                      (alignedloadf32 addr:
>> $src2)))]>;
>>
>>  def FsANDNPSrr : PSI<0x55, MRMSrcReg,
>>                       (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
>> @@ -1084,7 +1081,7 @@
>>  // disregarded.
>>  def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins
>> f128mem:$src),
>>                       "movapd {$src, $dst|$dst, $src}",
>> -                     [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
>> +                     [(set FR64:$dst, (alignedloadf64 addr:$src))]>;
>>
>>  // Alias bitwise logical operations using SSE logical ops on
>> packed FP values.
>>  let isTwoAddress = 1 in {
>> @@ -1103,15 +1100,15 @@
>>  def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), (ins FR64:
>> $src1, f128mem:$src2),
>>                      "andpd {$src2, $dst|$dst, $src2}",
>>                      [(set FR64:$dst, (X86fand FR64:$src1,
>> -                                      (X86loadpf64 addr:$src2)))]>;
>> +                                      (alignedloadf64 addr:
>> $src2)))]>;
>>  def FsORPDrm  : PDI<0x56, MRMSrcMem, (outs FR64:$dst), (ins FR64:
>> $src1, f128mem:$src2),
>>                      "orpd {$src2, $dst|$dst, $src2}",
>>                      [(set FR64:$dst, (X86for FR64:$src1,
>> -                                      (X86loadpf64 addr:$src2)))]>;
>> +                                      (alignedloadf64 addr:
>> $src2)))]>;
>>  def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), (ins FR64:
>> $src1, f128mem:$src2),
>>                      "xorpd {$src2, $dst|$dst, $src2}",
>>                      [(set FR64:$dst, (X86fxor FR64:$src1,
>> -                                      (X86loadpf64 addr:$src2)))]>;
>> +                                      (alignedloadf64 addr:
>> $src2)))]>;
>>
>>  def FsANDNPDrr : PDI<0x55, MRMSrcReg,
>>                       (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
>> @@ -2631,11 +2628,11 @@
>>                    (load addr:$src2))),
>>            (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
>>
>> -// Unaligned load
>> -def : Pat<(v4f32 (X86loadu addr:$src)), (MOVUPSrm addr:$src)>,
>> -      Requires<[HasSSE1]>;
>> -
>>  // Use movaps / movups for SSE integer load / store (one byte
>> shorter).
>> +def : Pat<(alignedloadv4i32 addr:$src),
>> +          (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>;
>> +def : Pat<(loadv4i32 addr:$src),
>> +          (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>;
>>  def : Pat<(alignedloadv2i64 addr:$src),
>>            (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>;
>>  def : Pat<(loadv2i64 addr:$src),
>>
>> Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>> X86/vec_shuffle.ll?rev=40504&r1=40503&r2=40504&view=diff
>>
>> ===================================================================== 
>> =
>> ========
>> --- llvm/trunk/test/CodeGen/X86/vec_shuffle.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/vec_shuffle.ll Wed Jul 25 19:31:09
>> 2007
>> @@ -1,6 +1,6 @@
>>  ; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 -o %t -f
>>  ; RUN: grep shufp   %t | wc -l | grep 1
>> -; RUN: grep movups  %t | wc -l | grep 1
>> +; RUN: grep movupd  %t | wc -l | grep 1
>>  ; RUN: grep pshufhw %t | wc -l | grep 1
>>
>>  void %test_v4sf(<4 x float>* %P, float %X, float %Y) {
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits@cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits@cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

_______________________________________________
llvm-commits mailing list
llvm-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

Re: [llvm-commits] [llvm] r40504 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_shuffle.ll

Reply via email to