Hi Scott,

There are some compilation warnings. Please fix!
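All four complaints below have the same shape: a local that is only assigned
inside the cases of a switch, so gcc can't prove it is set on every path. The
usual cure is a sentinel value up front plus an assert in the default case. A
minimal sketch — the switch contents here are illustrative, not copied from
the tree:

    // e.g. in LowerEXTRACT_VECTOR_ELT():
    int prefslot_begin = -1, prefslot_end = -1;   // sentinel: "not set yet"

    switch (VT) {                                 // VT: the element value type
    case MVT::i8:
      prefslot_begin = prefslot_end = 3;          // illustrative case body
      break;
    // ... remaining cases assign both variables ...
    default:
      assert(0 && "LowerEXTRACT_VECTOR_ELT: unexpected value type");
    }

    assert(prefslot_begin != -1 && prefslot_end != -1
           && "LowerEXTRACT_VECTOR_ELT: preferred slot never set");

The same treatment should silence 'val' in LowerBUILD_VECTOR and 'NewOpC' in
LowerLOAD. The warnings: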
SPUISelLowering.cpp: In function 'llvm::SDOperand LowerEXTRACT_VECTOR_ELT(llvm::SDOperand, llvm::SelectionDAG&)':
SPUISelLowering.cpp:2103: warning: 'prefslot_begin' may be used uninitialized in this function
SPUISelLowering.cpp:2103: warning: 'prefslot_end' may be used uninitialized in this function
SPUISelLowering.cpp: In function 'llvm::SDOperand LowerBUILD_VECTOR(llvm::SDOperand, llvm::SelectionDAG&)':
SPUISelLowering.cpp:1685: warning: 'val' may be used uninitialized in this function
SPUISelLowering.cpp: In function 'llvm::SDOperand LowerLOAD(llvm::SDOperand, llvm::SelectionDAG&, const llvm::SPUSubtarget*)':
SPUISelLowering.cpp:559: warning: 'NewOpC' may be used uninitialized in this function

Thanks,

Evan

On Dec 17, 2007, at 2:32 PM, Scott Michel wrote:

> Author: pingbak
> Date: Mon Dec 17 16:32:34 2007
> New Revision: 45130
>
> URL: http://llvm.org/viewvc/llvm-project?rev=45130&view=rev
> Log:
> - Restore some i8 functionality in CellSPU
> - New test case: nand.ll
>
> Added:
>     llvm/trunk/test/CodeGen/CellSPU/nand.ll
> Modified:
>     llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
>     llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
>     llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp
>     llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td
>     llvm/trunk/lib/Target/CellSPU/SPUOperands.td
>     llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp
>     llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.td
>     llvm/trunk/test/CodeGen/CellSPU/and_ops.ll
>
> Modified: llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp?rev=45130&r1=45129&r2=45130&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp (original)
> +++ llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp Mon Dec 17 16:32:34 2007
> @@ -63,6 +63,13 @@
>    }
>  #endif
>
> +  //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
> +  bool
> +  isI32IntU10Immediate(ConstantSDNode *CN)
> +  {
> +    return isU10Constant((int) CN->getValue());
> +  }
> +
>    //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
>    bool
>    isI16IntS10Immediate(ConstantSDNode *CN)
>
> Modified: llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp?rev=45130&r1=45129&r2=45130&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp Mon Dec 17 16:32:34 2007
> @@ -119,11 +119,13 @@
>    // Set up the SPU's register classes:
>    // NOTE: i8 register class is not registered because we cannot determine when
>    // we need to zero or sign extend for custom-lowered loads and stores.
> -  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
> -  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
> -  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
> -  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
> -  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
> +  // NOTE: Ignore the previous note. For now. :-)
> +  addRegisterClass(MVT::i8,  SPU::R8CRegisterClass);
> +  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
> +  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
> +  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
> +  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
> +  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
>    addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
>
>    // SPU has no sign or zero extended loads for i1, i8, i16:
> @@ -925,7 +927,7 @@
>      }
>      case MVT::i8:
>        if (!isVarArg && ArgRegIdx < NumArgRegs) {
> -        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
> +        unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
>          MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
>          ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
>          ++ArgRegIdx;
>
> Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp?rev=45130&r1=45129&r2=45130&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp Mon Dec 17 16:32:34 2007
> @@ -54,11 +54,11 @@
>    case SPU::ORIr64:
>    case SPU::ORHIv8i16:
>    case SPU::ORHIr16:
> -  // case SPU::ORHI1To2:
> +  case SPU::ORHI1To2:
>    case SPU::ORBIv16i8:
> -  //case SPU::ORBIr8:
> +  case SPU::ORBIr8:
>    case SPU::ORI2To4:
> -  // case SPU::ORI1To4:
> +  case SPU::ORI1To4:
>    case SPU::AHIvec:
>    case SPU::AHIr16:
>    case SPU::AIvec:
>
> Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td?rev=45130&r1=45129&r2=45130&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td Mon Dec 17 16:32:34 2007
> @@ -111,6 +111,11 @@
>          "lqd\t$rT, $src", LoadStore,
>          [(set R16C:$rT, (load dform_addr:$src))]>;
>
> +  def LQDr8:
> +      RI10Form<0b00101100, (outs R8C:$rT), (ins memri10:$src),
> +        "lqd\t$rT, $src", LoadStore,
> +        [(set R8C:$rT, (load dform_addr:$src))]>;
> +
>    def LQAv16i8:
>        RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
>          "lqa\t$rT, $src", LoadStore,
> @@ -171,6 +176,11 @@
>          "lqa\t$rT, $src", LoadStore,
>          [(set R16C:$rT, (load aform_addr:$src))]>;
>
> +  def LQAr8:
> +      RI16Form<0b100001100, (outs R8C:$rT), (ins addr256k:$src),
> +        "lqa\t$rT, $src", LoadStore,
> +        [(set R8C:$rT, (load aform_addr:$src))]>;
> +
>    def LQXv16i8:
>        RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src),
>          "lqx\t$rT, $src", LoadStore,
> @@ -231,14 +241,17 @@
>          "lqx\t$rT, $src", LoadStore,
>          [(set R16C:$rT, (load xform_addr:$src))]>;
>
> +  def LQXr8:
> +      RRForm<0b00100011100, (outs R8C:$rT), (ins memrr:$src),
> +        "lqx\t$rT, $src", LoadStore,
> +        [(set R8C:$rT, (load xform_addr:$src))]>;
> +
>    /* Load quadword, PC relative: Not much use at this point in time.
>       Might be of use later for relocatable code.
>    def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
>                       "lqr\t$rT, $disp", LoadStore,
>                       [(set VECREG:$rT, (load iaddr:$disp))]>;
>    */
> -
> -  // Catch-all for unaligned loads:
>  }
>
> //===----------------------------------------------------------------------===//
> @@ -295,6 +308,10 @@
>            "stqd\t$rT, $src", LoadStore,
>            [(store R16C:$rT, dform_addr:$src)]>;
>
> +  def STQDr8 : RI10Form<0b00100100, (outs), (ins R8C:$rT, memri10:$src),
> +          "stqd\t$rT, $src", LoadStore,
> +          [(store R8C:$rT, dform_addr:$src)]>;
> +
>    def STQAv16i8 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, addr256k:$src),
>            "stqa\t$rT, $src", LoadStore,
>            [(store (v16i8 VECREG:$rT), aform_addr:$src)]>;
> @@ -340,6 +357,14 @@
>            "stqa\t$rT, $src", LoadStore,
>            [(store R64FP:$rT, aform_addr:$src)]>;
>
> +  def STQAr16 : RI10Form<0b00100100, (outs), (ins R16C:$rT, addr256k:$src),
> +          "stqa\t$rT, $src", LoadStore,
> +          [(store R16C:$rT, aform_addr:$src)]>;
> +
> +  def STQAr8 : RI10Form<0b00100100, (outs), (ins R8C:$rT, addr256k:$src),
> +          "stqa\t$rT, $src", LoadStore,
> +          [(store R8C:$rT, aform_addr:$src)]>;
> +
>    def STQXv16i8 : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memrr:$src),
>            "stqx\t$rT, $src", LoadStore,
>            [(store (v16i8 VECREG:$rT), xform_addr:$src)]>;
> @@ -368,26 +393,36 @@
>            "stqx\t$rT, $src", LoadStore,
>            [(store GPRC:$rT, xform_addr:$src)]>;
>
> -  def STQXr64 : RI10Form<0b00100100, (outs), (ins R64C:$rT, memrr:$src),
> +  def STQXr64:
> +    RI10Form<0b00100100, (outs), (ins R64C:$rT, memrr:$src),
>            "stqx\t$rT, $src", LoadStore,
>            [(store R64C:$rT, xform_addr:$src)]>;
>
> -  def STQXr32 : RI10Form<0b00100100, (outs), (ins R32C:$rT, memrr:$src),
> +  def STQXr32:
> +    RI10Form<0b00100100, (outs), (ins R32C:$rT, memrr:$src),
>            "stqx\t$rT, $src", LoadStore,
>            [(store R32C:$rT, xform_addr:$src)]>;
>
>    // Floating Point
> -  def STQXf32 : RI10Form<0b00100100, (outs), (ins R32FP:$rT, memrr:$src),
> +  def STQXf32:
> +    RI10Form<0b00100100, (outs), (ins R32FP:$rT, memrr:$src),
>            "stqx\t$rT, $src", LoadStore,
>            [(store R32FP:$rT, xform_addr:$src)]>;
>
> -  def STQXf64 : RI10Form<0b00100100, (outs), (ins R64FP:$rT, memrr:$src),
> +  def STQXf64:
> +    RI10Form<0b00100100, (outs), (ins R64FP:$rT, memrr:$src),
>            "stqx\t$rT, $src", LoadStore,
>            [(store R64FP:$rT, xform_addr:$src)]>;
>
> -  def STQXr16 : RI10Form<0b00100100, (outs), (ins R16C:$rT, memrr:$src),
> +  def STQXr16:
> +    RI10Form<0b00100100, (outs), (ins R16C:$rT, memrr:$src),
>            "stqx\t$rT, $src", LoadStore,
>            [(store R16C:$rT, xform_addr:$src)]>;
> +
> +  def STQXr8:
> +    RI10Form<0b00100100, (outs), (ins R8C:$rT, memrr:$src),
> +          "stqx\t$rT, $src", LoadStore,
> +          [(store R8C:$rT, xform_addr:$src)]>;
>
>    /* Store quadword, PC relative: Not much use at this point in time. Might
>       be useful for relocatable code.
> @@ -448,6 +483,13 @@
>          "ilh\t$rT, $val", ImmLoad,
>          [(set R16C:$rT, immSExt16:$val)]>;
>
> +// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with
> +// the right constant")
> +def ILHr8:
> +  RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val),
> +    "ilh\t$rT, $val", ImmLoad,
> +    [(set R8C:$rT, immSExt8:$val)]>;
> +
> // IL does sign extension!
>  def ILr64:
>    RI16Form<0b100000010, (outs R64C:$rT), (ins s16imm_i64:$val),
> @@ -626,25 +668,32 @@
>      "a\t$rT, $rA, $rB", IntegerOp,
>      [(set R32C:$rT, (add R32C:$rA, R32C:$rB))]>;
>
> +def Ar8:
> +    RRForm<0b00000011000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
> +      "a\t$rT, $rA, $rB", IntegerOp,
> +      [(set R8C:$rT, (add R8C:$rA, R8C:$rB))]>;
> +
>  def AIvec:
>      RI10Form<0b00111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
>        "ai\t$rT, $rA, $val", IntegerOp,
>        [(set (v4i32 VECREG:$rT), (add (v4i32 VECREG:$rA),
>                                       v4i32SExt10Imm:$val))]>;
>
> -def AIr32 : RI10Form<0b00111000, (outs R32C:$rT),
> -                     (ins R32C:$rA, s10imm_i32:$val),
> -                     "ai\t$rT, $rA, $val", IntegerOp,
> -                     [(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>;
> -
> -def SFHvec : RRForm<0b00010010000, (outs VECREG:$rT),
> -                    (ins VECREG:$rA, VECREG:$rB),
> -                    "sfh\t$rT, $rA, $rB", IntegerOp,
> -                    [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
> -
> -def SFHr16 : RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
> -                    "sfh\t$rT, $rA, $rB", IntegerOp,
> -                    [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>;
> +def AIr32:
> +    RI10Form<0b00111000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
> +      "ai\t$rT, $rA, $val", IntegerOp,
> +      [(set R32C:$rT, (add R32C:$rA, i32ImmSExt10:$val))]>;
> +
> +def SFHvec:
> +    RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
> +      "sfh\t$rT, $rA, $rB", IntegerOp,
> +      [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA),
> +                                     (v8i16 VECREG:$rB)))]>;
> +
> +def SFHr16:
> +    RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
> +      "sfh\t$rT, $rA, $rB", IntegerOp,
> +      [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>;
>
>  def SFHIvec:
>      RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
> @@ -977,6 +1026,11 @@
>      "xsbh\t$rDst, $rSrc", IntegerOp,
>      [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
>
> +def XSBHr8:
> +    RRForm_1<0b01101101010, (outs R16C:$rDst), (ins R8C:$rSrc),
> +      "xsbh\t$rDst, $rSrc", IntegerOp,
> +      [(set R16C:$rDst, (sext R8C:$rSrc))]>;
> +
> // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
> // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
> // pattern below). Intentionally doesn't match a pattern because we want the
> @@ -1070,6 +1124,11 @@
>      "and\t$rT, $rA, $rB", IntegerOp,
>      [(set R16C:$rT, (and R16C:$rA, R16C:$rB))]>;
>
> +def ANDr8:
> +    RRForm<0b10000011000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
> +      "and\t$rT, $rA, $rB", IntegerOp,
> +      [(set R8C:$rT, (and R8C:$rA, R8C:$rB))]>;
> +
> // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
> // quantities -- see 16->32 zext pattern.
> // > @@ -1112,12 +1171,22 @@ > "andc\t$rT, $rA, $rB", IntegerOp, > [(set R16C:$rT, (and R16C:$rA, (not R16C:$rB)))]>; > > +def ANDCr8: > + RRForm<0b10000011010, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), > + "andc\t$rT, $rA, $rB", IntegerOp, > + [(set R8C:$rT, (and R8C:$rA, (not R8C:$rB)))]>; > + > def ANDBIv16i8: > RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, > u10imm:$val), > "andbi\t$rT, $rA, $val", IntegerOp, > [(set (v16i8 VECREG:$rT), > (and (v16i8 VECREG:$rA), (v16i8 v16i8U8Imm:$val)))]>; > > +def ANDBIr8: > + RI10Form<0b01101000, (outs R8C:$rT), (ins R8C:$rA, u10imm_i8: > $val), > + "andbi\t$rT, $rA, $val", IntegerOp, > + [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>; > + > def ANDHIv8i16: > RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, > s10imm:$val), > "andhi\t$rT, $rA, $val", IntegerOp, > @@ -1127,7 +1196,12 @@ > def ANDHIr16: > RI10Form<0b10101000, (outs R16C:$rT), (ins R16C:$rA, s10imm: > $val), > "andhi\t$rT, $rA, $val", IntegerOp, > - [(set R16C:$rT, (and R16C:$rA, i16ImmU10:$val))]>; > + [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>; > + > +def ANDHI1To2: > + RI10Form<0b10101000, (outs R16C:$rT), (ins R8C:$rA, s10imm:$val), > + "andhi\t$rT, $rA, $val", IntegerOp, > + [(set R16C:$rT, (and (zext R8C:$rA), i16ImmSExt10:$val))]>; > > def ANDIv4i32: > RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, > s10imm:$val), > @@ -1140,6 +1214,13 @@ > "andi\t$rT, $rA, $val", IntegerOp, > [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>; > > +// Hacked form of ANDI to zero-extend i8 quantities to i32. See > the zext 8->32 > +// pattern below. > +def ANDI1To4: > + RI10Form<0b10101000, (outs R32C:$rT), (ins R8C:$rA, s10imm_i32: > $val), > + "andi\t$rT, $rA, $val", IntegerOp, > + [(set R32C:$rT, (and (zext R8C:$rA), i32ImmSExt10:$val))]>; > + > // Hacked form of ANDI to zero-extend i16 quantities to i32. See the > // zext 16->32 pattern below. 
> // > @@ -1199,7 +1280,20 @@ > "or\t$rT, $rA, $rB", IntegerOp, > [(set R16C:$rT, (or R16C:$rA, R16C:$rB))]>; > > +def ORr8: > + RRForm<0b10000010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), > + "or\t$rT, $rA, $rB", IntegerOp, > + [(set R8C:$rT, (or R8C:$rA, R8C:$rB))]>; > + > // ORv*_*: Used in scalar->vector promotions: > +def ORv16i8_i8: > + RRForm<0b10000010000, (outs VECREG:$rT), (ins R8C:$rA, R8C:$rB), > + "or\t$rT, $rA, $rB", IntegerOp, > + [/* no pattern */]>; > + > +def : Pat<(v16i8 (SPUpromote_scalar R8C:$rA)), > + (ORv16i8_i8 R8C:$rA, R8C:$rA)>; > + > def ORv8i16_i16: > RRForm<0b10000010000, (outs VECREG:$rT), (ins R16C:$rA, R16C: > $rB), > "or\t$rT, $rA, $rB", IntegerOp, > @@ -1241,6 +1335,14 @@ > (ORv2f64_f64 R64FP:$rA, R64FP:$rA)>; > > // ORi*_v*: Used to extract vector element 0 (the preferred slot) > +def ORi8_v16i8: > + RRForm<0b10000010000, (outs R8C:$rT), (ins VECREG:$rA, VECREG: > $rB), > + "or\t$rT, $rA, $rB", IntegerOp, > + [/* no pattern */]>; > + > +def : Pat<(SPUextract_elt0 (v16i8 VECREG:$rA)), > + (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>; > + > def ORi16_v8i16: > RRForm<0b10000010000, (outs R16C:$rT), (ins VECREG:$rA, VECREG: > $rB), > "or\t$rT, $rA, $rB", IntegerOp, > @@ -1325,6 +1427,11 @@ > "orc\t$rT, $rA, $rB", IntegerOp, > [(set R16C:$rT, (or R16C:$rA, (not R16C:$rB)))]>; > > +def ORCr8: > + RRForm<0b10010010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB), > + "orc\t$rT, $rA, $rB", IntegerOp, > + [(set R8C:$rT, (or R8C:$rA, (not R8C:$rB)))]>; > + > // OR byte immediate > def ORBIv16i8: > RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, > u10imm:$val), > @@ -1332,29 +1439,40 @@ > [(set (v16i8 VECREG:$rT), > (or (v16i8 VECREG:$rA), (v16i8 v16i8U8Imm:$val)))]>; > > +def ORBIr8: > + RI10Form<0b01100000, (outs R8C:$rT), (ins R8C:$rA, u10imm_i8: > $val), > + "orbi\t$rT, $rA, $val", IntegerOp, > + [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>; > + > // OR halfword immediate > def ORHIv8i16: > - RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, > s10imm:$val), > + RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, > u10imm:$val), > "orhi\t$rT, $rA, $val", IntegerOp, > [(set (v8i16 VECREG:$rT), (or (v8i16 VECREG:$rA), > - v8i16SExt10Imm:$val))]>; > + v8i16Uns10Imm:$val))]>; > > def ORHIr16: > - RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm: > $val), > + RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, u10imm: > $val), > "orhi\t$rT, $rA, $val", IntegerOp, > - [(set R16C:$rT, (or R16C:$rA, i16ImmSExt10:$val))]>; > + [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>; > + > +// Hacked form of ORHI used to promote 8-bit registers to 16-bit > +def ORHI1To2: > + RI10Form<0b10100000, (outs R16C:$rT), (ins R8C:$rA, s10imm:$val), > + "orhi\t$rT, $rA, $val", IntegerOp, > + [(set R16C:$rT, (or (anyext R8C:$rA), i16ImmSExt10:$val))]>; > > // Bitwise "or" with immediate > def ORIv4i32: > - RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, > s10imm:$val), > + RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, > u10imm:$val), > "ori\t$rT, $rA, $val", IntegerOp, > [(set (v4i32 VECREG:$rT), (or (v4i32 VECREG:$rA), > - v4i32SExt10Imm:$val))]>; > + v4i32Uns10Imm:$val))]>; > > def ORIr32: > - RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, > s10imm_i32:$val), > + RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, > u10imm_i32:$val), > "ori\t$rT, $rA, $val", IntegerOp, > - [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>; > + [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>; > > // Hacked forms of or immediate to copy one 32- and 
>  // to another. Do not match patterns.
> @@ -1381,15 +1499,24 @@
>      "ori\t$rT, $rA, $val", IntegerOp,
>      [(set R32C:$rT, (or (anyext R16C:$rA), i32ImmSExt10:$val))]>;
>
> +// ORI1To4: Hacked version of the ORI instruction to extend 16-bit quantities
> +// to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
> +// infra "anyext 16->32" pattern.)
> +def ORI1To4:
> +    RI10Form<0b00100000, (outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
> +      "ori\t$rT, $rA, $val", IntegerOp,
> +      [(set R32C:$rT, (or (anyext R8C:$rA), i32ImmSExt10:$val))]>;
> +
>  // ORX: "or" across the vector: or's $rA's word slots leaving the result in
>  // $rT[0], slots 1-3 are zeroed.
>  //
> -// Needs to match an intrinsic pattern.
> +// FIXME: Needs to match an intrinsic pattern.
>  def ORXv4i32:
>      RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
>        "orx\t$rT, $rA, $rB", IntegerOp,
>        []>;
>
> +// XOR:
>  def XORv16i8:
>      RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
>        "xor\t$rT, $rA, $rB", IntegerOp,
> @@ -1441,11 +1568,21 @@
>      "xor\t$rT, $rA, $rB", IntegerOp,
>      [(set R16C:$rT, (xor R16C:$rA, R16C:$rB))]>;
>
> +def XORr8:
> +    RRForm<0b10010010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
> +      "xor\t$rT, $rA, $rB", IntegerOp,
> +      [(set R8C:$rT, (xor R8C:$rA, R8C:$rB))]>;
> +
>  def XORBIv16i8:
>      RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
>        "xorbi\t$rT, $rA, $val", IntegerOp,
>        [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>;
>
> +def XORBIr8:
> +    RI10Form<0b01100000, (outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
> +      "xorbi\t$rT, $rA, $val", IntegerOp,
> +      [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>;
> +
>  def XORHIv8i16:
>      RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
>        "xorhi\t$rT, $rA, $val", IntegerOp,
> @@ -1497,6 +1634,11 @@
>      "nand\t$rT, $rA, $rB", IntegerOp,
>      [(set R16C:$rT, (not (and R16C:$rA, R16C:$rB)))]>;
>
> +def NANDr8:
> +    RRForm<0b10010010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
> +      "nand\t$rT, $rA, $rB", IntegerOp,
> +      [(set R8C:$rT, (not (and R8C:$rA, R8C:$rB)))]>;
> +
>  // NOR:
>  def NORv16i8:
>      RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
> @@ -1526,6 +1668,11 @@
>      "nor\t$rT, $rA, $rB", IntegerOp,
>      [(set R16C:$rT, (not (or R16C:$rA, R16C:$rB)))]>;
>
> +def NORr8:
> +    RRForm<0b10010010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
> +      "nor\t$rT, $rA, $rB", IntegerOp,
> +      [(set R8C:$rT, (not (or R8C:$rA, R8C:$rB)))]>;
> +
>  // EQV: Equivalence (1 for each same bit, otherwise 0)
>  def EQVv16i8:
>      RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
> @@ -1593,6 +1740,18 @@
>  def : Pat<(xor (not R16C:$rA), R16C:$rB),
>            (EQVr16 R16C:$rA, R16C:$rB)>;
>
> +def EQVr8:
> +    RRForm<0b10010010000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
> +      "eqv\t$rT, $rA, $rB", IntegerOp,
> +      [(set R8C:$rT, (or (and R8C:$rA, R8C:$rB),
> +                         (and (not R8C:$rA), (not R8C:$rB))))]>;
> +
> +def : Pat<(xor R8C:$rA, (not R8C:$rB)),
> +          (EQVr8 R8C:$rA, R8C:$rB)>;
> +
> +def : Pat<(xor (not R8C:$rA), R8C:$rB),
> +          (EQVr8 R8C:$rA, R8C:$rB)>;
> +
>  // gcc optimizes (p & q) | (~p & ~q) -> ~(p | q) | (p & q), so match that
>  // pattern also:
>  def : Pat<(or (vnot (or (v16i8 VECREG:$rA), (v16i8 VECREG:$rB))),
> @@ -1613,6 +1772,9 @@
>  def : Pat<(or (not (or R16C:$rA, R16C:$rB)), (and R16C:$rA, R16C:$rB)),
>            (EQVr16 R16C:$rA, R16C:$rB)>;
>
> +def : Pat<(or (not (or R8C:$rA, R8C:$rB)), (and R8C:$rA, R8C:$rB)),
> +          (EQVr8 R8C:$rA, R8C:$rB)>;
> +
>  // Select bits:
>  def SELBv16i8:
>      RRRForm<0b1000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
> @@ -1901,6 +2063,43 @@
>  def : Pat<(or (and (not R16C:$rC), R16C:$rA),
>                (and R16C:$rC, R16C:$rB)),
>            (SELBr16 R16C:$rA, R16C:$rB, R16C:$rC)>;
> +
> +def SELBr8:
> +    RRRForm<0b1000, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB, R8C:$rC),
> +      "selb\t$rT, $rA, $rB, $rC", IntegerOp,
> +      []>;
> +
> +def : Pat<(or (and R8C:$rA, R8C:$rC),
> +              (and R8C:$rB, (not R8C:$rC))),
> +          (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>;
> +
> +def : Pat<(or (and R8C:$rC, R8C:$rA),
> +              (and R8C:$rB, (not R8C:$rC))),
> +          (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>;
> +
> +def : Pat<(or (and R8C:$rA, R8C:$rC),
> +              (and (not R8C:$rC), R8C:$rB)),
> +          (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>;
> +
> +def : Pat<(or (and R8C:$rC, R8C:$rA),
> +              (and (not R8C:$rC), R8C:$rB)),
> +          (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>;
> +
> +def : Pat<(or (and R8C:$rA, (not R8C:$rC)),
> +              (and R8C:$rB, R8C:$rC)),
> +          (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>;
> +
> +def : Pat<(or (and R8C:$rA, (not R8C:$rC)),
> +              (and R8C:$rC, R8C:$rB)),
> +          (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>;
> +
> +def : Pat<(or (and (not R8C:$rC), R8C:$rA),
> +              (and R8C:$rB, R8C:$rC)),
> +          (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>;
> +
> +def : Pat<(or (and (not R8C:$rC), R8C:$rA),
> +              (and R8C:$rC, R8C:$rB)),
> +          (SELBr8 R8C:$rA, R8C:$rB, R8C:$rC)>;
>
> //===----------------------------------------------------------------------===//
> // Vector shuffle...
> @@ -1958,10 +2157,13 @@
>      [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>;
>
>  def SHLHIv8i16:
> -    RI7Form<0b11111010000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
> +    RI7Form<0b11111010000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm_i8:$val),
>      "shlhi\t$rT, $rA, $val", RotateShift,
>      [(set (v8i16 VECREG:$rT),
> -          (SPUvec_shl_v8i16 (v8i16 VECREG:$rA), (i16 uimm7:$val)))]>;
> +          (SPUvec_shl_v8i16 (v8i16 VECREG:$rA), (i8 uimm7:$val)))]>;
> +
> +def : Pat<(SPUvec_shl_v8i16 (v8i16 VECREG:$rA), (i16 uimm7:$val)),
> +          (SHLHIv8i16 VECREG:$rA, imm:$val)>;
>
>  def : Pat<(SPUvec_shl_v8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val)),
>            (SHLHIv8i16 VECREG:$rA, imm:$val)>;
> @@ -1970,6 +2172,9 @@
>      RI7Form<0b11111010000, (outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val),
>      "shlhi\t$rT, $rA, $val", RotateShift,
>      [(set R16C:$rT, (shl R16C:$rA, (i32 uimm7:$val)))]>;
> +
> +def : Pat<(shl R16C:$rA, (i8 uimm7:$val)),
> +          (SHLHIr16 R16C:$rA, uimm7:$val)>;
>
>  def : Pat<(shl R16C:$rA, (i16 uimm7:$val)),
>            (SHLHIr16 R16C:$rA, uimm7:$val)>;
> @@ -1986,10 +2191,13 @@
>      [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
>
>  def SHLIv4i32:
> -    RI7Form<0b11111010000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
> +    RI7Form<0b11111010000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm_i8:$val),
>      "shli\t$rT, $rA, $val", RotateShift,
>      [(set (v4i32 VECREG:$rT),
> -          (SPUvec_shl_v4i32 (v4i32 VECREG:$rA), (i16 uimm7:$val)))]>;
> +          (SPUvec_shl_v4i32 (v4i32 VECREG:$rA), (i8 uimm7:$val)))]>;
> +
> +def: Pat<(SPUvec_shl_v4i32 (v4i32 VECREG:$rA), (i16 uimm7:$val)),
> +         (SHLIv4i32 VECREG:$rA, uimm7:$val)>;
>
>  def: Pat<(SPUvec_shl_v4i32 (v4i32 VECREG:$rA), (i32 uimm7:$val)),
>           (SHLIv4i32 VECREG:$rA, uimm7:$val)>;
> @@ -2002,6 +2210,9 @@
>  def : Pat<(shl R32C:$rA, (i16 uimm7:$val)),
>            (SHLIr32 R32C:$rA, uimm7:$val)>;
>
> +def : Pat<(shl R32C:$rA, (i8 uimm7:$val)),
> +          (SHLIr32 R32C:$rA, uimm7:$val)>;
> +
> // SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
> // register) to the left.
> // Vector form is here to ensure type correctness.
>  def SHLQBIvec:
> @@ -2044,11 +2255,27 @@
>      "roth\t$rT, $rA, $rB", RotateShift,
>      [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>;
>
> +// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or
> +// 32-bit register
> +def ROTHr16_r8:
> +    RRForm<0b00111010000, (outs R16C:$rT), (ins R16C:$rA, R8C:$rB),
> +      "roth\t$rT, $rA, $rB", RotateShift,
> +      [(set R16C:$rT, (rotl R16C:$rA, (i32 (zext R8C:$rB))))]>;
> +
> +def : Pat<(rotl R16C:$rA, (i32 (sext R8C:$rB))),
> +          (ROTHr16_r8 R16C:$rA, R8C:$rB)>;
> +
> +def : Pat<(rotl R16C:$rA, (i32 (zext R8C:$rB))),
> +          (ROTHr16_r8 R16C:$rA, R8C:$rB)>;
> +
> +def : Pat<(rotl R16C:$rA, (i32 (anyext R8C:$rB))),
> +          (ROTHr16_r8 R16C:$rA, R8C:$rB)>;
> +
>  def ROTHIv8i16:
> -    RI7Form<0b00111110000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
> +    RI7Form<0b00111110000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm_i8:$val),
>      "rothi\t$rT, $rA, $val", RotateShift,
>      [(set (v8i16 VECREG:$rT),
> -          (SPUvec_rotl_v8i16 VECREG:$rA, (i16 uimm7:$val)))]>;
> +          (SPUvec_rotl_v8i16 VECREG:$rA, (i8 uimm7:$val)))]>;
>
>  def : Pat<(SPUvec_rotl_v8i16 VECREG:$rA, (i16 uimm7:$val)),
>            (ROTHIv8i16 VECREG:$rA, imm:$val)>;
> @@ -2066,6 +2293,11 @@
>      "rothi\t$rT, $rA, $val", RotateShift,
>      [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>;
>
> +def ROTHIr16_i8:
> +    RI7Form<0b00111110000, (outs R16C:$rT), (ins R16C:$rA, u7imm_i8:$val),
> +      "rothi\t$rT, $rA, $val", RotateShift,
> +      [(set R16C:$rT, (rotl R16C:$rA, (i8 uimm7:$val)))]>;
> +
>  def ROTv4i32:
>      RRForm<0b00011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
>        "rot\t$rT, $rA, $rB", RotateShift,
> @@ -2077,6 +2309,30 @@
>      "rot\t$rT, $rA, $rB", RotateShift,
>      [(set R32C:$rT, (rotl R32C:$rA, R32C:$rB))]>;
>
> +// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or
> +// 32-bit register
> +def ROTr32_r16_anyext:
> +    RRForm<0b00011010000, (outs R32C:$rT), (ins R32C:$rA, R16C:$rB),
> +      "rot\t$rT, $rA, $rB", RotateShift,
> +      [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>;
> +
> +def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))),
> +          (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
> +
> +def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))),
> +          (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
> +
> +def ROTr32_r8_anyext:
> +    RRForm<0b00011010000, (outs R32C:$rT), (ins R32C:$rA, R8C:$rB),
> +      "rot\t$rT, $rA, $rB", RotateShift,
> +      [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>;
> +
> +def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))),
> +          (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
> +
> +def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
> +          (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
> +
>  def ROTIv4i32:
>      RI7Form<0b00011110000, (outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
>      "roti\t$rT, $rA, $val", RotateShift,
> @@ -2086,6 +2342,9 @@
>  def : Pat<(SPUvec_rotl_v4i32 (v4i32 VECREG:$rA), (i16 uimm7:$val)),
>            (ROTIv4i32 VECREG:$rA, imm:$val)>;
>
> +def : Pat<(SPUvec_rotl_v4i32 (v4i32 VECREG:$rA), (i8 uimm7:$val)),
> +          (ROTIv4i32 VECREG:$rA, imm:$val)>;
> +
>  def ROTIr32:
>      RI7Form<0b00011110000, (outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val),
>      "roti\t$rT, $rA, $val", RotateShift,
> @@ -2096,6 +2355,11 @@
>      "roti\t$rT, $rA, $val", RotateShift,
>      [(set R32C:$rT, (rotl R32C:$rA, (i16 uimm7:$val)))]>;
>
> +def ROTIr32_i8:
> +    RI7Form<0b00111110000, (outs R32C:$rT), (ins R32C:$rA, u7imm_i8:$val),
> +      "roti\t$rT, $rA, $val", RotateShift,
> +      [(set R32C:$rT, (rotl R32C:$rA, (i8 uimm7:$val)))]>;
> +
> // ROTQBY* vector forms: This rotates the entire vector, but vector registers
> // are used here for type checking (instances where ROTQBI is used actually
> // use vector registers)
> @@ -2155,9 +2419,9 @@
>            (ROTHMv8i16 VECREG:$rA,
>                        (SFIr32 (XSHWr16 R16C:$rB), 0))>;
>
> -def : Pat<(SPUvec_srl_v8i16 (v8i16 VECREG:$rA), /* R8C */ R16C:$rB),
> +def : Pat<(SPUvec_srl_v8i16 (v8i16 VECREG:$rA), R8C:$rB),
>            (ROTHMv8i16 VECREG:$rA,
> -                      (SFIr32 (XSHWr16 /* (XSBHr8 R8C */ R16C:$rB) /*)*/, 0))>;
> +                      (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
>
> // ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left
> // Note: This instruction doesn't match a pattern because rB must be negated
> @@ -2174,9 +2438,9 @@
>            (ROTHMr16 R16C:$rA,
>                      (SFIr32 (XSHWr16 R16C:$rB), 0))>;
>
> -def : Pat<(srl R16C:$rA, /* R8C */ R16C:$rB),
> +def : Pat<(srl R16C:$rA, R8C:$rB),
>            (ROTHMr16 R16C:$rA,
> -                    (SFIr32 (XSHWr16 /* (XSBHr8 R8C */ R16C:$rB) /* ) */, 0))>;
> +                    (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
>
> // ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is
> // that the immediate can be complemented, so that the user doesn't have to
> @@ -2189,6 +2453,9 @@
>
>  def: Pat<(SPUvec_srl_v8i16 (v8i16 VECREG:$rA), (i16 imm:$val)),
>           (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
> +
> +def: Pat<(SPUvec_srl_v8i16 (v8i16 VECREG:$rA), (i8 imm:$val)),
> +         (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
>
>  def ROTHMIr16:
>      RI7Form<0b10111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
> @@ -2198,6 +2465,9 @@
>  def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
>           (ROTHMIr16 R16C:$rA, uimm7:$val)>;
>
> +def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
> +         (ROTHMIr16 R16C:$rA, uimm7:$val)>;
> +
> // ROTM v4i32 form: See the ROTHM v8i16 comments.
>  def ROTMv4i32:
>      RRForm<0b10011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
> @@ -2227,6 +2497,10 @@
>            (ROTMr32 R32C:$rA,
>                     (SFIr32 (XSHWr16 R16C:$rB), 0))>;
>
> +def : Pat<(srl R32C:$rA, R8C:$rB),
> +          (ROTMr32 R32C:$rA,
> +                   (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
> +
> // ROTMI v4i32 form: See the comment for ROTHM v8i16.
>  def ROTMIv4i32:
>      RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
> @@ -2236,6 +2510,9 @@
>
>  def : Pat<(SPUvec_srl_v4i32 VECREG:$rA, (i16 uimm7:$val)),
>            (ROTMIv4i32 VECREG:$rA, uimm7:$val)>;
> +
> +def : Pat<(SPUvec_srl_v4i32 VECREG:$rA, (i8 uimm7:$val)),
> +          (ROTMIv4i32 VECREG:$rA, uimm7:$val)>;
>
> // ROTMI r32 form: know how to complement the immediate value.
>  def ROTMIr32:
> @@ -2246,6 +2523,9 @@
>  def : Pat<(srl R32C:$rA, (i16 imm:$val)),
>            (ROTMIr32 R32C:$rA, uimm7:$val)>;
>
> +def : Pat<(srl R32C:$rA, (i8 imm:$val)),
> +          (ROTMIr32 R32C:$rA, uimm7:$val)>;
> +
> // ROTQMBYvec: This is a vector form merely so that when used in an
> // instruction pattern, type checking will succeed. This instruction assumes
> // that the user knew to complement $rB.
> @@ -2291,6 +2571,10 @@
>            (ROTMAHv8i16 VECREG:$rA,
>                         (SFIr32 (XSHWr16 R16C:$rB), 0))>;
>
> +def : Pat<(SPUvec_sra_v8i16 VECREG:$rA, R8C:$rB),
> +          (ROTMAHv8i16 VECREG:$rA,
> +                       (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
> +
>  def ROTMAHr16:
>      RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
>        "rotmah\t$rT, $rA, $rB", RotateShift,
> @@ -2303,6 +2587,10 @@
>            (ROTMAHr16 R16C:$rA,
>                       (SFIr32 (XSHWr16 R16C:$rB), 0))>;
>
> +def : Pat<(sra R16C:$rA, R8C:$rB),
> +          (ROTMAHr16 R16C:$rA,
> +                     (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
> +
>  def ROTMAHIv8i16:
>      RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
>        "rotmahi\t$rT, $rA, $val", RotateShift,
> @@ -2312,6 +2600,9 @@
>  def : Pat<(SPUvec_sra_v8i16 (v8i16 VECREG:$rA), (i16 uimm7:$val)),
>            (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>;
>
> +def : Pat<(SPUvec_sra_v8i16 (v8i16 VECREG:$rA), (i8 uimm7:$val)),
> +          (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>;
> +
>  def ROTMAHIr16:
>      RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
>        "rotmahi\t$rT, $rA, $val", RotateShift,
> @@ -2320,6 +2611,9 @@
>  def : Pat<(sra R16C:$rA, (i32 imm:$val)),
>            (ROTMAHIr16 R16C:$rA, uimm7:$val)>;
>
> +def : Pat<(sra R16C:$rA, (i8 imm:$val)),
> +          (ROTMAHIr16 R16C:$rA, uimm7:$val)>;
> +
>  def ROTMAv4i32:
>      RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
>        "rotma\t$rT, $rA, $rB", RotateShift,
> @@ -2332,6 +2626,10 @@
>            (ROTMAv4i32 (v4i32 VECREG:$rA),
>                        (SFIr32 (XSHWr16 R16C:$rB), 0))>;
>
> +def : Pat<(SPUvec_sra_v4i32 VECREG:$rA, R8C:$rB),
> +          (ROTMAv4i32 (v4i32 VECREG:$rA),
> +                      (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
> +
>  def ROTMAr32:
>      RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
>        "rotma\t$rT, $rA, $rB", RotateShift,
> @@ -2344,6 +2642,10 @@
>            (ROTMAr32 R32C:$rA,
>                      (SFIr32 (XSHWr16 R16C:$rB), 0))>;
>
> +def : Pat<(sra R32C:$rA, R8C:$rB),
> +          (ROTMAr32 R32C:$rA,
> +                    (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
> +
>  def ROTMAIv4i32:
>      RRForm<0b01011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
>        "rotmai\t$rT, $rA, $val", RotateShift,
> @@ -2361,6 +2663,9 @@
>  def : Pat<(sra R32C:$rA, (i16 uimm7:$val)),
>            (ROTMAIr32 R32C:$rA, uimm7:$val)>;
>
> +def : Pat<(sra R32C:$rA, (i8 uimm7:$val)),
> +          (ROTMAIr32 R32C:$rA, uimm7:$val)>;
> +
> //===----------------------------------------------------------------------===//
> // Branch and conditionals:
> //===----------------------------------------------------------------------===//
> @@ -2401,12 +2706,21 @@
>  }
>
>  // Comparison operators:
> +def CEQBr8:
> +    RRForm<0b00001011110, (outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
> +      "ceqb\t$rT, $rA, $rB", ByteOp,
> +      [/* no pattern to match */]>;
>
>  def CEQBv16i8:
>      RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
>        "ceqb\t$rT, $rA, $rB", ByteOp,
>        [/* no pattern to match: intrinsic */]>;
>
> +def CEQBIr8:
> +    RI10Form<0b01111110, (outs R8C:$rT), (ins R8C:$rA, s7imm:$val),
> +      "ceqbi\t$rT, $rA, $val", ByteOp,
> +      [/* no pattern to match: intrinsic */]>;
> +
>  def CEQBIv16i8:
>      RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, s7imm:$val),
>        "ceqbi\t$rT, $rA, $val", ByteOp,
> @@ -3075,6 +3389,10 @@
>  def : Pat<(v4i32 v4i32Imm:$imm),
>            (IOHLvec (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
>                     (LO16_vec v4i32Imm:$imm))>;
> +
> +// 8-bit constants
> +def : Pat<(i8 imm:$imm),
> +          (ILHr8 imm:$imm)>;
>
> //===----------------------------------------------------------------------===//
> // Call instruction patterns:
> @@ -3095,14 +3413,34 @@
>  def : Pat<(sext_inreg R32C:$rSrc, i8),
>            (XSHWr32 (XSBHr32 R32C:$rSrc))>;
>
> +def : Pat<(i32 (sext R8C:$rSrc)),
> +          (XSHWr16 (XSBHr8 R8C:$rSrc))>;
> +
>  def : Pat<(SPUextract_i8_sext VECREG:$rSrc),
>            (XSHWr32 (XSBHr32 (ORi32_v4i32 (v4i32 VECREG:$rSrc),
>                                           (v4i32 VECREG:$rSrc))))>;
>
> +// zext 8->16: Zero extend bytes to halfwords
> +def : Pat<(i16 (zext R8C:$rSrc)),
> +          (ANDHI1To2 R8C:$rSrc, 0xff)>;
> +
> +// zext 8->32 from preferred slot in load/store
>  def : Pat<(SPUextract_i8_zext VECREG:$rSrc),
>            (ANDIr32 (ORi32_v4i32 (v4i32 VECREG:$rSrc), (v4i32 VECREG:$rSrc)),
>                     0xff)>;
>
> +// zext 8->32: Zero extend bytes to words
> +def : Pat<(i32 (zext R8C:$rSrc)),
> +          (ANDI1To4 R8C:$rSrc, 0xff)>;
> +
> +// anyext 8->16: Extend 8->16 bits, irrespective of sign
> +def : Pat<(i16 (anyext R8C:$rSrc)),
> +          (ORHI1To2 R8C:$rSrc, 0)>;
> +
> +// anyext 8->32: Extend 8->32 bits, irrespective of sign
> +def : Pat<(i32 (anyext R8C:$rSrc)),
> +          (ORI1To4 R8C:$rSrc, 0)>;
> +
>  // zext 16->32: Zero extend halfwords to words (note that we have to juggle the
>  // 0xffff constant since it will not fit into an immediate.)
>  def : Pat<(i32 (zext R16C:$rSrc)),
>
> Modified: llvm/trunk/lib/Target/CellSPU/SPUOperands.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUOperands.td?rev=45130&r1=45129&r2=45130&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/CellSPU/SPUOperands.td (original)
> +++ llvm/trunk/lib/Target/CellSPU/SPUOperands.td Mon Dec 17 16:32:34 2007
> @@ -99,15 +99,21 @@
>    return isI32IntS10Immediate(N);
>  }]>;
>
> +// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
> +// field.  Used by RI10Form instructions like 'ldq'.
> +def i32ImmUns10 : PatLeaf<(imm), [{
> +  return isI32IntU10Immediate(N);
> +}]>;
> +
>  // i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
>  // extended field.  Used by RI10Form instructions like 'ldq'.
>  def i16ImmSExt10 : PatLeaf<(imm), [{
>    return isI16IntS10Immediate(N);
>  }]>;
>
> -// i16ImmU10 predicate - True if the i16 immediate fits into a 10-bit unsigned
> +// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
>  // value.  Used by RI10Form instructions.
> -def i16ImmU10 : PatLeaf<(imm), [{
> +def i16ImmUns10 : PatLeaf<(imm), [{
>    return isI16IntU10Immediate(N);
>  }]>;
>
> @@ -261,9 +267,21 @@
>    return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).Val != 0;
>  }], v8i16SExt10Imm_xform>;
>
> +// v8i16Uns10Imm_xform function: convert build_vector to 16-bit unsigned
> +// immediate constant load for v8i16 vectors.
> +def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
> +  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
> +}]>;
> +
> +// v8i16Uns10Imm: Predicate test for 16-bit unsigned immediate constant
> +// load, works in conjunction with its transform function.
> +def v8i16Uns10Imm: PatLeaf<(build_vector), [{
> +  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).Val != 0;
> +}], v8i16Uns10Imm_xform>;
> +
>  // v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended
>  // immediate constant load for v8i16 vectors.
> -def v8i16SExt16Imm_xform: SDNodeXForm<build_vector, [{
> +def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{
>    return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
>  }]>;
>
> @@ -271,7 +289,7 @@
>  // load, works in conjunction with its transform function.
>  def v8i16SExt16Imm: PatLeaf<(build_vector), [{
>    return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).Val != 0;
> -}], v8i16SExt16Imm_xform>;
> +}], v8i16Uns16Imm_xform>;
>
>  // v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
>  // immediate constant load for v4i32 vectors.
> @@ -285,6 +303,18 @@
>    return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).Val != 0;
>  }], v4i32SExt10Imm_xform>;
>
> +// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
> +// immediate constant load for v4i32 vectors.
> +def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
> +  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
> +}]>;
> +
> +// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
> +// load, works in conjunction with its transform function.
> +def v4i32Uns10Imm: PatLeaf<(build_vector), [{
> +  return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).Val != 0;
> +}], v4i32Uns10Imm_xform>;
> +
>  // v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
>  // immediate constant load for v4i32 vectors.
>  def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
> @@ -390,6 +420,10 @@
>    let PrintMethod = "printU7ImmOperand";
>  }
>
> +def u7imm_i8: Operand<i8> {
> +  let PrintMethod = "printU7ImmOperand";
> +}
> +
>  def u7imm_i32: Operand<i32> {
>    let PrintMethod = "printU7ImmOperand";
>  }
> @@ -412,6 +446,10 @@
>    let PrintMethod = "printU10ImmOperand";
>  }
>
> +def u10imm_i8: Operand<i8> {
> +  let PrintMethod = "printU10ImmOperand";
> +}
> +
>  def u10imm_i32: Operand<i32> {
>    let PrintMethod = "printU10ImmOperand";
>  }
> @@ -420,6 +458,10 @@
>    let PrintMethod = "printS16ImmOperand";
>  }
>
> +def s16imm_i8: Operand<i8> {
> +  let PrintMethod = "printS16ImmOperand";
> +}
> +
>  def s16imm_i32: Operand<i32> {
>    let PrintMethod = "printS16ImmOperand";
>  }
>
> Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp?rev=45130&r1=45129&r2=45130&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp (original)
> +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp Mon Dec 17 16:32:34 2007
> @@ -328,7 +328,9 @@
>      /* do what loadRegFromStackSlot does here... */
>    } else {
>      unsigned Opc = 0;
> -    if (RC == SPU::R16CRegisterClass) {
> +    if (RC == SPU::R8CRegisterClass) {
> +      /* do brilliance here */
> +    } else if (RC == SPU::R16CRegisterClass) {
>        /* Opc = PPC::LWZ; */
>      } else if (RC == SPU::R32CRegisterClass) {
>        /* Opc = PPC::LD; */
> @@ -369,10 +371,9 @@
>      abort();
>    }
>
> -  /* if (DestRC == SPU::R8CRegisterClass) {
> +  if (DestRC == SPU::R8CRegisterClass) {
>      BuildMI(MBB, MI, TII.get(SPU::ORBIr8), DestReg).addReg(SrcReg).addImm(0);
> -  } else */
> -  if (DestRC == SPU::R16CRegisterClass) {
> +  } else if (DestRC == SPU::R16CRegisterClass) {
>      BuildMI(MBB, MI, TII.get(SPU::ORHIr16), DestReg).addReg(SrcReg).addImm(0);
>    } else if (DestRC == SPU::R32CRegisterClass) {
>      BuildMI(MBB, MI, TII.get(SPU::ORIr32), DestReg).addReg(SrcReg).addImm(0);
>
> Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.td?rev=45130&r1=45129&r2=45130&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.td (original)
> +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.td Mon Dec 17 16:32:34 2007
> @@ -359,6 +359,40 @@
>    }];
>  }
>
> +// The SPU's registers as 8-bit wide (byte) "preferred slot":
> +def R8C : RegisterClass<"SPU", [i8], 128,
> + [
> +   /* volatile register */
> +   R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
> +   R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
> +   R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
> +   R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
> +   R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
> +   R77, R78, R79,
> +   /* non-volatile register: take hint from PPC and allocate in reverse order */
> +   R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
> +   R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
> +   R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
> +   R86, R85, R84, R83, R82, R81, R80,
> +   /* environment ptr, SP, LR */
> +   R2, R1, R0 ]>
> +{
> +  let MethodProtos = [{
> +    iterator allocation_order_begin(const MachineFunction &MF) const;
> +    iterator allocation_order_end(const MachineFunction &MF) const;
> +  }];
> +  let MethodBodies = [{
> +    R8CClass::iterator
> +    R8CClass::allocation_order_begin(const MachineFunction &MF) const {
> +      return begin();
> +    }
> +    R8CClass::iterator
> +    R8CClass::allocation_order_end(const MachineFunction &MF) const {
> +      return end()-3;  // don't allocate R2, R1, or R0 (envp, sp, lr)
> +    }
> +  }];
> +}
> +
>  // The SPU's registers as vector registers:
>  def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128,
>  [
>
> Modified: llvm/trunk/test/CodeGen/CellSPU/and_ops.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/and_ops.ll?rev=45130&r1=45129&r2=45130&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/CellSPU/and_ops.ll (original)
> +++ llvm/trunk/test/CodeGen/CellSPU/and_ops.ll Mon Dec 17 16:32:34 2007
> @@ -1,9 +1,9 @@
>  ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
> -; RUN: grep and    %t1.s | count 227
> +; RUN: grep and    %t1.s | count 232
>  ; RUN: grep andc   %t1.s | count 85
>  ; RUN: grep andi   %t1.s | count 36
> -; RUN: grep andhi  %t1.s | count 31
> -; RUN: grep andbi  %t1.s | count 1
> +; RUN: grep andhi  %t1.s | count 30
> +; RUN: grep andbi  %t1.s | count 4
>
>  ; AND instruction generation:
>  define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
> @@ -258,13 +258,19 @@
>  }
>
>  define i8 @and_u8(i8 zeroext %in) zeroext {
> -        ; ANDI generated:
> -        %tmp37 = and i8 %in, 37         ; <i8> [#uses=1]
> +        ; ANDBI generated:
> +        %tmp37 = and i8 %in, 37
>          ret i8 %tmp37
>  }
>
> -define i8 @and_i8(i8 signext %in) signext {
> -        ; ANDHI generated
> -        %tmp38 = and i8 %in, 37         ; <i8> [#uses=1]
> +define i8 @and_sext8(i8 signext %in) signext {
> +        ; ANDBI generated
> +        %tmp38 = and i8 %in, 37
> +        ret i8 %tmp38
> +}
> +
> +define i8 @and_i8(i8 %in) {
> +        ; ANDBI generated
> +        %tmp38 = and i8 %in, 205
>          ret i8 %tmp38
>  }
>
> Added: llvm/trunk/test/CodeGen/CellSPU/nand.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/nand.ll?rev=45130&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/CellSPU/nand.ll (added)
> +++ llvm/trunk/test/CodeGen/CellSPU/nand.ll Mon Dec 17 16:32:34 2007
> @@ -0,0 +1,119 @@
> +; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
> +; RUN: grep nand   %t1.s | count 90
> +; RUN: grep and    %t1.s | count 94
> +; RUN: grep xsbh   %t1.s | count 2
> +; RUN: grep xshw   %t1.s | count 4
> +
> +define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
> +        %A = and <4 x i32> %arg2, %arg1      ; <<4 x i32>> [#uses=1]
> +        %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 >
> +        ret <4 x i32> %B
> +}
> +
> +define <4 x i32> @nand_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
> +        %A = and <4 x i32> %arg1, %arg2      ; <<4 x i32>> [#uses=1]
> +        %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 >
> +        ret <4 x i32> %B
> +}
> +
> +define <8 x i16> @nand_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
> +        %A = and <8 x i16> %arg2, %arg1      ; <<8 x i16>> [#uses=1]
> +        %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1,
> +                                 i16 -1, i16 -1, i16 -1, i16 -1 >
> +        ret <8 x i16> %B
> +}
> +
> +define <8 x i16> @nand_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
> +        %A = and <8 x i16> %arg1, %arg2      ; <<8 x i16>> [#uses=1]
> +        %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1,
> +                                 i16 -1, i16 -1, i16 -1, i16 -1 >
> +        ret <8 x i16> %B
> +}
> +
> +define <16 x i8> @nand_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
> +        %A = and <16 x i8> %arg2, %arg1      ; <<16 x i8>> [#uses=1]
> +        %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
> +                                 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
> +                                 i8 -1, i8 -1, i8 -1, i8 -1 >
> +        ret <16 x i8> %B
> +}
> +
> +define <16 x i8> @nand_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
> +        %A = and <16 x i8> %arg1, %arg2      ; <<16 x i8>> [#uses=1]
> +        %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
> +                                 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
> +                                 i8 -1, i8 -1, i8 -1, i8 -1 >
> +        ret <16 x i8> %B
> +}
> +
> +define i32 @nand_i32_1(i32 %arg1, i32 %arg2) {
> +        %A = and i32 %arg2, %arg1    ; <i32> [#uses=1]
> +        %B = xor i32 %A, -1          ; <i32> [#uses=1]
> +        ret i32 %B
> +}
> +
> +define i32 @nand_i32_2(i32 %arg1, i32 %arg2) {
> +        %A = and i32 %arg1, %arg2    ; <i32> [#uses=1]
> +        %B = xor i32 %A, -1          ; <i32> [#uses=1]
> +        ret i32 %B
> +}
> +
> +define i16 @nand_i16_1(i16 signext %arg1, i16 signext %arg2) signext {
> +        %A = and i16 %arg2, %arg1    ; <i16> [#uses=1]
> +        %B = xor i16 %A, -1          ; <i16> [#uses=1]
> +        ret i16 %B
> +}
> +
> +define i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) signext {
> +        %A = and i16 %arg1, %arg2    ; <i16> [#uses=1]
> +        %B = xor i16 %A, -1          ; <i16> [#uses=1]
> +        ret i16 %B
> +}
> +
> +define i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
> +        %A = and i16 %arg2, %arg1    ; <i16> [#uses=1]
> +        %B = xor i16 %A, -1          ; <i16> [#uses=1]
> +        ret i16 %B
> +}
> +
> +define i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
> +        %A = and i16 %arg1, %arg2    ; <i16> [#uses=1]
> +        %B = xor i16 %A, -1          ; <i16> [#uses=1]
> +        ret i16 %B
> +}
> +
> +define i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
> +        %A = and i8 %arg2, %arg1     ; <i8> [#uses=1]
> +        %B = xor i8 %A, -1           ; <i8> [#uses=1]
> +        ret i8 %B
> +}
> +
> +define i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
> +        %A = and i8 %arg1, %arg2     ; <i8> [#uses=1]
> +        %B = xor i8 %A, -1           ; <i8> [#uses=1]
> +        ret i8 %B
> +}
> +
> +define i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) signext {
> +        %A = and i8 %arg2, %arg1     ; <i8> [#uses=1]
> +        %B = xor i8 %A, -1           ; <i8> [#uses=1]
> +        ret i8 %B
> +}
> +
> +define i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) signext {
> +        %A = and i8 %arg1, %arg2     ; <i8> [#uses=1]
> +        %B = xor i8 %A, -1           ; <i8> [#uses=1]
> +        ret i8 %B
> +}
> +
> +define i8 @nand_i8_3(i8 %arg1, i8 %arg2) {
> +        %A = and i8 %arg2, %arg1     ; <i8> [#uses=1]
> +        %B = xor i8 %A, -1           ; <i8> [#uses=1]
> +        ret i8 %B
> +}
> +
> +define i8 @nand_i8_4(i8 %arg1, i8 %arg2) {
> +        %A = and i8 %arg1, %arg2     ; <i8> [#uses=1]
> +        %B = xor i8 %A, -1           ; <i8> [#uses=1]
> +        ret i8 %B
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits@cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

_______________________________________________
llvm-commits mailing list
llvm-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits