Changes in directory llvm/lib/Target/X86:
X86InstrFPStack.td updated: 1.2 -> 1.3 X86InstrInfo.td updated: 1.245 -> 1.246 X86InstrMMX.td updated: 1.1 -> 1.2 X86InstrSSE.td updated: 1.3 -> 1.4 --- Log message: One more round of reorg so sabre doesn't freak out. :-) --- Diffs of the changes: (+240 -222) X86InstrFPStack.td | 39 ++++++ X86InstrInfo.td | 94 +++------------- X86InstrMMX.td | 22 --- X86InstrSSE.td | 307 +++++++++++++++++++++++++++++++---------------------- 4 files changed, 240 insertions(+), 222 deletions(-) Index: llvm/lib/Target/X86/X86InstrFPStack.td diff -u llvm/lib/Target/X86/X86InstrFPStack.td:1.2 llvm/lib/Target/X86/X86InstrFPStack.td:1.3 --- llvm/lib/Target/X86/X86InstrFPStack.td:1.2 Tue Feb 21 13:26:52 2006 +++ llvm/lib/Target/X86/X86InstrFPStack.td Tue Feb 21 14:00:20 2006 @@ -13,6 +13,26 @@ // //===----------------------------------------------------------------------===// +// Some 'special' instructions +let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. + def FP_TO_INT16_IN_MEM : I<0, Pseudo, + (ops i16mem:$dst, RFP:$src), + "#FP_TO_INT16_IN_MEM PSEUDO!", + [(X86fp_to_i16mem RFP:$src, addr:$dst)]>; + def FP_TO_INT32_IN_MEM : I<0, Pseudo, + (ops i32mem:$dst, RFP:$src), + "#FP_TO_INT32_IN_MEM PSEUDO!", + [(X86fp_to_i32mem RFP:$src, addr:$dst)]>; + def FP_TO_INT64_IN_MEM : I<0, Pseudo, + (ops i64mem:$dst, RFP:$src), + "#FP_TO_INT64_IN_MEM PSEUDO!", + [(X86fp_to_i64mem RFP:$src, addr:$dst)]>; +} + +let isTerminator = 1 in + let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in + def FP_REG_KILL : I<0, Pseudo, (ops), "#FP_REG_KILL", []>; + // All FP Stack operations are represented with two instructions here. The // first instruction, generated by the instruction selector, uses "RFP" // registers: a traditional register file to reference floating point values. @@ -379,3 +399,22 @@ (ops i16mem:$dst), "fnstcw $dst", []>; def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] (ops i16mem:$dst), "fldcw $dst", []>; + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// Required for RET of f32 / f64 values. +def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>; +def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>; + +// Required for CALL which return f32 / f64 values. +def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>; +def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>; + +// Floating point constant -0.0 and -1.0 +def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>; +def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>; + +// Used to conv. i64 to f64 since there isn't a SSE version. +def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>; Index: llvm/lib/Target/X86/X86InstrInfo.td diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.245 llvm/lib/Target/X86/X86InstrInfo.td:1.246 --- llvm/lib/Target/X86/X86InstrInfo.td:1.245 Tue Feb 21 13:30:30 2006 +++ llvm/lib/Target/X86/X86InstrInfo.td Tue Feb 21 14:00:20 2006 @@ -403,47 +403,6 @@ def IMPLICIT_DEF_R32 : I<0, Pseudo, (ops R32:$dst), "#IMPLICIT_DEF $dst", [(set R32:$dst, (undef))]>; -def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst), - "#IMPLICIT_DEF $dst", - [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>; -def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst), - "#IMPLICIT_DEF $dst", - [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>; - - -// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the -// scheduler into a branch sequence. -let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. - def CMOV_FR32 : I<0, Pseudo, - (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond), - "#CMOV_FR32 PSEUDO!", - [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>; - def CMOV_FR64 : I<0, Pseudo, - (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond), - "#CMOV_FR64 PSEUDO!", - [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>; -} - -let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. - def FP_TO_INT16_IN_MEM : I<0, Pseudo, - (ops i16mem:$dst, RFP:$src), - "#FP_TO_INT16_IN_MEM PSEUDO!", - [(X86fp_to_i16mem RFP:$src, addr:$dst)]>; - def FP_TO_INT32_IN_MEM : I<0, Pseudo, - (ops i32mem:$dst, RFP:$src), - "#FP_TO_INT32_IN_MEM PSEUDO!", - [(X86fp_to_i32mem RFP:$src, addr:$dst)]>; - def FP_TO_INT64_IN_MEM : I<0, Pseudo, - (ops i64mem:$dst, RFP:$src), - "#FP_TO_INT64_IN_MEM PSEUDO!", - [(X86fp_to_i64mem RFP:$src, addr:$dst)]>; -} - - -let isTerminator = 1 in - let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in - def FP_REG_KILL : I<0, Pseudo, (ops), "#FP_REG_KILL", []>; - // Nop def NOOP : I<0x90, RawFrm, (ops), "nop", []>; @@ -1690,7 +1649,6 @@ // Double shift instructions (generalizations of rotate) - def SHLD32rrCL : I<0xA5, MRMDestReg, (ops R32:$dst, R32:$src1, R32:$src2), "shld{l} {%cl, $src2, $dst|$dst, $src2, %CL}", [(set R32:$dst, (X86shld R32:$src1, R32:$src2, CL))]>, @@ -2390,24 +2348,6 @@ [(set R32:$dst, 0)]>; //===----------------------------------------------------------------------===// -// Floating Point Stack Support -//===----------------------------------------------------------------------===// - -include "X86InstrFPStack.td" - -//===----------------------------------------------------------------------===// -// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2) -//===----------------------------------------------------------------------===// - -include "X86InstrMMX.td" - -//===----------------------------------------------------------------------===// -// XMM Floating point support (requires SSE / SSE2) -//===----------------------------------------------------------------------===// - -include "X86InstrSSE.td" - -//===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -2460,21 +2400,6 @@ def : Pat<(i32 (anyext R8 :$src)), (MOVZX32rr8 R8 :$src)>; def : Pat<(i32 (anyext R16:$src)), (MOVZX32rr16 R16:$src)>; -// Required for RET of f32 / f64 values. -def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>; -def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>; - -// Required for CALL which return f32 / f64 values. -def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>; -def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>; - -// Floating point constant -0.0 and -1.0 -def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>; -def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>; - -// Used to conv. i64 to f64 since there isn't a SSE version. -def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>; - //===----------------------------------------------------------------------===// // Some peepholes //===----------------------------------------------------------------------===// @@ -2519,3 +2444,22 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt), (srl R16:$src2, (sub 16, CL:$amt))), addr:$dst), (SHLD16mrCL addr:$dst, R16:$src2)>; + + +//===----------------------------------------------------------------------===// +// Floating Point Stack Support +//===----------------------------------------------------------------------===// + +include "X86InstrFPStack.td" + +//===----------------------------------------------------------------------===// +// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2) +//===----------------------------------------------------------------------===// + +include "X86InstrMMX.td" + +//===----------------------------------------------------------------------===// +// XMM Floating point support (requires SSE / SSE2) +//===----------------------------------------------------------------------===// + +include "X86InstrSSE.td" Index: llvm/lib/Target/X86/X86InstrMMX.td diff -u llvm/lib/Target/X86/X86InstrMMX.td:1.1 llvm/lib/Target/X86/X86InstrMMX.td:1.2 --- llvm/lib/Target/X86/X86InstrMMX.td:1.1 Tue Feb 21 13:13:53 2006 +++ llvm/lib/Target/X86/X86InstrMMX.td Tue Feb 21 14:00:20 2006 @@ -24,17 +24,6 @@ "movd {$src, $dst|$dst, $src}", []>, TB, Requires<[HasMMX]>; -def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; -def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; -def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; - - def MOVQ64rr : I<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src), "movq {$src, $dst|$dst, $src}", []>, TB, Requires<[HasMMX]>; @@ -44,14 +33,3 @@ def MOVQ64mr : I<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src), "movq {$src, $dst|$dst, $src}", []>, TB, Requires<[HasMMX]>; - -def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src), - "movq {$src, $dst|$dst, $src}", []>, XS, - Requires<[HasSSE2]>; -def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src), - "movq {$src, $dst|$dst, $src}", []>, XS, - Requires<[HasSSE2]>; -def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src), - "movq {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; - Index: llvm/lib/Target/X86/X86InstrSSE.td diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.3 llvm/lib/Target/X86/X86InstrSSE.td:1.4 --- llvm/lib/Target/X86/X86InstrSSE.td:1.3 Tue Feb 21 13:30:30 2006 +++ llvm/lib/Target/X86/X86InstrSSE.td Tue Feb 21 14:00:20 2006 @@ -13,93 +13,32 @@ // //===----------------------------------------------------------------------===// -def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), - "movaps {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB; -def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), - "movapd {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE2]>, TB, OpSize; - -def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), - "movaps {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB; -def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src), - "movaps {$src, $dst|$dst, $src}",[]>, - Requires<[HasSSE1]>, TB; -def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src), - "movapd {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB, OpSize; -def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src), - "movapd {$src, $dst|$dst, $src}",[]>, - Requires<[HasSSE2]>, TB, OpSize; - -// Logical -let isTwoAddress = 1 in { -let isCommutable = 1 in { -def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), - "andps {$src2, $dst|$dst, $src2}", - [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>, - Requires<[HasSSE1]>, TB; -def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), - "andpd {$src2, $dst|$dst, $src2}", - [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; -def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), - "orps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), - "xorps {$src2, $dst|$dst, $src2}", - [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>, - Requires<[HasSSE1]>, TB; -def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), - "xorpd {$src2, $dst|$dst, $src2}", - [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; -} -def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), - "andps {$src2, $dst|$dst, $src2}", - [(set V4F32:$dst, (X86fand V4F32:$src1, - (X86loadpv4f32 addr:$src2)))]>, - Requires<[HasSSE1]>, TB; -def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), - "andpd {$src2, $dst|$dst, $src2}", - [(set V2F64:$dst, (X86fand V2F64:$src1, - (X86loadpv2f64 addr:$src2)))]>, - Requires<[HasSSE2]>, TB, OpSize; -def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), - "orps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), - "xorps {$src2, $dst|$dst, $src2}", - [(set V4F32:$dst, (X86fxor V4F32:$src1, - (X86loadpv4f32 addr:$src2)))]>, - Requires<[HasSSE1]>, TB; -def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), - "xorpd {$src2, $dst|$dst, $src2}", - [(set V2F64:$dst, (X86fxor V2F64:$src1, - (X86loadpv2f64 addr:$src2)))]>, - Requires<[HasSSE2]>, TB, OpSize; +//===----------------------------------------------------------------------===// +// SSE scalar FP Instructions +//===----------------------------------------------------------------------===// -def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), - "andnps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), - "andnps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), - "andnpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), - "andnpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; +// Some 'special' instructions +def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst), + "#IMPLICIT_DEF $dst", + [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>; +def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst), + "#IMPLICIT_DEF $dst", + [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>; + +// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the +// scheduler into a branch sequence. +let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. + def CMOV_FR32 : I<0, Pseudo, + (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond), + "#CMOV_FR32 PSEUDO!", + [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>; + def CMOV_FR64 : I<0, Pseudo, + (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond), + "#CMOV_FR64 PSEUDO!", + [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>; } +// Move Instructions def MOVSSrr : I<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src), "movss {$src, $dst|$dst, $src}", []>, Requires<[HasSSE1]>, XS; @@ -124,6 +63,7 @@ [(store FR64:$src, addr:$dst)]>, Requires<[HasSSE2]>, XD; +// Conversion instructions def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src), "cvttss2si {$src, $dst|$dst, $src}", [(set R32:$dst, (fp_to_sint FR32:$src))]>, @@ -173,42 +113,8 @@ [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>, Requires<[HasSSE2]>, XD; -def SQRTSSrr : I<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src), - "sqrtss {$src, $dst|$dst, $src}", - [(set FR32:$dst, (fsqrt FR32:$src))]>, - Requires<[HasSSE1]>, XS; -def SQRTSSrm : I<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src), - "sqrtss {$src, $dst|$dst, $src}", - [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>, - Requires<[HasSSE1]>, XS; -def SQRTSDrr : I<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src), - "sqrtsd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (fsqrt FR64:$src))]>, - Requires<[HasSSE2]>, XD; -def SQRTSDrm : I<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src), - "sqrtsd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>, - Requires<[HasSSE2]>, XD; - -def UCOMISSrr: I<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2), - "ucomiss {$src2, $src1|$src1, $src2}", - [(X86cmp FR32:$src1, FR32:$src2)]>, - Requires<[HasSSE1]>, TB; -def UCOMISSrm: I<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2), - "ucomiss {$src2, $src1|$src1, $src2}", - [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>, - Requires<[HasSSE1]>, TB; -def UCOMISDrr: I<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2), - "ucomisd {$src2, $src1|$src1, $src2}", - [(X86cmp FR64:$src1, FR64:$src2)]>, - Requires<[HasSSE2]>, TB, OpSize; -def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2), - "ucomisd {$src2, $src1|$src1, $src2}", - [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; - +// Arithmetic instructions let isTwoAddress = 1 in { -// SSE Scalar Arithmetic let isCommutable = 1 in { def ADDSSrr : I<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), "addss {$src2, $dst|$dst, $src2}", @@ -278,8 +184,27 @@ "subsd {$src2, $dst|$dst, $src2}", [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>, Requires<[HasSSE2]>, XD; +} -// SSE compare +def SQRTSSrr : I<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src), + "sqrtss {$src, $dst|$dst, $src}", + [(set FR32:$dst, (fsqrt FR32:$src))]>, + Requires<[HasSSE1]>, XS; +def SQRTSSrm : I<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src), + "sqrtss {$src, $dst|$dst, $src}", + [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>, + Requires<[HasSSE1]>, XS; +def SQRTSDrr : I<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src), + "sqrtsd {$src, $dst|$dst, $src}", + [(set FR64:$dst, (fsqrt FR64:$src))]>, + Requires<[HasSSE2]>, XD; +def SQRTSDrm : I<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src), + "sqrtsd {$src, $dst|$dst, $src}", + [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>, + Requires<[HasSSE2]>, XD; + +// Comparison instructions +let isTwoAddress = 1 in { def CMPSSrr : I<0xC2, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc), "cmp${cc}ss {$src, $dst|$dst, $src}", []>, @@ -298,10 +223,25 @@ Requires<[HasSSE2]>, XD; } +def UCOMISSrr: I<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2), + "ucomiss {$src2, $src1|$src1, $src2}", + [(X86cmp FR32:$src1, FR32:$src2)]>, + Requires<[HasSSE1]>, TB; +def UCOMISSrm: I<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2), + "ucomiss {$src2, $src1|$src1, $src2}", + [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>, + Requires<[HasSSE1]>, TB; +def UCOMISDrr: I<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2), + "ucomisd {$src2, $src1|$src1, $src2}", + [(X86cmp FR64:$src1, FR64:$src2)]>, + Requires<[HasSSE2]>, TB, OpSize; +def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2), + "ucomisd {$src2, $src1|$src1, $src2}", + [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>, + Requires<[HasSSE2]>, TB, OpSize; -//===----------------------------------------------------------------------===// -// Alias Instructions -//===----------------------------------------------------------------------===// +// Aliases of packed instructions for scalar use. These all have names that +// start with 'Fs'. // Alias instructions that map fld0 to pxor for sse. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. @@ -314,10 +254,10 @@ // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd. // Upper bits are disregarded. -def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), +def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src), "movaps {$src, $dst|$dst, $src}", []>, Requires<[HasSSE1]>, TB; -def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), +def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src), "movapd {$src, $dst|$dst, $src}", []>, Requires<[HasSSE2]>, TB, OpSize; @@ -398,3 +338,120 @@ "andnpd {$src2, $dst|$dst, $src2}", []>, Requires<[HasSSE2]>, TB, OpSize; } + +//===----------------------------------------------------------------------===// +// SSE packed FP Instructions +//===----------------------------------------------------------------------===// + +// Move Instructions +def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "movaps {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE1]>, TB; +def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), + "movapd {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE2]>, TB, OpSize; + +def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), + "movaps {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE1]>, TB; +def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src), + "movaps {$src, $dst|$dst, $src}",[]>, + Requires<[HasSSE1]>, TB; +def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src), + "movapd {$src, $dst|$dst, $src}", []>, + Requires<[HasSSE1]>, TB, OpSize; +def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src), + "movapd {$src, $dst|$dst, $src}",[]>, + Requires<[HasSSE2]>, TB, OpSize; + +// Logical +let isTwoAddress = 1 in { +let isCommutable = 1 in { +def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "andps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>, + Requires<[HasSSE1]>, TB; +def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "andpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>, + Requires<[HasSSE2]>, TB, OpSize; +def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "orps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "orpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "xorps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>, + Requires<[HasSSE1]>, TB; +def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "xorpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>, + Requires<[HasSSE2]>, TB, OpSize; +} +def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "andps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (X86fand V4F32:$src1, + (X86loadpv4f32 addr:$src2)))]>, + Requires<[HasSSE1]>, TB; +def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "andpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (X86fand V2F64:$src1, + (X86loadpv2f64 addr:$src2)))]>, + Requires<[HasSSE2]>, TB, OpSize; +def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "orps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "orpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "xorps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (X86fxor V4F32:$src1, + (X86loadpv4f32 addr:$src2)))]>, + Requires<[HasSSE1]>, TB; +def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "xorpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (X86fxor V2F64:$src1, + (X86loadpv2f64 addr:$src2)))]>, + Requires<[HasSSE2]>, TB, OpSize; + +def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "andnps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "andnps {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE1]>, TB; +def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "andnpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "andnpd {$src2, $dst|$dst, $src2}", []>, + Requires<[HasSSE2]>, TB, OpSize; +} + +//===----------------------------------------------------------------------===// +// SSE integer instructions +//===----------------------------------------------------------------------===// + +// Move Instructions +def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src), + "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, + Requires<[HasSSE2]>; +def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src), + "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, + Requires<[HasSSE2]>; +def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src), + "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, + Requires<[HasSSE2]>; + +def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src), + "movq {$src, $dst|$dst, $src}", []>, XS, + Requires<[HasSSE2]>; +def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src), + "movq {$src, $dst|$dst, $src}", []>, XS, + Requires<[HasSSE2]>; +def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src), + "movq {$src, $dst|$dst, $src}", []>, TB, OpSize, + Requires<[HasSSE2]>; _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits