Changes in directory llvm/lib/Target/X86:
X86InstrSSE.td updated: 1.66 -> 1.67 --- Log message: Added patterns for MOVHPSmr and MOVLPSmr. --- Diffs of the changes: (+42 -4) X86InstrSSE.td | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 42 insertions(+), 4 deletions(-) Index: llvm/lib/Target/X86/X86InstrSSE.td diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.66 llvm/lib/Target/X86/X86InstrSSE.td:1.67 --- llvm/lib/Target/X86/X86InstrSSE.td:1.66 Thu Apr 6 18:53:29 2006 +++ llvm/lib/Target/X86/X86InstrSSE.td Fri Apr 7 16:20:58 2006 @@ -746,14 +746,23 @@ } def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src), - "movlps {$src, $dst|$dst, $src}", []>; + "movlps {$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), + (i32 0))), addr:$dst)]>; def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src), "movlpd {$src, $dst|$dst, $src}", [(store (f64 (vector_extract (v2f64 VR128:$src), (i32 0))), addr:$dst)]>; +// v2f64 extract element 1 is always custom lowered to unpack high to low +// and extract element 0 so the non-store version isn't too horrible. def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src), - "movhps {$src, $dst|$dst, $src}", []>; + "movhps {$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (vector_shuffle + (bc_v2f64 (v4f32 VR128:$src)), (undef), + UNPCKH_shuffle_mask)), (i32 0))), + addr:$dst)]>; def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src), "movhpd {$src, $dst|$dst, $src}", [(store (f64 (vector_extract @@ -1703,31 +1712,60 @@ Requires<[HasSSE2]>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>, + Requires<[HasSSE2]>; +def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>, + Requires<[HasSSE2]>; def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>, + Requires<[HasSSE2]>; +def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>, + Requires<[HasSSE2]>; def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v4i32 VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>, + Requires<[HasSSE2]>; +def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>, + Requires<[HasSSE2]>; def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v4i32 VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>, Requires<[HasSSE2]>; - -def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>, +def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>, + Requires<[HasSSE2]>; +def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>, + Requires<[HasSSE2]>; +def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>, + Requires<[HasSSE2]>; +def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>, + Requires<[HasSSE2]>; def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>, + Requires<[HasSSE2]>; +def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>, + Requires<[HasSSE2]>; +def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>, + Requires<[HasSSE2]>; +def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>, + Requires<[HasSSE2]>; +def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>, + Requires<[HasSSE2]>; // Zeroing a VR128 then do a MOVS* to the lower bits. def : Pat<(v2f64 (X86zexts2vec FR64:$src)), _______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits