Author: djg
Date: Thu Aug 2 16:17:01 2007
New Revision: 40756

URL: http://llvm.org/viewvc/llvm-project?rev=40756&view=rev
Log:
Fix the alignment requirements of several unpck and shuf instructions.
Generalize isPSHUFDMask and add a unary SHUFPD pattern so that SHUFPD's
memory operand alignment can be tested as well, with a fix to avoid
breaking MMX's use of isPSHUFDMask.
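For context, the isPSHUFDMask generalization amounts to two things: the mask may now have 2 elements as well as 4, and each defined index is bounded by the element count instead of a hard-coded 4, so the same predicate also covers SHUFPD's v2f64 masks. Here is a minimal standalone C++ sketch of that check (hypothetical names; the real code below walks a BUILD_VECTOR of ConstantSDNodes rather than a std::vector):

  #include <optional>
  #include <vector>

  // std::nullopt models an UNDEF mask element.
  using ShuffleMask = std::vector<std::optional<unsigned>>;

  // A mask is "PSHUFD-like" when it has 2 or 4 elements and every
  // defined index refers only to the first source vector, i.e. is
  // less than the element count rather than a hard-coded 4.
  bool isPSHUFDLikeMask(const ShuffleMask &Mask) {
    if (Mask.size() != 2 && Mask.size() != 4)
      return false;
    for (const std::optional<unsigned> &Elt : Mask)
      if (Elt && *Elt >= Mask.size())
        return false;
    return true;
  }

The load -> memop* substitutions in the diff are the alignment fix itself: the memop pattern fragments only fold loads known to be sufficiently aligned, whereas a plain load pattern would also fold unaligned loads that shufps/shufpd/unpck*/pandn cannot accept as memory operands.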
Added:
    llvm/trunk/test/CodeGen/X86/sse-align-12.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=40756&r1=40755&r2=40756&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Aug 2 16:17:01 2007
@@ -1578,7 +1578,7 @@
 bool X86::isPSHUFDMask(SDNode *N) {
   assert(N->getOpcode() == ISD::BUILD_VECTOR);
 
-  if (N->getNumOperands() != 4)
+  if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
     return false;
 
   // Check if the value doesn't reference the second vector.
@@ -1586,7 +1586,7 @@
     SDOperand Arg = N->getOperand(i);
     if (Arg.getOpcode() == ISD::UNDEF) continue;
     assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
-    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
+    if (cast<ConstantSDNode>(Arg)->getValue() >= e)
       return false;
   }
 
@@ -2767,7 +2767,10 @@
 
   // If VT is integer, try PSHUF* first, then SHUFP*.
   if (MVT::isInteger(VT)) {
-    if (X86::isPSHUFDMask(PermMask.Val) ||
+    // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
+    // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
+    if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
+         X86::isPSHUFDMask(PermMask.Val)) ||
         X86::isPSHUFHWMask(PermMask.Val) ||
         X86::isPSHUFLWMask(PermMask.Val)) {
       if (V2.getOpcode() != ISD::UNDEF)

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=40756&r1=40755&r2=40756&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Aug 2 16:17:01 2007
@@ -808,7 +808,7 @@
                     "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                     [(set VR128:$dst,
                       (v4f32 (vector_shuffle
-                              VR128:$src1, (load addr:$src2),
+                              VR128:$src1, (memopv4f32 addr:$src2),
                               SHUFP_shuffle_mask:$src3)))]>;
 
 let AddedComplexity = 10 in {
@@ -824,7 +824,7 @@
                        "unpckhps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (vector_shuffle
-                                 VR128:$src1, (load addr:$src2),
+                                 VR128:$src1, (memopv4f32 addr:$src2),
                                  UNPCKH_shuffle_mask)))]>;
 
   def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
@@ -839,7 +839,7 @@
                        "unpcklps\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                          (v4f32 (vector_shuffle
-                                 VR128:$src1, (load addr:$src2),
+                                 VR128:$src1, (memopv4f32 addr:$src2),
                                  UNPCKL_shuffle_mask)))]>;
 } // AddedComplexity
 } // isTwoAddress
@@ -1561,7 +1561,7 @@
                  "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                  [(set VR128:$dst,
                    (v2f64 (vector_shuffle
-                           VR128:$src1, (load addr:$src2),
+                           VR128:$src1, (memopv2f64 addr:$src2),
                            SHUFP_shuffle_mask:$src3)))]>;
 
 let AddedComplexity = 10 in {
@@ -1577,7 +1577,7 @@
                        "unpckhpd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                          (v2f64 (vector_shuffle
-                                 VR128:$src1, (load addr:$src2),
+                                 VR128:$src1, (memopv2f64 addr:$src2),
                                  UNPCKH_shuffle_mask)))]>;
 
   def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
@@ -1592,7 +1592,7 @@
                        "unpcklpd\t{$src2, $dst|$dst, $src2}",
                        [(set VR128:$dst,
                          (v2f64 (vector_shuffle
-                                 VR128:$src1, (load addr:$src2),
+                                 VR128:$src1, (memopv2f64 addr:$src2),
                                  UNPCKL_shuffle_mask)))]>;
 } // AddedComplexity
 } // isTwoAddress
@@ -1782,7 +1782,7 @@
                     (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                     "pandn\t{$src2, $dst|$dst, $src2}",
                     [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
-                                                  (load addr:$src2))))]>;
+                                                  (memopv2i64 addr:$src2))))]>;
 }
 
 // SSE2 Integer comparison
@@ -2419,6 +2419,11 @@
                           SHUFP_unary_shuffle_mask:$sm),
           (SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
       Requires<[HasSSE1]>;
+// Special unary SHUFPDrri case.
+def : Pat<(vector_shuffle (v2f64 VR128:$src1), (undef),
+                          SHUFP_unary_shuffle_mask:$sm),
+          (SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
+      Requires<[HasSSE2]>;
 // Unary v4f32 shuffle with PSHUF* in order to fold a load.
 def : Pat<(vector_shuffle (memopv4f32 addr:$src1), (undef),
                           SHUFP_unary_shuffle_mask:$sm),
@@ -2583,13 +2588,13 @@
           (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
 
 def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
-                  (load addr:$src2))),
+                  (memopv2i64 addr:$src2))),
           (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
 def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
-                  (load addr:$src2))),
+                  (memopv2i64 addr:$src2))),
           (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
 def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
-                  (load addr:$src2))),
+                  (memopv2i64 addr:$src2))),
           (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
 
 // Use movaps / movups for SSE integer load / store (one byte shorter).

Added: llvm/trunk/test/CodeGen/X86/sse-align-12.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-align-12.ll?rev=40756&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-align-12.ll (added)
+++ llvm/trunk/test/CodeGen/X86/sse-align-12.ll Thu Aug 2 16:17:01 2007
@@ -0,0 +1,50 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep unpck | wc -l | grep 2
+; RUN: llvm-as < %s | llc -march=x86-64 | grep shuf | wc -l | grep 2
+; RUN: llvm-as < %s | llc -march=x86-64 | grep ps | wc -l | grep 4
+; RUN: llvm-as < %s | llc -march=x86-64 | grep pd | wc -l | grep 4
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movup | wc -l | grep 4
+
+define <4 x float> @a(<4 x float>* %y)
+{
+  %x = load <4 x float>* %y, align 4
+  %a = extractelement <4 x float> %x, i32 0
+  %b = extractelement <4 x float> %x, i32 1
+  %c = extractelement <4 x float> %x, i32 2
+  %d = extractelement <4 x float> %x, i32 3
+  %p = insertelement <4 x float> undef, float %d, i32 0
+  %q = insertelement <4 x float> %p, float %c, i32 1
+  %r = insertelement <4 x float> %q, float %b, i32 2
+  %s = insertelement <4 x float> %r, float %a, i32 3
+  ret <4 x float> %s
+}
+define <4 x float> @b(<4 x float>* %y, <4 x float> %z)
+{
+  %x = load <4 x float>* %y, align 4
+  %a = extractelement <4 x float> %x, i32 2
+  %b = extractelement <4 x float> %x, i32 3
+  %c = extractelement <4 x float> %z, i32 2
+  %d = extractelement <4 x float> %z, i32 3
+  %p = insertelement <4 x float> undef, float %c, i32 0
+  %q = insertelement <4 x float> %p, float %a, i32 1
+  %r = insertelement <4 x float> %q, float %d, i32 2
+  %s = insertelement <4 x float> %r, float %b, i32 3
+  ret <4 x float> %s
+}
+define <2 x double> @c(<2 x double>* %y)
+{
+  %x = load <2 x double>* %y, align 8
+  %a = extractelement <2 x double> %x, i32 0
+  %c = extractelement <2 x double> %x, i32 1
+  %p = insertelement <2 x double> undef, double %c, i32 0
+  %r = insertelement <2 x double> %p, double %a, i32 1
+  ret <2 x double> %r
+}
+define <2 x double> @d(<2 x double>* %y, <2 x double> %z)
+{
+  %x = load <2 x double>* %y, align 8
+  %a = extractelement <2 x double> %x, i32 1
+  %c = extractelement <2 x double> %z, i32 1
+  %p = insertelement <2 x double> undef, double %c, i32 0
+  %r = insertelement <2 x double> %p, double %a, i32 1
+  ret <2 x double> %r
+}

_______________________________________________
llvm-commits mailing list
llvm-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits