On Fri, 28 Sep 2012, Uros Bizjak wrote:
2) {v[0]-v[1], v[0]-v[1]} is not recognized as a hsubpd because vec_duplicate doesn't match vec_concat. Do we really need to duplicate (no pun intended) the pattern?You can add this transformation to simplify-rtx.c. Probably vec_concat with two equal operands can be canonicalized as vec_duplicate.
Actually, it is replacing vec_duplicate with vec_concat that would help. Well, I'll see about that later.
Here is what I came up with, trying to follow your other advice (thanks a lot!).
Passes bootstrap+testsuite. 2012-10-08 Marc Glisse <marc.gli...@inria.fr> gcc/ PR target/54400 * config/i386/i386.md (type attribute): Add sseadd1. (unit attribute): Add support for sseadd1. * config/i386/sse.md (sse3_h<plusminus_insn>v2df3): split into... (sse3_haddv2df3): ... expander. (*sse3_haddv2df3): ... define_insn. Accept permuted operands. (sse3_hsubv2df3): ... define_insn. (*sse3_haddv2df3_low): New define_insn. (*sse3_hsubv2df3_low): New define_insn. gcc/testsuite/ PR target/54400 * gcc.target/i386/pr54400.c: New testcase. -- Marc Glisse
Index: gcc/testsuite/gcc.target/i386/pr54400.c =================================================================== --- gcc/testsuite/gcc.target/i386/pr54400.c (revision 0) +++ gcc/testsuite/gcc.target/i386/pr54400.c (revision 0) @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse3 -mfpmath=sse" } */ + +#include <x86intrin.h> + +double f (__m128d p) +{ + return p[0] - p[1]; +} + +double g1 (__m128d p) +{ + return p[0] + p[1]; +} + +double g2 (__m128d p) +{ + return p[1] + p[0]; +} + +__m128d h (__m128d p, __m128d q) +{ + __m128d r = { p[0] - p[1], q[0] - q[1] }; + return r; +} + +__m128d i1 (__m128d p, __m128d q) +{ + __m128d r = { p[0] + p[1], q[0] + q[1] }; + return r; +} + +__m128d i2 (__m128d p, __m128d q) +{ + __m128d r = { p[0] + p[1], q[1] + q[0] }; + return r; +} + +__m128d i3 (__m128d p, __m128d q) +{ + __m128d r = { p[1] + p[0], q[0] + q[1] }; + return r; +} + +__m128d i4 (__m128d p, __m128d q) +{ + __m128d r = { p[1] + p[0], q[1] + q[0] }; + return r; +} + +/* { dg-final { scan-assembler-times "hsubpd" 2 } } */ +/* { dg-final { scan-assembler-times "haddpd" 6 } } */ +/* { dg-final { scan-assembler-not "unpck" } } */ Property changes on: gcc/testsuite/gcc.target/i386/pr54400.c ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision URL Added: svn:eol-style + native Index: gcc/config/i386/i386.md =================================================================== --- gcc/config/i386/i386.md (revision 192206) +++ gcc/config/i386/i386.md (working copy) @@ -320,36 +320,36 @@ ;; provided in other attributes. (define_attr "type" "other,multi, alu,alu1,negnot,imov,imovx,lea, incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv, icmp,test,ibr,setcc,icmov, push,pop,call,callv,leave, str,bitmanip, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, - ssemuladd,sse4arg,lwp, + sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt, + ssediv,sseins,ssemuladd,sse4arg,lwp, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) ;; Main data type used by the insn (define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF" (const_string "unknown")) ;; The CPU unit operations uses. (define_attr "unit" "integer,i387,sse,mmx,unknown" (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") (const_string "i387") (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt, + sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt, ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg") (const_string "sse") (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") (const_string "mmx") (eq_attr "type" "other") (const_string "unknown")] (const_string "integer"))) ;; The (bounding maximum) length of an instruction immediate. (define_attr "length_immediate" "" Index: gcc/config/i386/sse.md =================================================================== --- gcc/config/i386/sse.md (revision 192206) +++ gcc/config/i386/sse.md (working copy) @@ -1209,42 +1209,120 @@ (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))) (plusminus:DF (vec_select:DF (match_dup 2) (parallel [(const_int 2)])) (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))] "TARGET_AVX" "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseadd") (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) -(define_insn "sse3_h<plusminus_insn>v2df3" +(define_expand "sse3_haddv2df3" + [(set (match_operand:V2DF 0 "register_operand") + (vec_concat:V2DF + (plus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) + (plus:DF + (vec_select:DF + (match_operand:V2DF 2 "nonimmediate_operand") + (parallel [(const_int 0)])) + (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_SSE3") + +(define_insn "*sse3_haddv2df3" [(set (match_operand:V2DF 0 "register_operand" "=x,x") (vec_concat:V2DF - (plusminus:DF + (plus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "0,x") + (parallel [(match_operand:SI 3 "const_0_to_1_operand")])) + (vec_select:DF + (match_dup 1) + (parallel [(match_operand:SI 4 "const_0_to_1_operand")]))) + (plus:DF + (vec_select:DF + (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm") + (parallel [(match_operand:SI 5 "const_0_to_1_operand")])) + (vec_select:DF + (match_dup 2) + (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))] + "TARGET_SSE3 && INTVAL (operands[3]) != INTVAL (operands[4]) + && INTVAL (operands[5]) != INTVAL (operands[6])" + "@ + haddpd\t{%2, %0|%0, %2} + vhaddpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + +(define_insn "sse3_hsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_concat:V2DF + (minus:DF (vec_select:DF (match_operand:V2DF 1 "register_operand" "0,x") (parallel [(const_int 0)])) (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) - (plusminus:DF + (minus:DF (vec_select:DF (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0)])) (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] "TARGET_SSE3" "@ - h<plusminus_mnemonic>pd\t{%2, %0|%0, %2} - vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}" + hsubpd\t{%2, %0|%0, %2} + vhsubpd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "prefix" "orig,vex") (set_attr "mode" "V2DF")]) +(define_insn "*sse3_haddv2df3_low" + [(set (match_operand:DF 0 "register_operand" "=x,x") + (plus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "0,x") + (parallel [(match_operand:SI 2 "const_0_to_1_operand")])) + (vec_select:DF + (match_dup 1) + (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))] + "TARGET_SSE3 && INTVAL (operands[2]) != INTVAL (operands[3])" + "@ + haddpd\t{%0, %0|%0, %0} + vhaddpd\t{%1, %1, %0|%0, %1, %1}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd1") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + +(define_insn "*sse3_hsubv2df3_low" + [(set (match_operand:DF 0 "register_operand" "=x,x") + (minus:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "0,x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_dup 1) + (parallel [(const_int 1)]))))] + "TARGET_SSE3" + "@ + hsubpd\t{%0, %0|%0, %0} + vhsubpd\t{%1, %1, %0|%0, %1, %1}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd1") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "V2DF")]) + (define_insn "avx_h<plusminus_insn>v8sf3" [(set (match_operand:V8SF 0 "register_operand" "=x") (vec_concat:V8SF (vec_concat:V4SF (vec_concat:V2SF (plusminus:SF (vec_select:SF (match_operand:V8SF 1 "register_operand" "x") (parallel [(const_int 0)])) (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))