Thanks. Moving the pattern down helps. "make check" now passes with the following patch:
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 2ef1384..8266f3e 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1417,6 +1417,22 @@ return true; }) +;; Return true if OP is a parallel for a palignr permute. +(define_predicate "palignr_operand" + (and (match_code "parallel") + (match_code "const_int" "a")) +{ + int elt = INTVAL (XVECEXP (op, 0, 0)); + int i, nelt = XVECLEN (op, 0); + + /* Check that an order in the permutation is suitable for palignr. + For example, {5 6 7 0 1 2 3 4} is "palignr 5, xmm, xmm". */ + for (i = 1; i < nelt; ++i) + if (INTVAL (XVECEXP (op, 0, i)) != ((elt + i) % nelt)) + return false; + return true; +}) + ;; Return true if OP is a proper third operand to vpblendw256. (define_predicate "avx2_pblendw_operand" (match_code "const_int") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c91626b..d907353 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -14551,6 +14551,35 @@ (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn "*ssse3_palignr<mode>_perm" + [(set (match_operand:V_128 0 "register_operand" "=x,x") + (vec_select:V_128 + (match_operand:V_128 1 "register_operand" "0,x") + (match_parallel 2 "palignr_operand" + [(match_operand 3 "const_int_operand" "n, n")])))] + "TARGET_SSSE3" +{ + enum machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0])); + operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode)); + + switch (which_alternative) + { + case 0: + return "palignr\t{%2, %1, %0|%0, %1, %2}"; + case 1: + return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseishft") + (set_attr "atom_unit" "sishuf") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex")]) + (define_expand "avx_vinsertf128<mode>" [(match_operand:V_256 0 
"register_operand") (match_operand:V_256 1 "register_operand") On Wed, Jun 4, 2014 at 11:04 PM, Richard Henderson <r...@redhat.com> wrote: > On 06/04/2014 10:06 AM, Evgeny Stupachenko wrote: >> Is it ok to use the following pattern? >> >> patch passed bootstrap and make check, but one test failed: >> gcc/testsuite/gcc.target/i386/vect-rebuild.c >> It failed on /* { dg-final { scan-assembler-times "\tv?permilpd\[ \t\]" 1 } >> } */ >> which is now palignr. However, both palignr and permilpd costs 1 tick >> and take 6 bytes in the opcode. >> I vote for modifying the test to scan for palignr: >> /* { dg-final { scan-assembler-times "\tv?palignr\[ \t\]" 1 } } */ >> >> 2014-06-04 Evgeny Stupachenko <evstu...@gmail.com> >> >> * config/i386/sse.md (*ssse3_palignr<mode>_perm): New. >> * config/i386/predicates.md (palignr_operand): New. >> Indicates if permutation is suitable for palignr instruction. > > Surely permilpd avoids some sort of reformatting penalty when actually using > doubles. > > If you move this pattern down below the other vec_select patterns, we'll > prefer > the others for matching masks. > > > r~