That is covered by a separate part of the patch, below (make check and bootstrap passed, with 2 new passes for core-avx2). Is it ok?
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d6155cf..68ee65a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -81,6 +81,7 @@
   ;; For AVX2 support
   UNSPEC_VPERMVAR
   UNSPEC_VPERMTI
+  UNSPEC_VPALIGNRDI
   UNSPEC_GATHER
   UNSPEC_VSIBADDR
@@ -14167,6 +14168,19 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])

+(define_insn "avx2_palignrv4di"
+  [(set (match_operand:V4DI 0 "register_operand" "=x")
+        (unspec:V4DI
+          [(match_operand:V4DI 1 "register_operand" "x")
+           (match_operand:V4DI 2 "nonimmediate_operand" "xm")
+           (match_operand:SI 3 "const_0_to_255_operand" "n")]
+          UNSPEC_VPALIGNRDI))]
+  "TARGET_AVX2"
+  "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
 (define_insn "avx2_vec_dupv4df"
   [(set (match_operand:V4DF 0 "register_operand" "=x")
         (vec_duplicate:V4DF
@@ -14658,6 +14672,49 @@
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "orig,vex")])

+(define_insn_and_split "avx2_rotate<mode>_perm"
+  [(set (match_operand:V_256 0 "register_operand" "=&x")
+        (vec_select:V_256
+          (match_operand:V_256 1 "register_operand" "x")
+          (match_parallel 2 "palignr_operand"
+            [(match_operand 3 "const_int_operand" "n")])))]
+  "TARGET_AVX2"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  enum machine_mode imode = GET_MODE_INNER (<MODE>mode);
+  int shift = INTVAL (operands[3]) * GET_MODE_SIZE (imode);
+  rtx op0 = gen_rtx_REG (V4DImode, REGNO (operands[0]));
+  rtx op1 = gen_rtx_REG (V4DImode, REGNO (operands[1]));
+
+  if (shift == 0)
+    emit_move_insn (operands[0], operands[1]);
+  else
+    {
+      emit_insn (gen_avx2_permv2ti (op0,
+                                    op1,
+                                    op1,
+                                    GEN_INT (33)));
+      if (shift < 16)
+        emit_insn (gen_avx2_palignrv4di (op0,
+                                         op0,
+                                         op1,
+                                         GEN_INT (shift)));
+      else if (shift > 16)
+        emit_insn (gen_avx2_palignrv4di (op0,
+                                         op1,
+                                         op0,
+                                         GEN_INT (shift - 16)));
+    }
+  DONE;
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "vex")])
+
+
 (define_expand "avx_vinsertf128<mode>"
   [(match_operand:V_256 0 "register_operand")
    (match_operand:V_256 1 "register_operand")

The test case covering this is gcc.target/i386/pr52252-atom.c; it will pass for "-march=core-avx2" once the patch is committed.
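To illustrate what the new splitter emits (an intrinsics-level sketch of mine, not part of the patch; the function name is made up, and the 4-byte shift is hard-coded because vpalignr takes an immediate):

#include <immintrin.h>

/* Rotate the 32 bytes of X so that result byte i is source byte
   (i + 4) mod 32 -- the vec_select rotation the splitter matches,
   done in two instructions for 0 < shift < 16.  */
static inline __m256i
rotate_bytes_4 (__m256i x)
{
  /* vperm2i128 with immediate 0x21 (33) swaps the two 128-bit lanes.  */
  __m256i swapped = _mm256_permute2x128_si256 (x, x, 0x21);
  /* vpalignr then takes bytes 4..19 of swapped:x within each lane.  */
  return _mm256_alignr_epi8 (swapped, x, 4);
}

For shift == 16 the lane swap alone already gives the result, and for 16 < shift < 32 the vpalignr operands are swapped and the immediate becomes shift - 16, matching the three branches in the split body above.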
On Thu, Aug 14, 2014 at 6:55 PM, H.J. Lu <hjl.to...@gmail.com> wrote:
> On Thu, Aug 14, 2014 at 1:08 AM, Evgeny Stupachenko <evstu...@gmail.com> wrote:
>> Ping.
>>
>> On Thu, Jul 10, 2014 at 7:29 PM, Evgeny Stupachenko <evstu...@gmail.com> wrote:
>>> On Mon, Jul 7, 2014 at 6:40 PM, Richard Henderson <r...@redhat.com> wrote:
>>>> On 07/03/2014 02:53 AM, Evgeny Stupachenko wrote:
>>>>> -expand_vec_perm_palignr (struct expand_vec_perm_d *d)
>>>>> +expand_vec_perm_palignr (struct expand_vec_perm_d *d, int insn_num)
>>>>
>>>> insn_num might as well be "bool avx2", since it's only ever set to two values.
>>>
>>> Agree. However:
>>> after the alignment, a one-operand permutation could be just a move,
>>> taking 2 instructions for AVX2 as well;
>>> for AVX2 there could be other cases where the scheme takes 4 or 5
>>> instructions;
>>> we can leave those for a potential AVX-512 extension.
>>>
>>>>
>>>>> -  /* Even with AVX, palignr only operates on 128-bit vectors.  */
>>>>> -  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
>>>>> +  /* SSSE3 is required to apply PALIGNR on 16-byte operands.  */
>>>>> +  if (GET_MODE_SIZE (d->vmode) == 16)
>>>>> +    {
>>>>> +      if (!TARGET_SSSE3)
>>>>> +        return false;
>>>>> +    }
>>>>> +  /* AVX2 is required to apply PALIGNR on 32-byte operands.  */
>>>>> +  else if (GET_MODE_SIZE (d->vmode) == 32)
>>>>> +    {
>>>>> +      if (!TARGET_AVX2)
>>>>> +        return false;
>>>>> +    }
>>>>> +  /* Other sizes are not supported.  */
>>>>> +  else
>>>>>      return false;
>>>>
>>>> And you'd better check it up here because...
>>>>
>>>
>>> Correct. The following should resolve the issue:
>>>   /* For AVX2 we need more than 2 instructions when the alignment
>>>      by itself does not produce the desired permutation.  */
>>>   if (TARGET_AVX2 && insn_num <= 2)
>>>     return false;
>>>
>>>>> +  /* For SSSE3 we need 1 instruction for palignr plus 1 for a
>>>>> +     one-operand permutation.  */
>>>>> +  if (insn_num == 2)
>>>>> +    {
>>>>> +      ok = expand_vec_perm_1 (&dcopy);
>>>>> +      gcc_assert (ok);
>>>>> +    }
>>>>> +  /* For AVX2 we need 2 instructions for the shift (vpalignr and
>>>>> +     vperm) plus 4 instructions for a one-operand permutation.  */
>>>>> +  else if (insn_num == 6)
>>>>> +    {
>>>>> +      ok = expand_vec_perm_vpshufb2_vpermq (&dcopy);
>>>>> +      gcc_assert (ok);
>>>>> +    }
>>>>> +  else
>>>>> +    ok = false;
>>>>>   return ok;
>>>>
>>>> ... down here you'll simply ICE from the gcc_assert.
>>>
>
> Can you modify your patch to fix
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62128
>
> with a testcase?
>
>
> --
> H.J.