Hi! No idea how I've missed this. With -mavx (but without -mavx2), V8SImode and V4DImode are valid vector modes, but the choice of shuffle insns for them is limited.
This patch just pays the reinterpretation penalty and reshuffles them as the corresponding V8SFmode and V4DFmode, respectively, instead of ICEing. Additionally, I've added two new (unrelated) interesting permutations to the tests. Bootstrapped/regtested on x86_64-linux and i686-linux and additionally regtested with GCC_TEST_RUN_EXPENSIVE=1 make check-gcc RUNTESTFLAGS='--target_board=unix\{-m32/-msse2,-m32/-msse4,-m32/-mavx,-m64/-msse2,-m64/-msse4,-m64/-mavx\} dg-torture.exp=vshuf*' and by compiling/linking all tests at -O2 -DEXPENSIVE with -mavx2 and testing in sde. The only failures are with -m64 -Os for some of the identity permutations, which aren't using VEC_PERM_EXPR at all, so that is an unrelated, possibly generic, bug. Ok for trunk? 2011-10-21 Jakub Jelinek <ja...@redhat.com> PR target/50813 * config/i386/i386.c (expand_vec_perm_even_odd_1): Handle V4DImode and V8SImode for !TARGET_AVX2. * gcc.dg/torture/vshuf-32.inc: Add broadcast permutation from element other than first and reverse permutation. * gcc.dg/torture/vshuf-16.inc: Likewise. * gcc.dg/torture/vshuf-8.inc: Likewise. * gcc.dg/torture/vshuf-4.inc: Likewise. 
--- gcc/config/i386/i386.c.jj 2011-10-21 09:39:22.000000000 +0200 +++ gcc/config/i386/i386.c 2011-10-21 10:03:43.000000000 +0200 @@ -36023,6 +36023,16 @@ expand_vec_perm_even_odd_1 (struct expan return expand_vec_perm_vpshufb2_vpermq_even_odd (d); case V4DImode: + if (!TARGET_AVX2) + { + struct expand_vec_perm_d d_copy = *d; + d_copy.vmode = V4DFmode; + d_copy.target = gen_lowpart (V4DFmode, d->target); + d_copy.op0 = gen_lowpart (V4DFmode, d->op0); + d_copy.op1 = gen_lowpart (V4DFmode, d->op1); + return expand_vec_perm_even_odd_1 (&d_copy, odd); + } + t1 = gen_reg_rtx (V4DImode); t2 = gen_reg_rtx (V4DImode); @@ -36039,6 +36049,16 @@ expand_vec_perm_even_odd_1 (struct expan break; case V8SImode: + if (!TARGET_AVX2) + { + struct expand_vec_perm_d d_copy = *d; + d_copy.vmode = V8SFmode; + d_copy.target = gen_lowpart (V8SFmode, d->target); + d_copy.op0 = gen_lowpart (V8SFmode, d->op0); + d_copy.op1 = gen_lowpart (V8SFmode, d->op1); + return expand_vec_perm_even_odd_1 (&d_copy, odd); + } + t1 = gen_reg_rtx (V8SImode); t2 = gen_reg_rtx (V8SImode); --- gcc/testsuite/gcc.dg/torture/vshuf-8.inc.jj 2011-10-20 14:13:38.000000000 +0200 +++ gcc/testsuite/gcc.dg/torture/vshuf-8.inc 2011-10-21 09:58:47.000000000 +0200 @@ -17,7 +17,9 @@ T (13, 14, 8, 12, 3, 13, 9, 5, 4) \ T (14, 15, 3, 13, 6, 14, 12, 10, 0) \ T (15, 0, 5, 11, 7, 4, 6, 14, 1) \ T (16, 0, 2, 4, 6, 8, 10, 12, 14) \ -T (17, 1, 3, 5, 7, 9, 11, 13, 15) +T (17, 1, 3, 5, 7, 9, 11, 13, 15) \ +T (18, 3, 3, 3, 3, 3, 3, 3, 3) \ +T (19, 7, 6, 5, 4, 3, 2, 1, 0) #define EXPTESTS \ T (116, 9, 3, 9, 4, 7, 0, 0, 6) \ T (117, 4, 14, 12, 8, 9, 6, 0, 10) \ --- gcc/testsuite/gcc.dg/torture/vshuf-4.inc.jj 2011-10-20 14:13:38.000000000 +0200 +++ gcc/testsuite/gcc.dg/torture/vshuf-4.inc 2011-10-21 09:59:14.000000000 +0200 @@ -17,7 +17,9 @@ T (13, 2, 3, 0, 4) \ T (14, 7, 6, 4, 2) \ T (15, 6, 1, 3, 4) \ T (16, 0, 2, 4, 6) \ -T (17, 1, 3, 5, 7) +T (17, 1, 3, 5, 7) \ +T (18, 3, 3, 3, 3) \ +T (19, 3, 2, 1, 0) #define EXPTESTS \ T (116, 1, 2, 
4, 3) \ T (117, 7, 3, 3, 0) \ --- gcc/testsuite/gcc.dg/torture/vshuf-32.inc.jj 2011-10-20 14:13:38.000000000 +0200 +++ gcc/testsuite/gcc.dg/torture/vshuf-32.inc 2011-10-21 09:57:21.000000000 +0200 @@ -17,7 +17,9 @@ T (13, 7, 51, 13, 61, 25, 4, 19, 58, 35, T (14, 22, 53, 28, 42, 45, 38, 49, 13, 54, 61, 21, 52, 7, 16, 34, 9, 1, 43, 62, 43, 35, 50, 47, 58, 20, 3, 30, 15, 37, 53, 43, 36) \ T (15, 2, 43, 49, 34, 28, 35, 29, 36, 51, 9, 17, 48, 10, 37, 45, 21, 52, 19, 25, 33, 60, 31, 30, 42, 12, 26, 27, 46, 5, 40, 14, 36) \ T (16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62) \ -T (17, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63) +T (17, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63) \ +T (18, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3) \ +T (19, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) #define EXPTESTS \ T (116, 13, 38, 47, 3, 17, 8, 38, 20, 59, 61, 39, 26, 7, 49, 63, 43, 57, 16, 40, 19, 4, 32, 27, 7, 52, 19, 46, 55, 36, 41, 48, 6) \ T (117, 39, 35, 59, 20, 56, 18, 58, 63, 57, 14, 2, 16, 5, 61, 35, 4, 53, 9, 52, 51, 27, 33, 61, 12, 3, 35, 36, 40, 37, 7, 45, 42) \ --- gcc/testsuite/gcc.dg/torture/vshuf-16.inc.jj 2011-10-20 14:13:38.000000000 +0200 +++ gcc/testsuite/gcc.dg/torture/vshuf-16.inc 2011-10-21 09:57:59.000000000 +0200 @@ -17,7 +17,9 @@ T (13, 23, 11, 15, 9, 0, 14, 8, 12, 10, T (14, 25, 5, 17, 1, 9, 15, 21, 7, 28, 2, 18, 13, 30, 14, 10, 4) \ T (15, 1, 30, 27, 31, 9, 18, 25, 12, 7, 4, 2, 16, 25, 20, 10, 3) \ T (16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30) \ -T (17, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31) +T (17, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31) \ +T 
(18, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3) \ +T (19, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) #define EXPTESTS \ T (116, 28, 13, 27, 11, 21, 1, 5, 22, 29, 14, 15, 6, 3, 10, 16, 30) \ T (117, 22, 26, 1, 13, 29, 3, 18, 18, 11, 21, 12, 28, 19, 5, 7, 4) \ Jakub