On Wed, Jun 15, 2011 at 4:54 AM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > All of these _mm{,256}_permute2_p[sd] and _mm_roti_epi{8,16,32,64} > intrinsics ICE if the last argument is constant integer, but not in the > expected range. > > I could only find MSFT documentation for these intrinsics, where for > *permute2* it says that the last argument must be 0, 1, 2 or 3, > for *roti* it says that the last argument is integer rotation count, > preferably constant and that if count is negative, it performs right > rotation instead of left rotation. > This patch adjusts the builtins to match that, if we want to instead > e.g. always mandate _mm_roti_epi* last argument is constant integer, > or constant integer in the range -N+1 .. N-1 where N is the number > after _mm_roti_epi, or in the range 0 .. N-1, it can be easily adjusted. > > Regtested on x86_64-linux {-m32,-m64}, unfortunately on a SandyBridge > box, so I couldn't verify if xop-rotate[12]-int.c actually succeeds > on xop capable HW. > > 2011-06-15 Jakub Jelinek <ja...@redhat.com> > > PR target/49411 > * config/i386/i386.c (ix86_expand_multi_arg_builtin): If > last_arg_constant and last argument doesn't match its predicate, > for xop_vpermil2<mode>3 error out and for xop_rotl<mode>3 > if it is CONST_INT, mask it, otherwise expand using rotl<mode>3. > > * gcc.target/i386/xop-vpermil2px-1.c: New test. > * gcc.target/i386/xop-vpermil2px-2.c: New test. > * gcc.target/i386/xop-rotate1-int.c: New test. > * gcc.target/i386/xop-rotate2-int.c: New test. > > --- gcc/config/i386/i386.c.jj 2011-06-09 16:56:56.000000000 +0200 > +++ gcc/config/i386/i386.c 2011-06-15 11:17:12.000000000 +0200 > @@ -26149,16 +26149,66 @@ ix86_expand_multi_arg_builtin (enum insn > int adjust = (comparison_p) ? 
1 : 0; > enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; > > - if (last_arg_constant && i == nargs-1) > + if (last_arg_constant && i == nargs - 1) > { > - if (!CONST_INT_P (op)) > + if (!insn_data[icode].operand[i + 1].predicate (op, mode)) > { > - error ("last argument must be an immediate"); > - return gen_reg_rtx (tmode); > + enum insn_code new_icode = icode; > + switch (icode) > + { > + case CODE_FOR_xop_vpermil2v2df3: > + case CODE_FOR_xop_vpermil2v4sf3: > + case CODE_FOR_xop_vpermil2v4df3: > + case CODE_FOR_xop_vpermil2v8sf3: > + if (!CONST_INT_P (op)) > + { > + error ("last argument must be an immediate"); > + return gen_reg_rtx (tmode); > + } > + error ("last argument must be in the range 0 .. 3"); > + return gen_reg_rtx (tmode); > + case CODE_FOR_xop_rotlv2di3: > + new_icode = CODE_FOR_rotlv2di3; > + goto xop_rotl; > + case CODE_FOR_xop_rotlv4si3: > + new_icode = CODE_FOR_rotlv4si3; > + goto xop_rotl; > + case CODE_FOR_xop_rotlv8hi3: > + new_icode = CODE_FOR_rotlv8hi3; > + goto xop_rotl; > + case CODE_FOR_xop_rotlv16qi3: > + new_icode = CODE_FOR_rotlv16qi3; > + xop_rotl: > + if (CONST_INT_P (op)) > + { > + int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - > 1; > + op = GEN_INT (INTVAL (op) & mask); > + gcc_checking_assert > + (insn_data[icode].operand[i + 1].predicate (op, > mode)); > + } > + else > + { > + gcc_checking_assert > + (nargs == 2 > + && insn_data[new_icode].operand[0].mode == tmode > + && insn_data[new_icode].operand[1].mode == tmode > + && insn_data[new_icode].operand[2].mode == mode > + && insn_data[new_icode].operand[0].predicate > + == insn_data[icode].operand[0].predicate > + && insn_data[new_icode].operand[1].predicate > + == insn_data[icode].operand[1].predicate); > + icode = new_icode; > + goto non_constant; > + } > + break; > + default: > + gcc_unreachable (); > + } > } > } > else > { > + non_constant: > if (VECTOR_MODE_P (mode)) > op = safe_vector_operand (op, mode); > > --- 
gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c.jj 2011-06-15 > 10:18:29.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c 2011-06-15 > 10:41:13.000000000 +0200 > @@ -0,0 +1,25 @@ > +/* PR target/49411 */ > +/* { dg-do compile } */ > +/* { dg-options "-O0 -mxop" } */ > + > +#include <x86intrin.h> > + > +__m128d a1, a2, a3; > +__m256d b1, b2, b3; > +__m128 c1, c2, c3; > +__m256 d1, d2, d3; > +__m128i s; > +__m256i t; > + > +void > +foo (int i) > +{ > + a1 = _mm_permute2_pd (a2, a3, s, 3); > + b1 = _mm256_permute2_pd (b2, b3, t, 3); > + c1 = _mm_permute2_ps (c2, c3, s, 3); > + d1 = _mm256_permute2_ps (d2, d3, t, 3); > + a1 = _mm_permute2_pd (a2, a3, s, 17); /* { dg-error "last > argument must be in the range 0 .. 3" } */ > + b1 = _mm256_permute2_pd (b2, b3, t, 17); /* { dg-error "last argument > must be in the range 0 .. 3" } */ > + c1 = _mm_permute2_ps (c2, c3, s, 17); /* { dg-error "last > argument must be in the range 0 .. 3" } */ > + d1 = _mm256_permute2_ps (d2, d3, t, 17); /* { dg-error "last argument > must be in the range 0 .. 
3" } */ > +} > --- gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c.jj 2011-06-15 > 10:39:36.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c 2011-06-15 > 10:39:44.000000000 +0200 > @@ -0,0 +1,21 @@ > +/* PR target/49411 */ > +/* { dg-do compile } */ > +/* { dg-options "-O0 -mxop" } */ > + > +#include <x86intrin.h> > + > +__m128d a1, a2, a3; > +__m256d b1, b2, b3; > +__m128 c1, c2, c3; > +__m256 d1, d2, d3; > +__m128i s; > +__m256i t; > + > +void > +foo (int i) > +{ > + a1 = _mm_permute2_pd (a2, a3, s, i); /* { dg-error "last argument > must be an immediate" } */ > + b1 = _mm256_permute2_pd (b2, b3, t, i); /* { dg-error "last argument > must be an immediate" } */ > + c1 = _mm_permute2_ps (c2, c3, s, i); /* { dg-error "last argument > must be an immediate" } */ > + d1 = _mm256_permute2_ps (d2, d3, t, i); /* { dg-error "last argument > must be an immediate" } */ > +} > --- gcc/testsuite/gcc.target/i386/xop-rotate1-int.c.jj 2011-06-15 > 10:47:29.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/xop-rotate1-int.c 2011-06-15 > 11:25:25.000000000 +0200 > @@ -0,0 +1,63 @@ > +/* PR target/49411 */ > +/* { dg-do run } */ > +/* { dg-require-effective-target xop } */ > +/* { dg-options "-O2 -mxop" } */ > + > +#include "xop-check.h" > + > +#include <x86intrin.h> > + > +extern void abort (void); > + > +union > +{ > + __m128i v; > + unsigned char c[16]; > + unsigned short s[8]; > + unsigned int i[4]; > + unsigned long long l[2]; > +} a, b, c, d; > + > +#define TEST1(F, N, S, SS) \ > +do { \ > + for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++) \ > + a.F[i] = i * 17; \ > + s = _mm_set1_epi##SS (N); \ > + b.v = _mm_roti_epi##S (a.v, N); \ > + c.v = _mm_rot_epi##S (a.v, s); \ > + for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++) \ > + { \ > + int mask = __CHAR_BIT__ * sizeof (a.F[i]) - 1; \ > + d.F[i] = a.F[i] << (N & mask); \ > + if (N & mask) \ > + d.F[i] |= a.F[i] >> (mask + 1 - (N & mask)); \ > + if (b.F[i] != c.F[i] || b.F[i] != d.F[i]) \ > + 
abort (); \ > + } \ > +} while (0) > +#define TEST(N) \ > + TEST1 (c, N, 8, 8); \ > + TEST1 (s, N, 16, 16); \ > + TEST1 (i, N, 32, 32); \ > + TEST1 (l, N, 64, 64x) > + > +volatile int n; > + > +static void > +xop_test (void) > +{ > + unsigned int i; > + __m128i s; > + > +#ifndef NON_CONST > + TEST (5); > + TEST (-5); > + TEST (0); > + TEST (31); > +#else > + n = 5; TEST (n); > + n = -5; TEST (n); > + n = 0; TEST (n); > + n = 31; TEST (n); > +#endif > +} > --- gcc/testsuite/gcc.target/i386/xop-rotate2-int.c.jj 2011-06-15 > 11:25:42.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/xop-rotate2-int.c 2011-06-15 > 11:26:03.000000000 +0200 > @@ -0,0 +1,7 @@ > +/* PR target/49411 */ > +/* { dg-do run } */ > +/* { dg-require-effective-target xop } */ > +/* { dg-options "-O2 -mxop" } */ > + > +#define NON_CONST 1 > +#include "xop-rotate1-int.c" > > Jakub >
I will test on AMD HW. -- Quentin