On Wed, Jun 15, 2011 at 4:54 AM, Jakub Jelinek <ja...@redhat.com> wrote:
> Hi!
>
> All of these _mm{,256}_permute2_p[sd] and _mm_roti_epi{8,16,32,64}
> intrinsics ICE if the last argument is a constant integer, but not in the
> expected range.
>
> I could only find MSFT documentation for these intrinsics, where for
> *permute2* it says that the last argument must be 0, 1, 2 or 3,
> for *roti* it says that the last argument is integer rotation count,
> preferably constant and that if count is negative, it performs right
> rotation instead of left rotation.
> This patch adjusts the builtins to match that.  If we want to instead
> e.g. always mandate _mm_roti_epi* last argument is constant integer,
> or constant integer in the range -N+1 .. N-1 where N is the number
> after _mm_roti_epi, or in the range 0 .. N-1, it can be easily adjusted.
>
> Regtested on x86_64-linux {-m32,-m64}, unfortunately on a SandyBridge
> box, so I couldn't verify if xop-rotate[12]-int.c actually succeeds
> on xop capable HW.
>
> 2011-06-15  Jakub Jelinek  <ja...@redhat.com>
>
>        PR target/49411
>        * config/i386/i386.c (ix86_expand_multi_arg_builtin): If
>        last_arg_constant and last argument doesn't match its predicate,
>        for xop_vpermil2<mode>3 error out and for xop_rotl<mode>3
>        if it is CONST_INT, mask it, otherwise expand using rotl<mode>3.
>
>        * gcc.target/i386/xop-vpermil2px-1.c: New test.
>        * gcc.target/i386/xop-vpermil2px-2.c: New test.
>        * gcc.target/i386/xop-rotate1-int.c: New test.
>        * gcc.target/i386/xop-rotate2-int.c: New test.
>
> --- gcc/config/i386/i386.c.jj   2011-06-09 16:56:56.000000000 +0200
> +++ gcc/config/i386/i386.c      2011-06-15 11:17:12.000000000 +0200
> @@ -26149,16 +26149,66 @@ ix86_expand_multi_arg_builtin (enum insn
>       int adjust = (comparison_p) ? 1 : 0;
>       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
>
> -      if (last_arg_constant && i == nargs-1)
> +      if (last_arg_constant && i == nargs - 1)
>        {
> -         if (!CONST_INT_P (op))
> +         if (!insn_data[icode].operand[i + 1].predicate (op, mode))
>            {
> -             error ("last argument must be an immediate");
> -             return gen_reg_rtx (tmode);
> +             enum insn_code new_icode = icode;
> +             switch (icode)
> +               {
> +               case CODE_FOR_xop_vpermil2v2df3:
> +               case CODE_FOR_xop_vpermil2v4sf3:
> +               case CODE_FOR_xop_vpermil2v4df3:
> +               case CODE_FOR_xop_vpermil2v8sf3:
> +                 if (!CONST_INT_P (op))
> +                   {
> +                     error ("last argument must be an immediate");
> +                     return gen_reg_rtx (tmode);
> +                   }
> +                 error ("last argument must be in the range 0 .. 3");
> +                 return gen_reg_rtx (tmode);
> +               case CODE_FOR_xop_rotlv2di3:
> +                 new_icode = CODE_FOR_rotlv2di3;
> +                 goto xop_rotl;
> +               case CODE_FOR_xop_rotlv4si3:
> +                 new_icode = CODE_FOR_rotlv4si3;
> +                 goto xop_rotl;
> +               case CODE_FOR_xop_rotlv8hi3:
> +                 new_icode = CODE_FOR_rotlv8hi3;
> +                 goto xop_rotl;
> +               case CODE_FOR_xop_rotlv16qi3:
> +                 new_icode = CODE_FOR_rotlv16qi3;
> +               xop_rotl:
> +                 if (CONST_INT_P (op))
> +                   {
> +                     int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 
> 1;
> +                     op = GEN_INT (INTVAL (op) & mask);
> +                     gcc_checking_assert
> +                       (insn_data[icode].operand[i + 1].predicate (op, 
> mode));
> +                   }
> +                 else
> +                   {
> +                     gcc_checking_assert
> +                       (nargs == 2
> +                        && insn_data[new_icode].operand[0].mode == tmode
> +                        && insn_data[new_icode].operand[1].mode == tmode
> +                        && insn_data[new_icode].operand[2].mode == mode
> +                        && insn_data[new_icode].operand[0].predicate
> +                           == insn_data[icode].operand[0].predicate
> +                        && insn_data[new_icode].operand[1].predicate
> +                           == insn_data[icode].operand[1].predicate);
> +                     icode = new_icode;
> +                     goto non_constant;
> +                   }
> +                 break;
> +               default:
> +                 gcc_unreachable ();
> +               }
>            }
>        }
>       else
>        {
> +       non_constant:
>          if (VECTOR_MODE_P (mode))
>            op = safe_vector_operand (op, mode);
>
> --- gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c.jj 2011-06-15 
> 10:18:29.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-1.c    2011-06-15 
> 10:41:13.000000000 +0200
> @@ -0,0 +1,25 @@
> +/* PR target/49411 */
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mxop" } */
> +
> +#include <x86intrin.h>
> +
> +__m128d a1, a2, a3;
> +__m256d b1, b2, b3;
> +__m128 c1, c2, c3;
> +__m256 d1, d2, d3;
> +__m128i s;
> +__m256i t;
> +
> +void
> +foo (int i)
> +{
> +  a1 = _mm_permute2_pd (a2, a3, s, 3);
> +  b1 = _mm256_permute2_pd (b2, b3, t, 3);
> +  c1 = _mm_permute2_ps (c2, c3, s, 3);
> +  d1 = _mm256_permute2_ps (d2, d3, t, 3);
> +  a1 = _mm_permute2_pd (a2, a3, s, 17);                /* { dg-error "last 
> argument must be in the range 0 .. 3" } */
> +  b1 = _mm256_permute2_pd (b2, b3, t, 17);     /* { dg-error "last argument 
> must be in the range 0 .. 3" } */
> +  c1 = _mm_permute2_ps (c2, c3, s, 17);                /* { dg-error "last 
> argument must be in the range 0 .. 3" } */
> +  d1 = _mm256_permute2_ps (d2, d3, t, 17);     /* { dg-error "last argument 
> must be in the range 0 .. 3" } */
> +}
> --- gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c.jj 2011-06-15 
> 10:39:36.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/xop-vpermil2px-2.c    2011-06-15 
> 10:39:44.000000000 +0200
> @@ -0,0 +1,21 @@
> +/* PR target/49411 */
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -mxop" } */
> +
> +#include <x86intrin.h>
> +
> +__m128d a1, a2, a3;
> +__m256d b1, b2, b3;
> +__m128 c1, c2, c3;
> +__m256 d1, d2, d3;
> +__m128i s;
> +__m256i t;
> +
> +void
> +foo (int i)
> +{
> +  a1 = _mm_permute2_pd (a2, a3, s, i);         /* { dg-error "last argument 
> must be an immediate" } */
> +  b1 = _mm256_permute2_pd (b2, b3, t, i);      /* { dg-error "last argument 
> must be an immediate" } */
> +  c1 = _mm_permute2_ps (c2, c3, s, i);         /* { dg-error "last argument 
> must be an immediate" } */
> +  d1 = _mm256_permute2_ps (d2, d3, t, i);      /* { dg-error "last argument 
> must be an immediate" } */
> +}
> --- gcc/testsuite/gcc.target/i386/xop-rotate1-int.c.jj  2011-06-15 
> 10:47:29.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/xop-rotate1-int.c     2011-06-15 
> 11:25:25.000000000 +0200
> @@ -0,0 +1,63 @@
> +/* PR target/49411 */
> +/* { dg-do run } */
> +/* { dg-require-effective-target xop } */
> +/* { dg-options "-O2 -mxop" } */
> +
> +#include "xop-check.h"
> +
> +#include <x86intrin.h>
> +
> +extern void abort (void);
> +
> +union
> +{
> +  __m128i v;
> +  unsigned char c[16];
> +  unsigned short s[8];
> +  unsigned int i[4];
> +  unsigned long long l[2];
> +} a, b, c, d;
> +
> +#define TEST1(F, N, S, SS) \
> +do {                                                   \
> +  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++) \
> +    a.F[i] = i * 17;                                   \
> +  s = _mm_set1_epi##SS (N);                            \
> +  b.v = _mm_roti_epi##S (a.v, N);                      \
> +  c.v = _mm_rot_epi##S (a.v, s);                       \
> +  for (i = 0; i < sizeof (a.F) / sizeof (a.F[0]); i++) \
> +    {                                                  \
> +      int mask = __CHAR_BIT__ * sizeof (a.F[i]) - 1;   \
> +      d.F[i] = a.F[i] << (N & mask);                   \
> +      if (N & mask)                                    \
> +       d.F[i] |= a.F[i] >> (mask + 1 - (N & mask));    \
> +      if (b.F[i] != c.F[i] || b.F[i] != d.F[i])                \
> +       abort ();                                       \
> +    }                                                  \
> +} while (0)
> +#define TEST(N) \
> +  TEST1 (c, N, 8, 8);                                  \
> +  TEST1 (s, N, 16, 16);                                        \
> +  TEST1 (i, N, 32, 32);                                        \
> +  TEST1 (l, N, 64, 64x)
> +
> +volatile int n;
> +
> +static void
> +xop_test (void)
> +{
> +  unsigned int i;
> +  __m128i s;
> +
> +#ifndef NON_CONST
> +  TEST (5);
> +  TEST (-5);
> +  TEST (0);
> +  TEST (31);
> +#else
> +  n = 5; TEST (n);
> +  n = -5; TEST (n);
> +  n = 0; TEST (n);
> +  n = 31; TEST (n);
> +#endif
> +}
> --- gcc/testsuite/gcc.target/i386/xop-rotate2-int.c.jj  2011-06-15 
> 11:25:42.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/xop-rotate2-int.c     2011-06-15 
> 11:26:03.000000000 +0200
> @@ -0,0 +1,7 @@
> +/* PR target/49411 */
> +/* { dg-do run } */
> +/* { dg-require-effective-target xop } */
> +/* { dg-options "-O2 -mxop" } */
> +
> +#define NON_CONST 1
> +#include "xop-rotate1-int.c"
>
>        Jakub
>

I will test on AMD HW.
-- 
Quentin

Reply via email to