Hi,

> 
> ...can we use expand_vec_perm_const here?  It will try the constant
> expansion first, which is the preferred order.  It also has a few variations
> up its sleeve.
> 

We can; however, this function seems to incorrectly assume that it can always
convert the input mode to a QI vector mode.  When I started using it we got a
number of miscompilations in the AArch64 codegen.  This had the knock-on effect
of uncovering bugs in both the AArch64 and i386 backends.  I'll send patches
out for those separately.

For now here's the new patch using that hook and updating the permute expansion 
code:

Bootstrapped and regtested on aarch64-none-linux-gnu and x86_64-pc-linux-gnu
with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * expmed.cc (extract_bit_field_1): Add support for vector element
        extracts.
        * optabs.cc (expand_vec_perm_const): Add checks before converting
        permute to QImode fallback.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/ext_1.c: New.

--- inline copy of patch ---

diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 
bab020c07222afa38305ef8d7333f271b1965b78..7d38045ae525c8a4665a0c1384fc515e4de88c67
 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -1718,6 +1718,21 @@ extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, 
poly_uint64 bitnum,
              return target;
            }
        }
+      else if (!known_eq (bitnum, 0U)
+              && multiple_p (GET_MODE_UNIT_BITSIZE (tmode), bitnum, &pos))
+       {
+         /* The encoding has a single stepped pattern.  */
+         poly_uint64 nunits = GET_MODE_NUNITS (new_mode);
+         vec_perm_builder sel (nunits, 1, 3);
+         sel.quick_push (pos);
+         sel.quick_push (pos + 1);
+         sel.quick_push (pos + 2);
+
+         rtx res
+           = expand_vec_perm_const (new_mode, op0, op0, sel, new_mode, NULL);
+         if (res)
+           return simplify_gen_subreg (tmode, res, new_mode, 0);
+       }
     }
 
   /* See if we can get a better vector mode before extracting.  */
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 
cff37ccb0dfc3dd79b97d0abfd872f340855dc96..f338df410265dfe55b6896160090a453cc6a28d9
 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -6267,6 +6267,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
       v0_qi = gen_lowpart (qimode, v0);
       v1_qi = gen_lowpart (qimode, v1);
       if (targetm.vectorize.vec_perm_const != NULL
+         && targetm.can_change_mode_class (mode, qimode, ALL_REGS)
          && targetm.vectorize.vec_perm_const (qimode, qimode, target_qi, v0_qi,
                                               v1_qi, qimode_indices))
        return gen_lowpart (mode, target_qi);
@@ -6311,7 +6312,8 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
     }
 
   if (qimode != VOIDmode
-      && selector_fits_mode_p (qimode, qimode_indices))
+      && selector_fits_mode_p (qimode, qimode_indices)
+      && targetm.can_change_mode_class (mode, qimode, ALL_REGS))
     {
       icode = direct_optab_handler (vec_perm_optab, qimode);
       if (icode != CODE_FOR_nothing)
diff --git a/gcc/testsuite/gcc.target/aarch64/ext_1.c 
b/gcc/testsuite/gcc.target/aarch64/ext_1.c
new file mode 100644
index 
0000000000000000000000000000000000000000..18a10a14f1161584267a8472e571b3bc2ddf887a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ext_1.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <string.h>
+
+typedef unsigned int v4si __attribute__((vector_size (16)));
+typedef unsigned int v2si __attribute__((vector_size (8)));
+
+/*
+** extract: { xfail *-*-* }
+**     ext     v0.16b, v0.16b, v0.16b, #4
+**     ret
+*/
+v2si extract (v4si x)
+{
+    v2si res = {x[1], x[2]};
+    return res;
+}
+
+/*
+** extract1: { xfail *-*-* }
+**     ext     v0.16b, v0.16b, v0.16b, #4
+**     ret
+*/
+v2si extract1 (v4si x)
+{
+    v2si res;
+    memcpy (&res, ((int*)&x)+1, sizeof(res));
+    return res;
+}
+
+typedef struct cast {
+  int a;
+  v2si b __attribute__((packed));
+} cast_t;
+
+typedef union Data {
+   v4si x;
+   cast_t y;
+} data;  
+
+/*
+** extract2:
+**     ext     v0.16b, v0.16b, v0.16b, #4
+**     ret
+*/
+v2si extract2 (v4si x)
+{
+    data d;
+    d.x = x;
+    return d.y.b;
+}
+

Attachment: rb16242.patch
Description: rb16242.patch

Reply via email to