Hi All,

The new partial and full vector types added to AArch64, e.g.

int8x8x2_t with mode V2x8QI, are incorrectly being defined as short
vectors rather than as composite types.

This causes the layout code to incorrectly conclude that the registers are
packed, i.e. for V2x8QI it thinks those 16 bytes are in the same registers.

Because of this, the code under !aarch64_composite_type_p was unreachable, and it
also lacked any extra checks to verify that nregs is what we expected it to be.

I have also updated aarch64_advsimd_full_struct_mode_p and
aarch64_advsimd_partial_struct_mode_p to only consider vector types as struct
modes.  Otherwise types such as OImode and friends would qualify, leading to
incorrect results.

This patch fixes up the issues and we now generate correct code.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar



gcc/ChangeLog:

        PR target/103094
        * config/aarch64/aarch64.c (aarch64_function_value, aarch64_layout_arg):
        Fix unreachable code for partial vectors and re-order switch to perform
        the simplest test first.
        (aarch64_short_vector_p): Treat Advanced SIMD struct modes as not
        being short vectors.
        (aarch64_composite_type_p): Treat Advanced SIMD struct modes as
        composite types.
        (aarch64_advsimd_partial_struct_mode_p,
        aarch64_advsimd_full_struct_mode_p): Restrict to actual SIMD types.

gcc/testsuite/ChangeLog:

        PR target/103094
        * gcc.target/aarch64/pr103094.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b8725d05be4fd6468f
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3055,15 +3055,17 @@ aarch64_advsimd_struct_mode_p (machine_mode mode)
 static bool
 aarch64_advsimd_partial_struct_mode_p (machine_mode mode)
 {
-  return (aarch64_classify_vector_mode (mode)
-         == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
+  return VECTOR_MODE_P (mode)
+        && (aarch64_classify_vector_mode (mode)
+               == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
 }
 
 /* Return true if MODE is an Advanced SIMD Q-register structure mode.  */
 static bool
 aarch64_advsimd_full_struct_mode_p (machine_mode mode)
 {
-  return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
+  return VECTOR_MODE_P (mode)
+        && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
 }
 
 /* Return true if MODE is any of the data vector modes, including
@@ -6468,17 +6470,21 @@ aarch64_function_value (const_tree type, const_tree 
func,
                                               NULL, false))
     {
       gcc_assert (!sve_p);
-      if (!aarch64_composite_type_p (type, mode))
+      if (aarch64_advsimd_full_struct_mode_p (mode))
+       {
+         gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16), count));
+         return gen_rtx_REG (mode, V0_REGNUM);
+       }
+      else if (aarch64_advsimd_partial_struct_mode_p (mode))
+       {
+         gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8), count));
+         return gen_rtx_REG (mode, V0_REGNUM);
+       }
+      else if (!aarch64_composite_type_p (type, mode))
        {
          gcc_assert (count == 1 && mode == ag_mode);
          return gen_rtx_REG (mode, V0_REGNUM);
        }
-      else if (aarch64_advsimd_full_struct_mode_p (mode)
-              && known_eq (GET_MODE_SIZE (ag_mode), 16))
-       return gen_rtx_REG (mode, V0_REGNUM);
-      else if (aarch64_advsimd_partial_struct_mode_p (mode)
-              && known_eq (GET_MODE_SIZE (ag_mode), 8))
-       return gen_rtx_REG (mode, V0_REGNUM);
       else
        {
          int i;
@@ -6745,6 +6751,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const 
function_arg_info &arg)
     /* No frontends can create types with variable-sized modes, so we
        shouldn't be asked to pass or return them.  */
     size = GET_MODE_SIZE (mode).to_constant ();
+
   size = ROUND_UP (size, UNITS_PER_WORD);
 
   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
@@ -6769,17 +6776,21 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const 
function_arg_info &arg)
       if (nvrn + nregs <= NUM_FP_ARG_REGS)
        {
          pcum->aapcs_nextnvrn = nvrn + nregs;
-         if (!aarch64_composite_type_p (type, mode))
+         if (aarch64_advsimd_full_struct_mode_p (mode))
+           {
+             gcc_assert (nregs == size / 16);
+             pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
+           }
+         else if (aarch64_advsimd_partial_struct_mode_p (mode))
+           {
+             gcc_assert (nregs == size / 8);
+             pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
+           }
+         else if (!aarch64_composite_type_p (type, mode))
            {
              gcc_assert (nregs == 1);
              pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
            }
-         else if (aarch64_advsimd_full_struct_mode_p (mode)
-                  && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 16))
-           pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
-         else if (aarch64_advsimd_partial_struct_mode_p (mode)
-                  && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 8))
-           pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
          else
            {
              rtx par;
@@ -19285,6 +19296,13 @@ aarch64_short_vector_p (const_tree type,
       else
        size = GET_MODE_SIZE (mode);
     }
+
+  /* If an Advanced SIMD partial or full aggregate vector type, then it is not
+     a short vector type.  */
+  if (aarch64_advsimd_partial_struct_mode_p (mode)
+      || aarch64_advsimd_full_struct_mode_p (mode))
+    return false;
+
   if (known_eq (size, 8) || known_eq (size, 16))
     {
       /* 64-bit and 128-bit vectors should only acquire an SVE mode if
@@ -19316,6 +19334,12 @@ static bool
 aarch64_composite_type_p (const_tree type,
                          machine_mode mode)
 {
+  /* If an Advanced SIMD partial or full aggregate vector type, then it is a
+     composite type.  */
+  if (aarch64_advsimd_partial_struct_mode_p (mode)
+      || aarch64_advsimd_full_struct_mode_p (mode))
+    return true;
+
   if (aarch64_short_vector_p (type, mode))
     return false;
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c 
b/gcc/testsuite/gcc.target/aarch64/pr103094.c
new file mode 100644
index 
0000000000000000000000000000000000000000..441e602928ce8ac4e9890a1376acbc25671e284d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-rtl-expand -w" } */
+
+#include <arm_neon.h>
+
+void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t* outptr0)
+{
+  uint16x4x4_t cols_01_23_45_67 = { {
+    vreinterpret_u16_u8(cols_01_23.val[0]),
+    vreinterpret_u16_u8(cols_01_23.val[1]),
+    vreinterpret_u16_u8(cols_45_67.val[0]),
+    vreinterpret_u16_u8(cols_45_67.val[1])
+  } };
+
+  vst4_lane_u16(outptr0, cols_01_23_45_67, 0);
+}
+
+/* Check that we expand to v0 and v2 from the function arguments.  */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23 \]\)} expand 
} } */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67 \]\)} expand 
} } */
+


-- 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b8725d05be4fd6468f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3055,15 +3055,17 @@ aarch64_advsimd_struct_mode_p (machine_mode mode)
 static bool
 aarch64_advsimd_partial_struct_mode_p (machine_mode mode)
 {
-  return (aarch64_classify_vector_mode (mode)
-	  == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
+  return VECTOR_MODE_P (mode)
+	 && (aarch64_classify_vector_mode (mode)
+		== (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
 }
 
 /* Return true if MODE is an Advanced SIMD Q-register structure mode.  */
 static bool
 aarch64_advsimd_full_struct_mode_p (machine_mode mode)
 {
-  return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
+  return VECTOR_MODE_P (mode)
+	 && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
 }
 
 /* Return true if MODE is any of the data vector modes, including
@@ -6468,17 +6470,21 @@ aarch64_function_value (const_tree type, const_tree func,
 					       NULL, false))
     {
       gcc_assert (!sve_p);
-      if (!aarch64_composite_type_p (type, mode))
+      if (aarch64_advsimd_full_struct_mode_p (mode))
+	{
+	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16), count));
+	  return gen_rtx_REG (mode, V0_REGNUM);
+	}
+      else if (aarch64_advsimd_partial_struct_mode_p (mode))
+	{
+	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8), count));
+	  return gen_rtx_REG (mode, V0_REGNUM);
+	}
+      else if (!aarch64_composite_type_p (type, mode))
 	{
 	  gcc_assert (count == 1 && mode == ag_mode);
 	  return gen_rtx_REG (mode, V0_REGNUM);
 	}
-      else if (aarch64_advsimd_full_struct_mode_p (mode)
-	       && known_eq (GET_MODE_SIZE (ag_mode), 16))
-	return gen_rtx_REG (mode, V0_REGNUM);
-      else if (aarch64_advsimd_partial_struct_mode_p (mode)
-	       && known_eq (GET_MODE_SIZE (ag_mode), 8))
-	return gen_rtx_REG (mode, V0_REGNUM);
       else
 	{
 	  int i;
@@ -6745,6 +6751,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
     /* No frontends can create types with variable-sized modes, so we
        shouldn't be asked to pass or return them.  */
     size = GET_MODE_SIZE (mode).to_constant ();
+
   size = ROUND_UP (size, UNITS_PER_WORD);
 
   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
@@ -6769,17 +6776,21 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
       if (nvrn + nregs <= NUM_FP_ARG_REGS)
 	{
 	  pcum->aapcs_nextnvrn = nvrn + nregs;
-	  if (!aarch64_composite_type_p (type, mode))
+	  if (aarch64_advsimd_full_struct_mode_p (mode))
+	    {
+	      gcc_assert (nregs == size / 16);
+	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
+	    }
+	  else if (aarch64_advsimd_partial_struct_mode_p (mode))
+	    {
+	      gcc_assert (nregs == size / 8);
+	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
+	    }
+	  else if (!aarch64_composite_type_p (type, mode))
 	    {
 	      gcc_assert (nregs == 1);
 	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
 	    }
-	  else if (aarch64_advsimd_full_struct_mode_p (mode)
-		   && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 16))
-	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
-	  else if (aarch64_advsimd_partial_struct_mode_p (mode)
-		   && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 8))
-	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
 	  else
 	    {
 	      rtx par;
@@ -19285,6 +19296,13 @@ aarch64_short_vector_p (const_tree type,
       else
 	size = GET_MODE_SIZE (mode);
     }
+
+  /* If an Advanced SIMD partial or full aggregate vector type, then it is not
+     a short vector type.  */
+  if (aarch64_advsimd_partial_struct_mode_p (mode)
+      || aarch64_advsimd_full_struct_mode_p (mode))
+    return false;
+
   if (known_eq (size, 8) || known_eq (size, 16))
     {
       /* 64-bit and 128-bit vectors should only acquire an SVE mode if
@@ -19316,6 +19334,12 @@ static bool
 aarch64_composite_type_p (const_tree type,
 			  machine_mode mode)
 {
+  /* If an Advanced SIMD partial or full aggregate vector type, then it is a
+     composite type.  */
+  if (aarch64_advsimd_partial_struct_mode_p (mode)
+      || aarch64_advsimd_full_struct_mode_p (mode))
+    return true;
+
   if (aarch64_short_vector_p (type, mode))
     return false;
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c b/gcc/testsuite/gcc.target/aarch64/pr103094.c
new file mode 100644
index 0000000000000000000000000000000000000000..441e602928ce8ac4e9890a1376acbc25671e284d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-rtl-expand -w" } */
+
+#include <arm_neon.h>
+
+void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t* outptr0)
+{
+  uint16x4x4_t cols_01_23_45_67 = { {
+    vreinterpret_u16_u8(cols_01_23.val[0]),
+    vreinterpret_u16_u8(cols_01_23.val[1]),
+    vreinterpret_u16_u8(cols_45_67.val[0]),
+    vreinterpret_u16_u8(cols_45_67.val[1])
+  } };
+
+  vst4_lane_u16(outptr0, cols_01_23_45_67, 0);
+}
+
+/* Check that we expand to v0 and v2 from the function arguments.  */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23 \]\)} expand } } */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67 \]\)} expand } } */
+

Reply via email to