Hi,
The patch folds:
lhs = svld1rq ({-1, -1, -1, ...}, &v[0])
into:
lhs = vec_perm_expr<v, v, {0, 1, 2, 3, ... }>
and expands above vec_perm_expr using aarch64_expand_sve_dupq.

With patch, for following test:
#include <arm_sve.h>
#include <arm_neon.h>

svint32_t
foo (int32x4_t x)
{
  return svld1rq (svptrue_b8 (), &x[0]);
}

it generates following code:
foo:
.LFB4350:
        dup     z0.q, z0.q[0]
        ret

and passes bootstrap+test on aarch64-linux-gnu.
But I am not sure if the changes to aarch64_evpc_sve_tbl
are correct.

Thanks,
Prathamesh
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 02e42a71e5e..e21bbec360c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1207,6 +1207,56 @@ public:
     insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
     return e.use_contiguous_load_insn (icode);
   }
+
+  gimple *
+  fold (gimple_folder &f) const OVERRIDE
+  {
+    tree arg0 = gimple_call_arg (f.call, 0);
+    tree arg1 = gimple_call_arg (f.call, 1);
+
+    /* Transform:
+       lhs = svld1rq ({-1, -1, ... }, &v[0])
+       into:
+       lhs = vec_perm_expr<v, v, {0, 1, 2, 3, ...}>.
+       on little endian target.  */
+
+    if (!BYTES_BIG_ENDIAN
+       && integer_all_onesp (arg0)
+       && TREE_CODE (arg1) == ADDR_EXPR)
+      {
+       tree t = TREE_OPERAND (arg1, 0);
+       if (TREE_CODE (t) == ARRAY_REF)
+         {
+           tree index = TREE_OPERAND (t, 1);
+           t = TREE_OPERAND (t, 0);
+           if (integer_zerop (index) && TREE_CODE (t) == VIEW_CONVERT_EXPR)
+             {
+               t = TREE_OPERAND (t, 0);
+               tree vectype = TREE_TYPE (t);
+               if (VECTOR_TYPE_P (vectype)
+                   && known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
+                   && wi::to_wide (TYPE_SIZE (vectype)) == 128)
+                 {
+                   tree lhs = gimple_call_lhs (f.call);
+                   tree lhs_type = TREE_TYPE (lhs);
+                   int source_nelts = TYPE_VECTOR_SUBPARTS 
(vectype).to_constant ();
+                   vec_perm_builder sel (TYPE_VECTOR_SUBPARTS (lhs_type), 
source_nelts, 1);
+                   for (int i = 0; i < source_nelts; i++)
+                     sel.quick_push (i);
+
+                   vec_perm_indices indices (sel, 1, source_nelts);
+                   if (!can_vec_perm_const_p (TYPE_MODE (lhs_type), indices))
+                     return NULL;
+
+                   tree mask = vec_perm_indices_to_tree (lhs_type, indices);
+                   return gimple_build_assign (lhs, VEC_PERM_EXPR, t, t, mask);
+                 }
+             }
+         }
+      }
+
+    return NULL;
+  }
 };
 
 class svld1ro_impl : public load_replicate
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f07330cff4f..af27f550be3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23002,8 +23002,32 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
 
   machine_mode sel_mode = related_int_vector_mode (d->vmode).require ();
   rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
+
   if (d->one_vector_p)
-    emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
+    {
+      bool use_dupq = false;
+      /* Check if sel is dup vector with encoded elements {0, 1, 2, ... nelts} 
 */
+      if (GET_CODE (sel) == CONST_VECTOR
+         && !GET_MODE_NUNITS (GET_MODE (sel)).is_constant ()
+         && CONST_VECTOR_DUPLICATE_P (sel))
+         {
+           unsigned nelts = const_vector_encoded_nelts (sel);
+           unsigned i;
+           for (i = 0; i < nelts; i++)
+             {
+               rtx elem = CONST_VECTOR_ENCODED_ELT(sel, i);
+               if (!(CONST_INT_P (elem) && INTVAL(elem) == i))
+                 break;
+             }
+           if (i == nelts)
+             use_dupq = true;
+         }
+
+      if (use_dupq)
+       aarch64_expand_sve_dupq (d->target, GET_MODE (d->target), d->op0);
+      else
+       emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
+    }
   else
     aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
   return true;

Reply via email to