Hi,
This patch folds:
  lhs = svld1rq ({-1, -1, -1, ...}, &v[0])
into:
  lhs = vec_perm_expr<v, v, {0, 1, 2, 3, ...}>
and expands the resulting vec_perm_expr using aarch64_expand_sve_dupq.
With the patch, for the following test:

#include <arm_sve.h>
#include <arm_neon.h>

svint32_t
foo (int32x4_t x)
{
  return svld1rq (svptrue_b8 (), &x[0]);
}

it generates the following code:

foo:
.LFB4350:
        dup     z0.q, z0.q[0]
        ret

The patch passes bootstrap+test on aarch64-linux-gnu, but I am not sure whether the changes to aarch64_evpc_sve_tbl are correct.

Thanks,
Prathamesh
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 02e42a71e5e..e21bbec360c 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -1207,6 +1207,56 @@ public: insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0)); return e.use_contiguous_load_insn (icode); } + + gimple * + fold (gimple_folder &f) const OVERRIDE + { + tree arg0 = gimple_call_arg (f.call, 0); + tree arg1 = gimple_call_arg (f.call, 1); + + /* Transform: + lhs = svld1rq ({-1, -1, ... }, &v[0]) + into: + lhs = vec_perm_expr<v, v, {0, 1, 2, 3, ...}>. + on little endian target. */ + + if (!BYTES_BIG_ENDIAN + && integer_all_onesp (arg0) + && TREE_CODE (arg1) == ADDR_EXPR) + { + tree t = TREE_OPERAND (arg1, 0); + if (TREE_CODE (t) == ARRAY_REF) + { + tree index = TREE_OPERAND (t, 1); + t = TREE_OPERAND (t, 0); + if (integer_zerop (index) && TREE_CODE (t) == VIEW_CONVERT_EXPR) + { + t = TREE_OPERAND (t, 0); + tree vectype = TREE_TYPE (t); + if (VECTOR_TYPE_P (vectype) + && known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u) + && wi::to_wide (TYPE_SIZE (vectype)) == 128) + { + tree lhs = gimple_call_lhs (f.call); + tree lhs_type = TREE_TYPE (lhs); + int source_nelts = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); + vec_perm_builder sel (TYPE_VECTOR_SUBPARTS (lhs_type), source_nelts, 1); + for (int i = 0; i < source_nelts; i++) + sel.quick_push (i); + + vec_perm_indices indices (sel, 1, source_nelts); + if (!can_vec_perm_const_p (TYPE_MODE (lhs_type), indices)) + return NULL; + + tree mask = vec_perm_indices_to_tree (lhs_type, indices); + return gimple_build_assign (lhs, VEC_PERM_EXPR, t, t, mask); + } + } + } + } + + return NULL; + } }; class svld1ro_impl : public load_replicate diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index f07330cff4f..af27f550be3 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -23002,8 
+23002,32 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d) machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm); + if (d->one_vector_p) - emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel)); + { + bool use_dupq = false; + /* Check if sel is dup vector with encoded elements {0, 1, 2, ... nelts} */ + if (GET_CODE (sel) == CONST_VECTOR + && !GET_MODE_NUNITS (GET_MODE (sel)).is_constant () + && CONST_VECTOR_DUPLICATE_P (sel)) + { + unsigned nelts = const_vector_encoded_nelts (sel); + unsigned i; + for (i = 0; i < nelts; i++) + { + rtx elem = CONST_VECTOR_ENCODED_ELT(sel, i); + if (!(CONST_INT_P (elem) && INTVAL(elem) == i)) + break; + } + if (i == nelts) + use_dupq = true; + } + + if (use_dupq) + aarch64_expand_sve_dupq (d->target, GET_MODE (d->target), d->op0); + else + emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel)); + } else aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel); return true;