Hi!

I had not noticed that this patch makes gcc.target/arm/pr112337.c fail because __builtin_mve_vldrwq_sv4si is no longer available.

Adding this fixes the problem:
diff --git a/gcc/testsuite/gcc.target/arm/pr112337.c b/gcc/testsuite/gcc.target/arm/pr112337.c
index 10b7881b9f9..599229c1db0 100644
--- a/gcc/testsuite/gcc.target/arm/pr112337.c
+++ b/gcc/testsuite/gcc.target/arm/pr112337.c
@@ -4,7 +4,9 @@
 /* { dg-add-options arm_v8_1m_mve } */

 #pragma GCC arm "arm_mve_types.h"
-int32x4_t h(void *p) { return __builtin_mve_vldrwq_sv4si(p); }
+#pragma GCC arm "arm_mve.h" false
+
+int32x4_t h(void *p) { return vldrwq_s32(p); }
 void g(int32x4_t);
 void f(int, int, int, short, int *p) {
   int *bias = p;


I hope that's simple enough not to need a v2 of the patch series if everything else is OK?

Thanks,

Christophe


On 9/16/24 11:38, Christophe Lyon wrote:
From: Alfie Richards <alfie.richa...@arm.com>

Implement the mve vld and vst intrinsics using the MVE builtins framework.

The main part of the patch is to reimplement the vstr/vldr patterns
such that we now have much fewer of them:
- non-truncating stores
- predicated non-truncating stores
- truncating stores
- predicated truncating stores
- non-extending loads
- predicated non-extending loads
- extending loads
- predicated extending loads

This enables us to update the implementation of vld1/vst1 and use the
new vldr/vstr builtins.

The patch also adds support for the predicated vld1/vst1 versions.

2024-09-11  Alfie Richards  <alfie.richa...@arm.com>
            Christophe Lyon  <christophe.l...@arm.com>

        gcc/

        * config/arm/arm-mve-builtins-base.cc (vld1q_impl): Add support
        for predicated version.
        (vst1q_impl): Likewise.
        (vstrq_impl): New class.
        (vldrq_impl): New class.
        (vldrbq): New.
        (vldrhq): New.
        (vldrwq): New.
        (vstrbq): New.
        (vstrhq): New.
        (vstrwq): New.
        * config/arm/arm-mve-builtins-base.def (vld1q): Add predicated
        version.
        (vldrbq): New.
        (vldrhq): New.
        (vldrwq): New.
        (vst1q): Add predicated version.
        (vstrbq): New.
        (vstrhq): New.
        (vstrwq): New.
        (vrev32q): Update types to float_16.
        * config/arm/arm-mve-builtins-base.h (vldrbq): New.
        (vldrhq): New.
        (vldrwq): New.
        (vstrbq): New.
        (vstrhq): New.
        (vstrwq): New.
        * config/arm/arm-mve-builtins-functions.h (memory_vector_mode):
        Remove conversion of floating point vectors to integer.
        * config/arm/arm-mve-builtins.cc (TYPES_float16): Change to...
        (TYPES_float_16): ...this.
        (TYPES_float_32): New.
        (float16): Change to...
        (float_16): ...this.
        (float_32): New.
        (preds_z_or_none): New.
        (function_resolver::check_gp_argument): Add support for _z
        predicate.
        * config/arm/arm_mve.h (vstrbq): Remove.
        (vstrbq_p): Likewise.
        (vstrhq): Likewise.
        (vstrhq_p): Likewise.
        (vstrwq): Likewise.
        (vstrwq_p): Likewise.
        (vst1q_p): Likewise.
        (vld1q_z): Likewise.
        (vldrbq_s8): Likewise.
        (vldrbq_u8): Likewise.
        (vldrbq_s16): Likewise.
        (vldrbq_u16): Likewise.
        (vldrbq_s32): Likewise.
        (vldrbq_u32): Likewise.
        (vstrbq_p_s8): Likewise.
        (vstrbq_p_s32): Likewise.
        (vstrbq_p_s16): Likewise.
        (vstrbq_p_u8): Likewise.
        (vstrbq_p_u32): Likewise.
        (vstrbq_p_u16): Likewise.
        (vldrbq_z_s16): Likewise.
        (vldrbq_z_u8): Likewise.
        (vldrbq_z_s8): Likewise.
        (vldrbq_z_s32): Likewise.
        (vldrbq_z_u16): Likewise.
        (vldrbq_z_u32): Likewise.
        (vldrhq_s32): Likewise.
        (vldrhq_s16): Likewise.
        (vldrhq_u32): Likewise.
        (vldrhq_u16): Likewise.
        (vldrhq_z_s32): Likewise.
        (vldrhq_z_s16): Likewise.
        (vldrhq_z_u32): Likewise.
        (vldrhq_z_u16): Likewise.
        (vldrwq_s32): Likewise.
        (vldrwq_u32): Likewise.
        (vldrwq_z_s32): Likewise.
        (vldrwq_z_u32): Likewise.
        (vldrhq_f16): Likewise.
        (vldrhq_z_f16): Likewise.
        (vldrwq_f32): Likewise.
        (vldrwq_z_f32): Likewise.
        (vstrhq_f16): Likewise.
        (vstrhq_s32): Likewise.
        (vstrhq_s16): Likewise.
        (vstrhq_u32): Likewise.
        (vstrhq_u16): Likewise.
        (vstrhq_p_f16): Likewise.
        (vstrhq_p_s32): Likewise.
        (vstrhq_p_s16): Likewise.
        (vstrhq_p_u32): Likewise.
        (vstrhq_p_u16): Likewise.
        (vstrwq_f32): Likewise.
        (vstrwq_s32): Likewise.
        (vstrwq_u32): Likewise.
        (vstrwq_p_f32): Likewise.
        (vstrwq_p_s32): Likewise.
        (vstrwq_p_u32): Likewise.
        (vst1q_p_u8): Likewise.
        (vst1q_p_s8): Likewise.
        (vld1q_z_u8): Likewise.
        (vld1q_z_s8): Likewise.
        (vst1q_p_u16): Likewise.
        (vst1q_p_s16): Likewise.
        (vld1q_z_u16): Likewise.
        (vld1q_z_s16): Likewise.
        (vst1q_p_u32): Likewise.
        (vst1q_p_s32): Likewise.
        (vld1q_z_u32): Likewise.
        (vld1q_z_s32): Likewise.
        (vld1q_z_f16): Likewise.
        (vst1q_p_f16): Likewise.
        (vld1q_z_f32): Likewise.
        (vst1q_p_f32): Likewise.
        (__arm_vstrbq_s8): Likewise.
        (__arm_vstrbq_s32): Likewise.
        (__arm_vstrbq_s16): Likewise.
        (__arm_vstrbq_u8): Likewise.
        (__arm_vstrbq_u32): Likewise.
        (__arm_vstrbq_u16): Likewise.
        (__arm_vldrbq_s8): Likewise.
        (__arm_vldrbq_u8): Likewise.
        (__arm_vldrbq_s16): Likewise.
        (__arm_vldrbq_u16): Likewise.
        (__arm_vldrbq_s32): Likewise.
        (__arm_vldrbq_u32): Likewise.
        (__arm_vstrbq_p_s8): Likewise.
        (__arm_vstrbq_p_s32): Likewise.
        (__arm_vstrbq_p_s16): Likewise.
        (__arm_vstrbq_p_u8): Likewise.
        (__arm_vstrbq_p_u32): Likewise.
        (__arm_vstrbq_p_u16): Likewise.
        (__arm_vldrbq_z_s8): Likewise.
        (__arm_vldrbq_z_s32): Likewise.
        (__arm_vldrbq_z_s16): Likewise.
        (__arm_vldrbq_z_u8): Likewise.
        (__arm_vldrbq_z_u32): Likewise.
        (__arm_vldrbq_z_u16): Likewise.
        (__arm_vldrhq_s32): Likewise.
        (__arm_vldrhq_s16): Likewise.
        (__arm_vldrhq_u32): Likewise.
        (__arm_vldrhq_u16): Likewise.
        (__arm_vldrhq_z_s32): Likewise.
        (__arm_vldrhq_z_s16): Likewise.
        (__arm_vldrhq_z_u32): Likewise.
        (__arm_vldrhq_z_u16): Likewise.
        (__arm_vldrwq_s32): Likewise.
        (__arm_vldrwq_u32): Likewise.
        (__arm_vldrwq_z_s32): Likewise.
        (__arm_vldrwq_z_u32): Likewise.
        (__arm_vstrhq_s32): Likewise.
        (__arm_vstrhq_s16): Likewise.
        (__arm_vstrhq_u32): Likewise.
        (__arm_vstrhq_u16): Likewise.
        (__arm_vstrhq_p_s32): Likewise.
        (__arm_vstrhq_p_s16): Likewise.
        (__arm_vstrhq_p_u32): Likewise.
        (__arm_vstrhq_p_u16): Likewise.
        (__arm_vstrwq_s32): Likewise.
        (__arm_vstrwq_u32): Likewise.
        (__arm_vstrwq_p_s32): Likewise.
        (__arm_vstrwq_p_u32): Likewise.
        (__arm_vst1q_p_u8): Likewise.
        (__arm_vst1q_p_s8): Likewise.
        (__arm_vld1q_z_u8): Likewise.
        (__arm_vld1q_z_s8): Likewise.
        (__arm_vst1q_p_u16): Likewise.
        (__arm_vst1q_p_s16): Likewise.
        (__arm_vld1q_z_u16): Likewise.
        (__arm_vld1q_z_s16): Likewise.
        (__arm_vst1q_p_u32): Likewise.
        (__arm_vst1q_p_s32): Likewise.
        (__arm_vld1q_z_u32): Likewise.
        (__arm_vld1q_z_s32): Likewise.
        (__arm_vldrwq_f32): Likewise.
        (__arm_vldrwq_z_f32): Likewise.
        (__arm_vldrhq_z_f16): Likewise.
        (__arm_vldrhq_f16): Likewise.
        (__arm_vstrwq_p_f32): Likewise.
        (__arm_vstrwq_f32): Likewise.
        (__arm_vstrhq_f16): Likewise.
        (__arm_vstrhq_p_f16): Likewise.
        (__arm_vld1q_z_f16): Likewise.
        (__arm_vst1q_p_f16): Likewise.
        (__arm_vld1q_z_f32): Likewise.
        (__arm_vst2q_f32): Likewise.
        (__arm_vst1q_p_f32): Likewise.
        (__arm_vstrbq): Likewise.
        (__arm_vstrbq_p): Likewise.
        (__arm_vstrhq): Likewise.
        (__arm_vstrhq_p): Likewise.
        (__arm_vstrwq): Likewise.
        (__arm_vstrwq_p): Likewise.
        (__arm_vst1q_p): Likewise.
        (__arm_vld1q_z): Likewise.
        * config/arm/arm_mve_builtins.def:
        (vstrbq_s): Delete.
        (vstrbq_u): Likewise.
        (vldrbq_s): Likewise.
        (vldrbq_u): Likewise.
        (vstrbq_p_s): Likewise.
        (vstrbq_p_u): Likewise.
        (vldrbq_z_s): Likewise.
        (vldrbq_z_u): Likewise.
        (vld1q_u): Likewise.
        (vld1q_s): Likewise.
        (vldrhq_z_u): Likewise.
        (vldrhq_u): Likewise.
        (vldrhq_z_s): Likewise.
        (vldrhq_s): Likewise.
        (vld1q_f): Likewise.
        (vldrhq_f): Likewise.
        (vldrhq_z_f): Likewise.
        (vldrwq_f): Likewise.
        (vldrwq_s): Likewise.
        (vldrwq_u): Likewise.
        (vldrwq_z_f): Likewise.
        (vldrwq_z_s): Likewise.
        (vldrwq_z_u): Likewise.
        (vst1q_u): Likewise.
        (vst1q_s): Likewise.
        (vstrhq_p_u): Likewise.
        (vstrhq_u): Likewise.
        (vstrhq_p_s): Likewise.
        (vstrhq_s): Likewise.
        (vst1q_f): Likewise.
        (vstrhq_f): Likewise.
        (vstrhq_p_f): Likewise.
        (vstrwq_f): Likewise.
        (vstrwq_s): Likewise.
        (vstrwq_u): Likewise.
        (vstrwq_p_f): Likewise.
        (vstrwq_p_s): Likewise.
        (vstrwq_p_u): Likewise.
        * config/arm/iterators.md (MVE_w_narrow_TYPE): New iterator.
        (MVE_w_narrow_type): New iterator.
        (MVE_wide_n_TYPE): New attribute.
        (MVE_wide_n_type): New attribute.
        (MVE_wide_n_sz_elem): New attribute.
        (MVE_wide_n_VPRED): New attribute.
        (MVE_elem_ch): New attribute.
        (supf): Remove VSTRBQ_S, VSTRBQ_U, VLDRBQ_S, VLDRBQ_U, VLD1Q_S,
        VLD1Q_U, VLDRHQ_S, VLDRHQ_U, VLDRWQ_S, VLDRWQ_U, VST1Q_S, VST1Q_U,
        VSTRHQ_S, VSTRHQ_U, VSTRWQ_S, VSTRWQ_U.
        (VSTRBQ, VLDRBQ, VLD1Q, VLDRHQ, VLDRWQ, VST1Q, VSTRHQ, VSTRWQ):
        Delete.
        * config/arm/mve.md (mve_vstrbq_<supf><mode>): Remove.
        (mve_vldrbq_<supf><mode>): Likewise.
        (mve_vstrbq_p_<supf><mode>): Likewise.
        (mve_vldrbq_z_<supf><mode>): Likewise.
        (mve_vldrhq_fv8hf): Likewise.
        (mve_vldrhq_<supf><mode>): Likewise.
        (mve_vldrhq_z_fv8hf): Likewise.
        (mve_vldrhq_z_<supf><mode>): Likewise.
        (mve_vldrwq_fv4sf): Likewise.
        (mve_vldrwq_<supf>v4si): Likewise.
        (mve_vldrwq_z_fv4sf): Likewise.
        (mve_vldrwq_z_<supf>v4si): Likewise.
        (@mve_vld1q_f<mode>): Likewise.
        (@mve_vld1q_<supf><mode>): Likewise.
        (mve_vstrhq_fv8hf): Likewise.
        (mve_vstrhq_p_fv8hf): Likewise.
        (mve_vstrhq_p_<supf><mode>): Likewise.
        (mve_vstrhq_<supf><mode>): Likewise.
        (mve_vstrwq_fv4sf): Likewise.
        (mve_vstrwq_p_fv4sf): Likewise.
        (mve_vstrwq_p_<supf>v4si): Likewise.
        (mve_vstrwq_<supf>v4si): Likewise.
        (@mve_vst1q_f<mode>): Likewise.
        (@mve_vst1q_<supf><mode>): Likewise.
        (@mve_vstrq_<mode>): New.
        (@mve_vstrq_p_<mode>): New.
        (@mve_vstrq_truncate_<mode>): New.
        (@mve_vstrq_p_truncate_<mode>): New.
        (@mve_vldrq_<mode>): New.
        (@mve_vldrq_z_<mode>): New.
        (@mve_vldrq_extend_<mode><US>): New.
        (@mve_vldrq_z_extend_<mode><US>): New.
        * config/arm/unspecs.md:
        (VSTRBQ_S): Remove.
        (VSTRBQ_U): Likewise.
        (VLDRBQ_S): Likewise.
        (VLDRBQ_U): Likewise.
        (VLD1Q_F): Likewise.
        (VLD1Q_S): Likewise.
        (VLD1Q_U): Likewise.
        (VLDRHQ_F): Likewise.
        (VLDRHQ_U): Likewise.
        (VLDRHQ_S): Likewise.
        (VLDRWQ_F): Likewise.
        (VLDRWQ_S): Likewise.
        (VLDRWQ_U): Likewise.
        (VSTRHQ_F): Likewise.
        (VST1Q_S): Likewise.
        (VST1Q_U): Likewise.
        (VSTRHQ_U): Likewise.
        (VSTRWQ_S): Likewise.
        (VSTRWQ_U): Likewise.
        (VSTRWQ_F): Likewise.
        (VST1Q_F): Likewise.
        (VLDRQ): New.
        (VLDRQ_Z): Likewise.
        (VLDRQ_EXT): Likewise.
        (VLDRQ_EXT_Z): Likewise.
        (VSTRQ): Likewise.
        (VSTRQ_P): Likewise.
        (VSTRQ_TRUNC): Likewise.
        (VSTRQ_TRUNC_P): Likewise.
---
  gcc/config/arm/arm-mve-builtins-base.cc     | 135 ++-
  gcc/config/arm/arm-mve-builtins-base.def    |  20 +-
  gcc/config/arm/arm-mve-builtins-base.h      |   6 +
  gcc/config/arm/arm-mve-builtins-functions.h |  13 -
  gcc/config/arm/arm-mve-builtins.cc          |  15 +-
  gcc/config/arm/arm_mve.h                    | 978 +-------------------
  gcc/config/arm/arm_mve_builtins.def         |  38 -
  gcc/config/arm/iterators.md                 |  37 +-
  gcc/config/arm/mve.md                       | 662 ++++---------
  gcc/config/arm/unspecs.md                   |  29 +-
  10 files changed, 379 insertions(+), 1554 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc 
b/gcc/config/arm/arm-mve-builtins-base.cc
index e0ae593a6c0..9ca1bc4258a 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -96,16 +96,18 @@ public:
    expand (function_expander &e) const override
    {
      insn_code icode;
-    if (e.type_suffix (0).float_p)
-      icode = code_for_mve_vld1q_f(e.vector_mode (0));
-    else
+    switch (e.pred)
        {
-       if (e.type_suffix (0).unsigned_p)
-         icode = code_for_mve_vld1q(VLD1Q_U,
-                                    e.vector_mode (0));
-       else
-         icode = code_for_mve_vld1q(VLD1Q_S,
-                                    e.vector_mode (0));
+      case PRED_none:
+       icode = code_for_mve_vldrq (e.vector_mode (0));
+       break;
+
+      case PRED_z:
+       icode = code_for_mve_vldrq_z (e.vector_mode (0));
+       break;
+
+      default:
+       gcc_unreachable ();
        }
      return e.use_contiguous_load_insn (icode);
    }
@@ -124,21 +126,112 @@ public:
    expand (function_expander &e) const override
    {
      insn_code icode;
-    if (e.type_suffix (0).float_p)
-      icode = code_for_mve_vst1q_f(e.vector_mode (0));
-    else
+    switch (e.pred)
+      {
+      case PRED_none:
+       icode = code_for_mve_vstrq (e.vector_mode (0));
+       break;
+
+      case PRED_p:
+       icode = code_for_mve_vstrq_p (e.vector_mode (0));
+       break;
+
+      default:
+       gcc_unreachable ();
+      }
+    return e.use_contiguous_store_insn (icode);
+  }
+};
+
+/* Builds the vstrq* intrinsics.  */
+class vstrq_impl : public store_truncating
+{
+public:
+  using store_truncating::store_truncating;
+
+  unsigned int call_properties (const function_instance &) const override
+  {
+    return CP_WRITE_MEMORY;
+  }
+
+  rtx expand (function_expander &e) const override
+  {
+    insn_code icode;
+    switch (e.pred)
        {
-       if (e.type_suffix (0).unsigned_p)
-         icode = code_for_mve_vst1q(VST1Q_U,
-                                    e.vector_mode (0));
+      case PRED_none:
+       if (e.vector_mode (0) == e.memory_vector_mode ())
+         /* Non-truncating store case.  */
+         icode = code_for_mve_vstrq (e.vector_mode (0));
        else
-         icode = code_for_mve_vst1q(VST1Q_S,
-                                    e.vector_mode (0));
+         /* Truncating store case.
+            (there is only one possible truncation for each memory mode so only
+            one mode argument is needed).  */
+         icode = code_for_mve_vstrq_truncate (e.memory_vector_mode ());
+       break;
+
+      case PRED_p:
+       if (e.vector_mode (0) == e.memory_vector_mode ())
+         icode = code_for_mve_vstrq_p (e.vector_mode (0));
+       else
+         icode = code_for_mve_vstrq_p_truncate (e.memory_vector_mode ());
+       break;
+
+      default:
+       gcc_unreachable ();
        }
      return e.use_contiguous_store_insn (icode);
    }
  };
+/* Builds the vldrq* intrinsics. */
+class vldrq_impl : public load_extending
+{
+public:
+  using load_extending::load_extending;
+
+  unsigned int call_properties (const function_instance &) const override
+  {
+    return CP_READ_MEMORY;
+  }
+
+  rtx expand (function_expander &e) const override
+  {
+    insn_code icode;
+    switch (e.pred)
+      {
+      case PRED_none:
+       if (e.vector_mode (0) == e.memory_vector_mode ())
+         /* Non-extending load case.  */
+         icode = code_for_mve_vldrq (e.vector_mode (0));
+       else
+         /* Extending load case.
+            (there is only one extension for each memory mode so only one type
+            argument is needed).  */
+         icode = code_for_mve_vldrq_extend (e.memory_vector_mode (),
+                                            e.type_suffix (0).unsigned_p
+                                            ? ZERO_EXTEND
+                                            : SIGN_EXTEND);
+       break;
+
+      case PRED_z:
+       if (e.vector_mode (0) == e.memory_vector_mode ())
+         icode = code_for_mve_vldrq_z (e.vector_mode (0));
+       else
+         icode = code_for_mve_vldrq_z_extend (e.memory_vector_mode (),
+                                              e.type_suffix (0).unsigned_p
+                                              ? ZERO_EXTEND
+                                              : SIGN_EXTEND);
+       break;
+
+      default:
+       gcc_unreachable ();
+      }
+
+    return e.use_contiguous_load_insn (icode);
+  }
+};
+
  } /* end anonymous namespace */
namespace arm_mve {
@@ -347,6 +440,11 @@ FUNCTION (vfmsq, unspec_mve_function_exact_insn, (-1, -1, 
VFMSQ_F, -1, -1, -1, -
  FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ)
  FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ)
  FUNCTION (vld1q, vld1_impl,)
+FUNCTION (vldrbq, vldrq_impl, (TYPE_SUFFIX_s8, TYPE_SUFFIX_u8))
+FUNCTION (vldrhq, vldrq_impl,
+         (TYPE_SUFFIX_s16, TYPE_SUFFIX_u16, TYPE_SUFFIX_f16))
+FUNCTION (vldrwq, vldrq_impl,
+         (TYPE_SUFFIX_s32, TYPE_SUFFIX_u32, TYPE_SUFFIX_f32))
  FUNCTION_PRED_P_S (vmaxavq, VMAXAVQ)
  FUNCTION_WITHOUT_N_NO_U_F (vmaxaq, VMAXAQ)
  FUNCTION_ONLY_F (vmaxnmaq, VMAXNMAQ)
@@ -463,6 +561,9 @@ FUNCTION_ONLY_N_NO_F (vshrq, VSHRQ)
  FUNCTION_ONLY_N_NO_F (vsliq, VSLIQ)
  FUNCTION_ONLY_N_NO_F (vsriq, VSRIQ)
  FUNCTION (vst1q, vst1_impl,)
+FUNCTION (vstrbq, vstrq_impl, (QImode, opt_scalar_mode ()))
+FUNCTION (vstrhq, vstrq_impl, (HImode, HFmode))
+FUNCTION (vstrwq, vstrq_impl, (SImode, SFmode))
  FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
  FUNCTION (vuninitializedq, vuninitializedq_impl,)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 90d031eebec..513827f0e40 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -47,7 +47,10 @@ DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, 
mx_or_none)
  DEF_MVE_FUNCTION (vhcaddq_rot90, binary, all_signed, mx_or_none)
  DEF_MVE_FUNCTION (vhcaddq_rot270, binary, all_signed, mx_or_none)
  DEF_MVE_FUNCTION (vhsubq, binary_opt_n, all_integer, mx_or_none)
-DEF_MVE_FUNCTION (vld1q, load, all_integer, none)
+DEF_MVE_FUNCTION (vld1q, load, all_integer, z_or_none)
+DEF_MVE_FUNCTION (vldrbq, load_ext, all_integer, z_or_none)
+DEF_MVE_FUNCTION (vldrhq, load_ext, integer_16_32, z_or_none)
+DEF_MVE_FUNCTION (vldrwq, load_ext, integer_32, z_or_none)
  DEF_MVE_FUNCTION (vmaxaq, binary_maxamina, all_signed, m_or_none)
  DEF_MVE_FUNCTION (vmaxavq, binary_maxavminav, all_signed, p_or_none)
  DEF_MVE_FUNCTION (vmaxq, binary, all_integer, mx_or_none)
@@ -151,7 +154,10 @@ DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow, 
integer_16_32, m_or_none)
  DEF_MVE_FUNCTION (vshrq, binary_rshift, all_integer, mx_or_none)
  DEF_MVE_FUNCTION (vsliq, ternary_lshift, all_integer, m_or_none)
  DEF_MVE_FUNCTION (vsriq, ternary_rshift, all_integer, m_or_none)
-DEF_MVE_FUNCTION (vst1q, store, all_integer, none)
+DEF_MVE_FUNCTION (vst1q, store, all_integer, p_or_none)
+DEF_MVE_FUNCTION (vstrbq, store, all_integer, p_or_none)
+DEF_MVE_FUNCTION (vstrhq, store, integer_16_32, p_or_none)
+DEF_MVE_FUNCTION (vstrwq, store, integer_32, p_or_none)
  DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
  DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
  #undef REQUIRES_FLOAT
@@ -184,7 +190,9 @@ DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none)
  DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none)
  DEF_MVE_FUNCTION (vfmasq, ternary_n, all_float, m_or_none)
  DEF_MVE_FUNCTION (vfmsq, ternary, all_float, m_or_none)
-DEF_MVE_FUNCTION (vld1q, load, all_float, none)
+DEF_MVE_FUNCTION (vld1q, load, all_float, z_or_none)
+DEF_MVE_FUNCTION (vldrhq, load_ext, float_16, z_or_none)
+DEF_MVE_FUNCTION (vldrwq, load_ext, float_32, z_or_none)
  DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none)
  DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none)
  DEF_MVE_FUNCTION (vmaxnmq, binary, all_float, mx_or_none)
@@ -198,7 +206,7 @@ DEF_MVE_FUNCTION (vnegq, unary, all_float, mx_or_none)
  DEF_MVE_FUNCTION (vorrq, binary_orrq, all_float, mx_or_none)
  DEF_MVE_FUNCTION (vpselq, vpsel, all_float, none)
  DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_float, none)
-DEF_MVE_FUNCTION (vrev32q, unary, float16, mx_or_none)
+DEF_MVE_FUNCTION (vrev32q, unary, float_16, mx_or_none)
  DEF_MVE_FUNCTION (vrev64q, unary, all_float, mx_or_none)
  DEF_MVE_FUNCTION (vrndaq, unary, all_float, mx_or_none)
  DEF_MVE_FUNCTION (vrndmq, unary, all_float, mx_or_none)
@@ -206,7 +214,9 @@ DEF_MVE_FUNCTION (vrndnq, unary, all_float, mx_or_none)
  DEF_MVE_FUNCTION (vrndpq, unary, all_float, mx_or_none)
  DEF_MVE_FUNCTION (vrndq, unary, all_float, mx_or_none)
  DEF_MVE_FUNCTION (vrndxq, unary, all_float, mx_or_none)
-DEF_MVE_FUNCTION (vst1q, store, all_float, none)
+DEF_MVE_FUNCTION (vst1q, store, all_float, p_or_none)
+DEF_MVE_FUNCTION (vstrhq, store, float_16, p_or_none)
+DEF_MVE_FUNCTION (vstrwq, store, float_32, p_or_none)
  DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_float, mx_or_none)
  DEF_MVE_FUNCTION (vuninitializedq, inherent, all_float, none)
  #undef REQUIRES_FLOAT
diff --git a/gcc/config/arm/arm-mve-builtins-base.h 
b/gcc/config/arm/arm-mve-builtins-base.h
index c9b52a81c5e..1e267ce0238 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -64,6 +64,9 @@ extern const function_base *const vhcaddq_rot270;
  extern const function_base *const vhcaddq_rot90;
  extern const function_base *const vhsubq;
  extern const function_base *const vld1q;
+extern const function_base *const vldrbq;
+extern const function_base *const vldrhq;
+extern const function_base *const vldrwq;
  extern const function_base *const vmaxaq;
  extern const function_base *const vmaxavq;
  extern const function_base *const vmaxnmaq;
@@ -180,6 +183,9 @@ extern const function_base *const vshrq;
  extern const function_base *const vsliq;
  extern const function_base *const vsriq;
  extern const function_base *const vst1q;
+extern const function_base *const vstrbq;
+extern const function_base *const vstrhq;
+extern const function_base *const vstrwq;
  extern const function_base *const vsubq;
  extern const function_base *const vuninitializedq;
diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
index e47bc69936e..c78b370e958 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -1005,19 +1005,6 @@ public:
    memory_vector_mode (const function_instance &fi) const override
    {
      machine_mode mode = fi.vector_mode (0);
-    /* Vectors of floating-point are managed in memory as vectors of
-       integers.  */
-    switch (mode)
-      {
-      case E_V4SFmode:
-       mode = E_V4SImode;
-       break;
-      case E_V8HFmode:
-       mode = E_V8HImode;
-       break;
-      default:
-       break;
-      }
if (m_vectors_per_tuple != 1)
        mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
diff --git a/gcc/config/arm/arm-mve-builtins.cc 
b/gcc/config/arm/arm-mve-builtins.cc
index f519fded000..109e391d768 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -149,8 +149,10 @@ CONSTEXPR const type_suffix_info 
type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
     class ("b", "f", etc.) and a numerical bit count.  */
/* _f16. */
-#define TYPES_float16(S, D) \
-  S (f16)
+#define TYPES_float_16(S, D) S (f16)
+
+/* _f32.  */
+#define TYPES_float_32(S, D) S (f32)
/* _f16 _f32. */
  #define TYPES_all_float(S, D) \
@@ -273,7 +275,8 @@ static const type_suffix_pair types_none[] = {
DEF_MVE_TYPES_ARRAY (all_integer);
  DEF_MVE_TYPES_ARRAY (all_integer_with_64);
-DEF_MVE_TYPES_ARRAY (float16);
+DEF_MVE_TYPES_ARRAY (float_16);
+DEF_MVE_TYPES_ARRAY (float_32);
  DEF_MVE_TYPES_ARRAY (all_float);
  DEF_MVE_TYPES_ARRAY (all_signed);
  DEF_MVE_TYPES_ARRAY (all_unsigned);
@@ -308,6 +311,11 @@ static const predication_index preds_p_or_none[] = {
    PRED_p, PRED_none, NUM_PREDS
  };
+/* Used by functions that have the z predicated form, in addition to
+   an unpredicated form.  */
+static const predication_index preds_z_or_none[]
+  = {PRED_z, PRED_none, NUM_PREDS};
+
  /* A list of all MVE ACLE functions.  */
  static CONSTEXPR const function_group_info function_groups[] = {
  #define DEF_MVE_FUNCTION(NAME, SHAPE, TYPES, PREDS)                   \
@@ -1601,6 +1609,7 @@ function_resolver::check_gp_argument (unsigned int nops,
case PRED_p:
        case PRED_x:
+       case PRED_z:
          /* Add final predicate.  */
          nargs = nops + 1;
          break;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index ae1b5438797..659d8802e4a 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -49,10 +49,8 @@
  #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, 
__b, __p)
  #define vornq_m(__inactive, __a, __b, __p) __arm_vornq_m(__inactive, __a, 
__b, __p)
  #define vstrbq_scatter_offset(__base, __offset, __value) 
__arm_vstrbq_scatter_offset(__base, __offset, __value)
-#define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value)
  #define vstrwq_scatter_base(__addr, __offset, __value) 
__arm_vstrwq_scatter_base(__addr, __offset, __value)
  #define vldrbq_gather_offset(__base, __offset) 
__arm_vldrbq_gather_offset(__base, __offset)
-#define vstrbq_p(__addr, __value, __p) __arm_vstrbq_p(__addr, __value, __p)
  #define vstrbq_scatter_offset_p(__base, __offset, __value, __p) 
__arm_vstrbq_scatter_offset_p(__base, __offset, __value, __p)
  #define vstrwq_scatter_base_p(__addr, __offset, __value, __p) 
__arm_vstrwq_scatter_base_p(__addr, __offset, __value, __p)
  #define vldrbq_gather_offset_z(__base, __offset, __p) 
__arm_vldrbq_gather_offset_z(__base, __offset, __p)
@@ -72,10 +70,6 @@
  #define vstrhq_scatter_offset_p(__base, __offset, __value, __p) 
__arm_vstrhq_scatter_offset_p(__base, __offset, __value, __p)
  #define vstrhq_scatter_shifted_offset(__base, __offset, __value) 
__arm_vstrhq_scatter_shifted_offset(__base, __offset, __value)
  #define vstrhq_scatter_shifted_offset_p(__base, __offset, __value, __p) 
__arm_vstrhq_scatter_shifted_offset_p(__base, __offset, __value, __p)
-#define vstrhq(__addr, __value) __arm_vstrhq(__addr, __value)
-#define vstrhq_p(__addr, __value, __p) __arm_vstrhq_p(__addr, __value, __p)
-#define vstrwq(__addr, __value) __arm_vstrwq(__addr, __value)
-#define vstrwq_p(__addr, __value, __p) __arm_vstrwq_p(__addr, __value, __p)
  #define vstrdq_scatter_base_p(__addr, __offset, __value, __p) 
__arm_vstrdq_scatter_base_p(__addr, __offset, __value, __p)
  #define vstrdq_scatter_base(__addr, __offset, __value) 
__arm_vstrdq_scatter_base(__addr, __offset, __value)
  #define vstrdq_scatter_offset_p(__base, __offset, __value, __p) 
__arm_vstrdq_scatter_offset_p(__base, __offset, __value, __p)
@@ -129,9 +123,7 @@
  #define vsbciq_m(__inactive, __a, __b, __carry_out, __p) 
__arm_vsbciq_m(__inactive, __a, __b, __carry_out, __p)
  #define vsbcq(__a, __b, __carry) __arm_vsbcq(__a, __b, __carry)
  #define vsbcq_m(__inactive, __a, __b, __carry, __p) __arm_vsbcq_m(__inactive, 
__a, __b, __carry, __p)
-#define vst1q_p(__addr, __value, __p) __arm_vst1q_p(__addr, __value, __p)
  #define vst2q(__addr, __value) __arm_vst2q(__addr, __value)
-#define vld1q_z(__base, __p) __arm_vld1q_z(__base, __p)
  #define vld2q(__addr) __arm_vld2q(__addr)
  #define vld4q(__addr) __arm_vld4q(__addr)
  #define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx)
@@ -304,24 +296,12 @@
  #define vstrwq_scatter_base_u32(__addr,  __offset, __value) 
__arm_vstrwq_scatter_base_u32(__addr,  __offset, __value)
  #define vldrbq_gather_offset_u8(__base, __offset) 
__arm_vldrbq_gather_offset_u8(__base, __offset)
  #define vldrbq_gather_offset_s8(__base, __offset) 
__arm_vldrbq_gather_offset_s8(__base, __offset)
-#define vldrbq_s8(__base) __arm_vldrbq_s8(__base)
-#define vldrbq_u8(__base) __arm_vldrbq_u8(__base)
  #define vldrbq_gather_offset_u16(__base, __offset) 
__arm_vldrbq_gather_offset_u16(__base, __offset)
  #define vldrbq_gather_offset_s16(__base, __offset) 
__arm_vldrbq_gather_offset_s16(__base, __offset)
-#define vldrbq_s16(__base) __arm_vldrbq_s16(__base)
-#define vldrbq_u16(__base) __arm_vldrbq_u16(__base)
  #define vldrbq_gather_offset_u32(__base, __offset) 
__arm_vldrbq_gather_offset_u32(__base, __offset)
  #define vldrbq_gather_offset_s32(__base, __offset) 
__arm_vldrbq_gather_offset_s32(__base, __offset)
-#define vldrbq_s32(__base) __arm_vldrbq_s32(__base)
-#define vldrbq_u32(__base) __arm_vldrbq_u32(__base)
  #define vldrwq_gather_base_s32(__addr,  __offset) 
__arm_vldrwq_gather_base_s32(__addr,  __offset)
  #define vldrwq_gather_base_u32(__addr,  __offset) 
__arm_vldrwq_gather_base_u32(__addr,  __offset)
-#define vstrbq_p_s8( __addr, __value, __p) __arm_vstrbq_p_s8( __addr, __value, 
__p)
-#define vstrbq_p_s32( __addr, __value, __p) __arm_vstrbq_p_s32( __addr, 
__value, __p)
-#define vstrbq_p_s16( __addr, __value, __p) __arm_vstrbq_p_s16( __addr, 
__value, __p)
-#define vstrbq_p_u8( __addr, __value, __p) __arm_vstrbq_p_u8( __addr, __value, 
__p)
-#define vstrbq_p_u32( __addr, __value, __p) __arm_vstrbq_p_u32( __addr, 
__value, __p)
-#define vstrbq_p_u16( __addr, __value, __p) __arm_vstrbq_p_u16( __addr, 
__value, __p)
  #define vstrbq_scatter_offset_p_s8( __base, __offset, __value, __p) 
__arm_vstrbq_scatter_offset_p_s8( __base, __offset, __value, __p)
  #define vstrbq_scatter_offset_p_s32( __base, __offset, __value, __p) 
__arm_vstrbq_scatter_offset_p_s32( __base, __offset, __value, __p)
  #define vstrbq_scatter_offset_p_s16( __base, __offset, __value, __p) 
__arm_vstrbq_scatter_offset_p_s16( __base, __offset, __value, __p)
@@ -336,12 +316,6 @@
  #define vldrbq_gather_offset_z_u16(__base, __offset, __p) 
__arm_vldrbq_gather_offset_z_u16(__base, __offset, __p)
  #define vldrbq_gather_offset_z_u32(__base, __offset, __p) 
__arm_vldrbq_gather_offset_z_u32(__base, __offset, __p)
  #define vldrbq_gather_offset_z_s8(__base, __offset, __p) 
__arm_vldrbq_gather_offset_z_s8(__base, __offset, __p)
-#define vldrbq_z_s16(__base, __p) __arm_vldrbq_z_s16(__base, __p)
-#define vldrbq_z_u8(__base, __p) __arm_vldrbq_z_u8(__base, __p)
-#define vldrbq_z_s8(__base, __p) __arm_vldrbq_z_s8(__base, __p)
-#define vldrbq_z_s32(__base, __p) __arm_vldrbq_z_s32(__base, __p)
-#define vldrbq_z_u16(__base, __p) __arm_vldrbq_z_u16(__base, __p)
-#define vldrbq_z_u32(__base, __p) __arm_vldrbq_z_u32(__base, __p)
  #define vldrwq_gather_base_z_u32(__addr,  __offset, __p) 
__arm_vldrwq_gather_base_z_u32(__addr,  __offset, __p)
  #define vldrwq_gather_base_z_s32(__addr,  __offset, __p) 
__arm_vldrwq_gather_base_z_s32(__addr,  __offset, __p)
  #define vldrhq_gather_offset_s32(__base, __offset) 
__arm_vldrhq_gather_offset_s32(__base, __offset)
@@ -360,22 +334,6 @@
  #define vldrhq_gather_shifted_offset_z_s16(__base, __offset, __p) 
__arm_vldrhq_gather_shifted_offset_z_s16(__base, __offset, __p)
  #define vldrhq_gather_shifted_offset_z_u32(__base, __offset, __p) 
__arm_vldrhq_gather_shifted_offset_z_u32(__base, __offset, __p)
  #define vldrhq_gather_shifted_offset_z_u16(__base, __offset, __p) 
__arm_vldrhq_gather_shifted_offset_z_u16(__base, __offset, __p)
-#define vldrhq_s32(__base) __arm_vldrhq_s32(__base)
-#define vldrhq_s16(__base) __arm_vldrhq_s16(__base)
-#define vldrhq_u32(__base) __arm_vldrhq_u32(__base)
-#define vldrhq_u16(__base) __arm_vldrhq_u16(__base)
-#define vldrhq_z_s32(__base, __p) __arm_vldrhq_z_s32(__base, __p)
-#define vldrhq_z_s16(__base, __p) __arm_vldrhq_z_s16(__base, __p)
-#define vldrhq_z_u32(__base, __p) __arm_vldrhq_z_u32(__base, __p)
-#define vldrhq_z_u16(__base, __p) __arm_vldrhq_z_u16(__base, __p)
-#define vldrwq_s32(__base) __arm_vldrwq_s32(__base)
-#define vldrwq_u32(__base) __arm_vldrwq_u32(__base)
-#define vldrwq_z_s32(__base, __p) __arm_vldrwq_z_s32(__base, __p)
-#define vldrwq_z_u32(__base, __p) __arm_vldrwq_z_u32(__base, __p)
-#define vldrhq_f16(__base) __arm_vldrhq_f16(__base)
-#define vldrhq_z_f16(__base, __p) __arm_vldrhq_z_f16(__base, __p)
-#define vldrwq_f32(__base) __arm_vldrwq_f32(__base)
-#define vldrwq_z_f32(__base, __p) __arm_vldrwq_z_f32(__base, __p)
  #define vldrdq_gather_base_s64(__addr,  __offset) 
__arm_vldrdq_gather_base_s64(__addr,  __offset)
  #define vldrdq_gather_base_u64(__addr,  __offset) 
__arm_vldrdq_gather_base_u64(__addr,  __offset)
  #define vldrdq_gather_base_z_s64(__addr,  __offset, __p) 
__arm_vldrdq_gather_base_z_s64(__addr,  __offset, __p)
@@ -406,7 +364,6 @@
  #define vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p) 
__arm_vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p)
  #define vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p) 
__arm_vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p)
  #define vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p) 
__arm_vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p)
-#define vstrhq_f16(__addr, __value) __arm_vstrhq_f16(__addr, __value)
  #define vstrhq_scatter_offset_s32( __base, __offset, __value) 
__arm_vstrhq_scatter_offset_s32( __base, __offset, __value)
  #define vstrhq_scatter_offset_s16( __base, __offset, __value) 
__arm_vstrhq_scatter_offset_s16( __base, __offset, __value)
  #define vstrhq_scatter_offset_u32( __base, __offset, __value) 
__arm_vstrhq_scatter_offset_u32( __base, __offset, __value)
@@ -423,21 +380,6 @@
  #define vstrhq_scatter_shifted_offset_p_s16( __base, __offset, __value, __p) 
__arm_vstrhq_scatter_shifted_offset_p_s16( __base, __offset, __value, __p)
  #define vstrhq_scatter_shifted_offset_p_u32( __base, __offset, __value, __p) 
__arm_vstrhq_scatter_shifted_offset_p_u32( __base, __offset, __value, __p)
  #define vstrhq_scatter_shifted_offset_p_u16( __base, __offset, __value, __p) 
__arm_vstrhq_scatter_shifted_offset_p_u16( __base, __offset, __value, __p)
-#define vstrhq_s32(__addr, __value) __arm_vstrhq_s32(__addr, __value)
-#define vstrhq_s16(__addr, __value) __arm_vstrhq_s16(__addr, __value)
-#define vstrhq_u32(__addr, __value) __arm_vstrhq_u32(__addr, __value)
-#define vstrhq_u16(__addr, __value) __arm_vstrhq_u16(__addr, __value)
-#define vstrhq_p_f16(__addr, __value, __p) __arm_vstrhq_p_f16(__addr, __value, 
__p)
-#define vstrhq_p_s32(__addr, __value, __p) __arm_vstrhq_p_s32(__addr, __value, 
__p)
-#define vstrhq_p_s16(__addr, __value, __p) __arm_vstrhq_p_s16(__addr, __value, 
__p)
-#define vstrhq_p_u32(__addr, __value, __p) __arm_vstrhq_p_u32(__addr, __value, 
__p)
-#define vstrhq_p_u16(__addr, __value, __p) __arm_vstrhq_p_u16(__addr, __value, 
__p)
-#define vstrwq_f32(__addr, __value) __arm_vstrwq_f32(__addr, __value)
-#define vstrwq_s32(__addr, __value) __arm_vstrwq_s32(__addr, __value)
-#define vstrwq_u32(__addr, __value) __arm_vstrwq_u32(__addr, __value)
-#define vstrwq_p_f32(__addr, __value, __p) __arm_vstrwq_p_f32(__addr, __value, 
__p)
-#define vstrwq_p_s32(__addr, __value, __p) __arm_vstrwq_p_s32(__addr, __value, 
__p)
-#define vstrwq_p_u32(__addr, __value, __p) __arm_vstrwq_p_u32(__addr, __value, 
__p)
  #define vstrdq_scatter_base_p_s64(__addr, __offset, __value, __p) 
__arm_vstrdq_scatter_base_p_s64(__addr, __offset, __value, __p)
  #define vstrdq_scatter_base_p_u64(__addr, __offset, __value, __p) 
__arm_vstrdq_scatter_base_p_u64(__addr, __offset, __value, __p)
  #define vstrdq_scatter_base_s64(__addr, __offset, __value) 
__arm_vstrdq_scatter_base_s64(__addr, __offset, __value)
@@ -636,46 +578,30 @@
  #define vsbcq_u32(__a, __b,  __carry) __arm_vsbcq_u32(__a, __b,  __carry)
  #define vsbcq_m_s32(__inactive, __a, __b,  __carry, __p) 
__arm_vsbcq_m_s32(__inactive, __a, __b,  __carry, __p)
  #define vsbcq_m_u32(__inactive, __a, __b,  __carry, __p) 
__arm_vsbcq_m_u32(__inactive, __a, __b,  __carry, __p)
-#define vst1q_p_u8(__addr, __value, __p) __arm_vst1q_p_u8(__addr, __value, __p)
-#define vst1q_p_s8(__addr, __value, __p) __arm_vst1q_p_s8(__addr, __value, __p)
  #define vst2q_s8(__addr, __value) __arm_vst2q_s8(__addr, __value)
  #define vst2q_u8(__addr, __value) __arm_vst2q_u8(__addr, __value)
-#define vld1q_z_u8(__base, __p) __arm_vld1q_z_u8(__base, __p)
-#define vld1q_z_s8(__base, __p) __arm_vld1q_z_s8(__base, __p)
  #define vld2q_s8(__addr) __arm_vld2q_s8(__addr)
  #define vld2q_u8(__addr) __arm_vld2q_u8(__addr)
  #define vld4q_s8(__addr) __arm_vld4q_s8(__addr)
  #define vld4q_u8(__addr) __arm_vld4q_u8(__addr)
-#define vst1q_p_u16(__addr, __value, __p) __arm_vst1q_p_u16(__addr, __value, 
__p)
-#define vst1q_p_s16(__addr, __value, __p) __arm_vst1q_p_s16(__addr, __value, 
__p)
  #define vst2q_s16(__addr, __value) __arm_vst2q_s16(__addr, __value)
  #define vst2q_u16(__addr, __value) __arm_vst2q_u16(__addr, __value)
-#define vld1q_z_u16(__base, __p) __arm_vld1q_z_u16(__base, __p)
-#define vld1q_z_s16(__base, __p) __arm_vld1q_z_s16(__base, __p)
  #define vld2q_s16(__addr) __arm_vld2q_s16(__addr)
  #define vld2q_u16(__addr) __arm_vld2q_u16(__addr)
  #define vld4q_s16(__addr) __arm_vld4q_s16(__addr)
  #define vld4q_u16(__addr) __arm_vld4q_u16(__addr)
-#define vst1q_p_u32(__addr, __value, __p) __arm_vst1q_p_u32(__addr, __value, 
__p)
-#define vst1q_p_s32(__addr, __value, __p) __arm_vst1q_p_s32(__addr, __value, 
__p)
  #define vst2q_s32(__addr, __value) __arm_vst2q_s32(__addr, __value)
  #define vst2q_u32(__addr, __value) __arm_vst2q_u32(__addr, __value)
-#define vld1q_z_u32(__base, __p) __arm_vld1q_z_u32(__base, __p)
-#define vld1q_z_s32(__base, __p) __arm_vld1q_z_s32(__base, __p)
  #define vld2q_s32(__addr) __arm_vld2q_s32(__addr)
  #define vld2q_u32(__addr) __arm_vld2q_u32(__addr)
  #define vld4q_s32(__addr) __arm_vld4q_s32(__addr)
  #define vld4q_u32(__addr) __arm_vld4q_u32(__addr)
  #define vld4q_f16(__addr) __arm_vld4q_f16(__addr)
  #define vld2q_f16(__addr) __arm_vld2q_f16(__addr)
-#define vld1q_z_f16(__base, __p) __arm_vld1q_z_f16(__base, __p)
  #define vst2q_f16(__addr, __value) __arm_vst2q_f16(__addr, __value)
-#define vst1q_p_f16(__addr, __value, __p) __arm_vst1q_p_f16(__addr, __value, 
__p)
  #define vld4q_f32(__addr) __arm_vld4q_f32(__addr)
  #define vld2q_f32(__addr) __arm_vld2q_f32(__addr)
-#define vld1q_z_f32(__base, __p) __arm_vld1q_z_f32(__base, __p)
  #define vst2q_f32(__addr, __value) __arm_vst2q_f32(__addr, __value)
-#define vst1q_p_f32(__addr, __value, __p) __arm_vst1q_p_f32(__addr, __value, 
__p)
  #define vsetq_lane_f16(__a, __b,  __idx) __arm_vsetq_lane_f16(__a, __b,  
__idx)
  #define vsetq_lane_f32(__a, __b,  __idx) __arm_vsetq_lane_f32(__a, __b,  
__idx)
  #define vsetq_lane_s16(__a, __b,  __idx) __arm_vsetq_lane_s16(__a, __b,  
__idx)
@@ -1169,48 +1095,6 @@ __arm_vstrbq_scatter_offset_u16 (uint8_t * __base, 
uint16x8_t __offset, uint16x8
    __builtin_mve_vstrbq_scatter_offset_uv8hi ((__builtin_neon_qi *) __base, 
__offset, __value);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_s8 (int8_t * __addr, int8x16_t __value)
-{
-  __builtin_mve_vstrbq_sv16qi ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_s32 (int8_t * __addr, int32x4_t __value)
-{
-  __builtin_mve_vstrbq_sv4si ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_s16 (int8_t * __addr, int16x8_t __value)
-{
-  __builtin_mve_vstrbq_sv8hi ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_u8 (uint8_t * __addr, uint8x16_t __value)
-{
-  __builtin_mve_vstrbq_uv16qi ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_u32 (uint8_t * __addr, uint32x4_t __value)
-{
-  __builtin_mve_vstrbq_uv4si ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_u16 (uint8_t * __addr, uint16x8_t __value)
-{
-  __builtin_mve_vstrbq_uv8hi ((__builtin_neon_qi *) __addr, __value);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vstrwq_scatter_base_s32 (uint32x4_t __addr, const int __offset, 
int32x4_t __value)
@@ -1239,20 +1123,6 @@ __arm_vldrbq_gather_offset_s8 (int8_t const * __base, 
uint8x16_t __offset)
    return __builtin_mve_vldrbq_gather_offset_sv16qi ((__builtin_neon_qi *) 
__base, __offset);
  }
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_s8 (int8_t const * __base)
-{
-  return __builtin_mve_vldrbq_sv16qi ((__builtin_neon_qi *) __base);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_u8 (uint8_t const * __base)
-{
-  return __builtin_mve_vldrbq_uv16qi ((__builtin_neon_qi *) __base);
-}
-
  __extension__ extern __inline uint16x8_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vldrbq_gather_offset_u16 (uint8_t const * __base, uint16x8_t __offset)
@@ -1267,20 +1137,6 @@ __arm_vldrbq_gather_offset_s16 (int8_t const * __base, 
uint16x8_t __offset)
    return __builtin_mve_vldrbq_gather_offset_sv8hi ((__builtin_neon_qi *) 
__base, __offset);
  }
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_s16 (int8_t const * __base)
-{
-  return __builtin_mve_vldrbq_sv8hi ((__builtin_neon_qi *) __base);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_u16 (uint8_t const * __base)
-{
-  return __builtin_mve_vldrbq_uv8hi ((__builtin_neon_qi *) __base);
-}
-
  __extension__ extern __inline uint32x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vldrbq_gather_offset_u32 (uint8_t const * __base, uint32x4_t __offset)
@@ -1295,20 +1151,6 @@ __arm_vldrbq_gather_offset_s32 (int8_t const * __base, 
uint32x4_t __offset)
    return __builtin_mve_vldrbq_gather_offset_sv4si ((__builtin_neon_qi *) 
__base, __offset);
  }
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_s32 (int8_t const * __base)
-{
-  return __builtin_mve_vldrbq_sv4si ((__builtin_neon_qi *) __base);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_u32 (uint8_t const * __base)
-{
-  return __builtin_mve_vldrbq_uv4si ((__builtin_neon_qi *) __base);
-}
-
  __extension__ extern __inline int32x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vldrwq_gather_base_s32 (uint32x4_t __addr, const int __offset)
@@ -1323,48 +1165,6 @@ __arm_vldrwq_gather_base_u32 (uint32x4_t __addr, const 
int __offset)
    return __builtin_mve_vldrwq_gather_base_uv4si (__addr, __offset);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_s8 (int8_t * __addr, int8x16_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrbq_p_sv16qi ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_s32 (int8_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrbq_p_sv4si ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_s16 (int8_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrbq_p_sv8hi ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_u8 (uint8_t * __addr, uint8x16_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrbq_p_uv16qi ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_u32 (uint8_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrbq_p_uv4si ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_u16 (uint8_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrbq_p_uv8hi ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vstrbq_scatter_offset_p_s8 (int8_t * __base, uint8x16_t __offset, 
int8x16_t __value, mve_pred16_t __p)
@@ -1463,48 +1263,6 @@ __arm_vldrbq_gather_offset_z_u16 (uint8_t const * 
__base, uint16x8_t __offset, m
    return __builtin_mve_vldrbq_gather_offset_z_uv8hi ((__builtin_neon_qi *) 
__base, __offset, __p);
  }
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_s8 (int8_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_z_sv16qi ((__builtin_neon_qi *) __base, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_s32 (int8_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_z_sv4si ((__builtin_neon_qi *) __base, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_s16 (int8_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_z_sv8hi ((__builtin_neon_qi *) __base, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_u8 (uint8_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_z_uv16qi ((__builtin_neon_qi *) __base, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_u32 (uint8_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_z_uv4si ((__builtin_neon_qi *) __base, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_u16 (uint8_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrbq_z_uv8hi ((__builtin_neon_qi *) __base, __p);
-}
-
  __extension__ extern __inline int32x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vldrwq_gather_base_z_s32 (uint32x4_t __addr, const int __offset, 
mve_pred16_t __p)
@@ -1631,91 +1389,6 @@ __arm_vldrhq_gather_shifted_offset_z_u16 (uint16_t const 
* __base, uint16x8_t __
    return __builtin_mve_vldrhq_gather_shifted_offset_z_uv8hi 
((__builtin_neon_hi *) __base, __offset, __p);
  }
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_s32 (int16_t const * __base)
-{
-  return __builtin_mve_vldrhq_sv4si ((__builtin_neon_hi *) __base);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_s16 (int16_t const * __base)
-{
-  return __builtin_mve_vldrhq_sv8hi ((__builtin_neon_hi *) __base);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_u32 (uint16_t const * __base)
-{
-  return __builtin_mve_vldrhq_uv4si ((__builtin_neon_hi *) __base);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_u16 (uint16_t const * __base)
-{
-  return __builtin_mve_vldrhq_uv8hi ((__builtin_neon_hi *) __base);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_z_s32 (int16_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrhq_z_sv4si ((__builtin_neon_hi *) __base, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_z_s16 (int16_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrhq_z_sv8hi ((__builtin_neon_hi *) __base, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_z_u32 (uint16_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrhq_z_uv4si ((__builtin_neon_hi *) __base, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_z_u16 (uint16_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrhq_z_uv8hi ((__builtin_neon_hi *) __base, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_s32 (int32_t const * __base)
-{
-  return __builtin_mve_vldrwq_sv4si ((__builtin_neon_si *) __base);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_u32 (uint32_t const * __base)
-{
-  return __builtin_mve_vldrwq_uv4si ((__builtin_neon_si *) __base);
-}
-
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_z_s32 (int32_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrwq_z_sv4si ((__builtin_neon_si *) __base, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_z_u32 (uint32_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrwq_z_uv4si ((__builtin_neon_si *) __base, __p);
-}
-
  __extension__ extern __inline int64x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vldrdq_gather_base_s64 (uint64x2_t __addr, const int __offset)
@@ -1969,90 +1642,6 @@ __arm_vstrhq_scatter_shifted_offset_p_u16 (uint16_t * 
__base, uint16x8_t __offse
    __builtin_mve_vstrhq_scatter_shifted_offset_p_uv8hi ((__builtin_neon_hi *) 
__base, __offset, __value, __p);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_s32 (int16_t * __addr, int32x4_t __value)
-{
-  __builtin_mve_vstrhq_sv4si ((__builtin_neon_hi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_s16 (int16_t * __addr, int16x8_t __value)
-{
-  __builtin_mve_vstrhq_sv8hi ((__builtin_neon_hi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_u32 (uint16_t * __addr, uint32x4_t __value)
-{
-  __builtin_mve_vstrhq_uv4si ((__builtin_neon_hi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_u16 (uint16_t * __addr, uint16x8_t __value)
-{
-  __builtin_mve_vstrhq_uv8hi ((__builtin_neon_hi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p_s32 (int16_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrhq_p_sv4si ((__builtin_neon_hi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p_s16 (int16_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrhq_p_sv8hi ((__builtin_neon_hi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p_u32 (uint16_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrhq_p_uv4si ((__builtin_neon_hi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p_u16 (uint16_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrhq_p_uv8hi ((__builtin_neon_hi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_s32 (int32_t * __addr, int32x4_t __value)
-{
-  __builtin_mve_vstrwq_sv4si ((__builtin_neon_si *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_u32 (uint32_t * __addr, uint32x4_t __value)
-{
-  __builtin_mve_vstrwq_uv4si ((__builtin_neon_si *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p_s32 (int32_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrwq_p_sv4si ((__builtin_neon_si *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p_u32 (uint32_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrwq_p_uv4si ((__builtin_neon_si *) __addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vstrdq_scatter_base_p_s64 (uint64x2_t __addr, const int __offset, 
int64x2_t __value, mve_pred16_t __p)
@@ -3190,20 +2779,6 @@ __arm_vsbcq_m_u32 (uint32x4_t __inactive, uint32x4_t 
__a, uint32x4_t __b, unsign
    return __res;
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_u8 (uint8_t * __addr, uint8x16_t __value, mve_pred16_t __p)
-{
-  return __arm_vstrbq_p_u8 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_s8 (int8_t * __addr, int8x16_t __value, mve_pred16_t __p)
-{
-  return __arm_vstrbq_p_s8 (__addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vst2q_s8 (int8_t * __addr, int8x16x2_t __value)
@@ -3222,20 +2797,6 @@ __arm_vst2q_u8 (uint8_t * __addr, uint8x16x2_t __value)
    __builtin_mve_vst2qv16qi ((__builtin_neon_qi *) __addr, __rv.__o);
  }
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_u8 (uint8_t const *__base, mve_pred16_t __p)
-{
-  return __arm_vldrbq_z_u8 ( __base, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_s8 (int8_t const *__base, mve_pred16_t __p)
-{
-  return __arm_vldrbq_z_s8 ( __base, __p);
-}
-
  __extension__ extern __inline int8x16x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vld2q_s8 (int8_t const * __addr)
@@ -3272,20 +2833,6 @@ __arm_vld4q_u8 (uint8_t const * __addr)
    return __rv.__i;
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_u16 (uint16_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
-  return __arm_vstrhq_p_u16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_s16 (int16_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
-  return __arm_vstrhq_p_s16 (__addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vst2q_s16 (int16_t * __addr, int16x8x2_t __value)
@@ -3304,20 +2851,6 @@ __arm_vst2q_u16 (uint16_t * __addr, uint16x8x2_t __value)
    __builtin_mve_vst2qv8hi ((__builtin_neon_hi *) __addr, __rv.__o);
  }
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_u16 (uint16_t const *__base, mve_pred16_t __p)
-{
-  return __arm_vldrhq_z_u16 ( __base, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_s16 (int16_t const *__base, mve_pred16_t __p)
-{
-  return __arm_vldrhq_z_s16 ( __base, __p);
-}
-
  __extension__ extern __inline int16x8x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vld2q_s16 (int16_t const * __addr)
@@ -3354,20 +2887,6 @@ __arm_vld4q_u16 (uint16_t const * __addr)
    return __rv.__i;
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_u32 (uint32_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
-  return __arm_vstrwq_p_u32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_s32 (int32_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
-  return __arm_vstrwq_p_s32 (__addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vst2q_s32 (int32_t * __addr, int32x4x2_t __value)
@@ -3386,20 +2905,6 @@ __arm_vst2q_u32 (uint32_t * __addr, uint32x4x2_t __value)
    __builtin_mve_vst2qv4si ((__builtin_neon_si *) __addr, __rv.__o);
  }
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_u32 (uint32_t const *__base, mve_pred16_t __p)
-{
-  return __arm_vldrwq_z_u32 ( __base, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_s32 (int32_t const *__base, mve_pred16_t __p)
-{
-  return __arm_vldrwq_z_s32 ( __base, __p);
-}
-
  __extension__ extern __inline int32x4x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vld2q_s32 (int32_t const * __addr)
@@ -4319,34 +3824,6 @@ __arm_vornq_m_f16 (float16x8_t __inactive, float16x8_t 
__a, float16x8_t __b, mve
    return __builtin_mve_vornq_m_fv8hf (__inactive, __a, __b, __p);
  }
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_f32 (float32_t const * __base)
-{
-  return __builtin_mve_vldrwq_fv4sf((__builtin_neon_si *) __base);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_z_f32 (float32_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrwq_z_fv4sf((__builtin_neon_si *) __base, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_z_f16 (float16_t const * __base, mve_pred16_t __p)
-{
-  return __builtin_mve_vldrhq_z_fv8hf((__builtin_neon_hi *) __base, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_f16 (float16_t const * __base)
-{
-  return __builtin_mve_vldrhq_fv8hf((__builtin_neon_hi *) __base);
-}
-
  __extension__ extern __inline float16x8_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vldrhq_gather_offset_f16 (float16_t const * __base, uint16x8_t __offset)
@@ -4417,34 +3894,6 @@ __arm_vldrwq_gather_shifted_offset_z_f32 (float32_t 
const * __base, uint32x4_t _
    return __builtin_mve_vldrwq_gather_shifted_offset_z_fv4sf 
((__builtin_neon_si *) __base, __offset, __p);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p_f32 (float32_t * __addr, float32x4_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrwq_p_fv4sf ((__builtin_neon_si *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_f32 (float32_t * __addr, float32x4_t __value)
-{
-  __builtin_mve_vstrwq_fv4sf ((__builtin_neon_si *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_f16 (float16_t * __addr, float16x8_t __value)
-{
-  __builtin_mve_vstrhq_fv8hf ((__builtin_neon_hi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p_f16 (float16_t * __addr, float16x8_t __value, mve_pred16_t __p)
-{
-  __builtin_mve_vstrhq_p_fv8hf ((__builtin_neon_hi *) __addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vstrhq_scatter_offset_f16 (float16_t * __base, uint16x8_t __offset, 
float16x8_t __value)
@@ -4833,13 +4282,6 @@ __arm_vld2q_f16 (float16_t const * __addr)
    return __rv.__i;
  }
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_f16 (float16_t const *__base, mve_pred16_t __p)
-{
-  return __arm_vldrhq_z_f16 (__base, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vst2q_f16 (float16_t * __addr, float16x8x2_t __value)
@@ -4849,13 +4291,6 @@ __arm_vst2q_f16 (float16_t * __addr, float16x8x2_t 
__value)
    __builtin_mve_vst2qv8hf (__addr, __rv.__o);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_f16 (float16_t * __addr, float16x8_t __value, mve_pred16_t __p)
-{
-  return __arm_vstrhq_p_f16 (__addr, __value, __p);
-}
-
  __extension__ extern __inline float32x4x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vld4q_f32 (float32_t const * __addr)
@@ -4874,27 +4309,13 @@ __arm_vld2q_f32 (float32_t const * __addr)
    return __rv.__i;
  }
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_f32 (float32_t const *__base, mve_pred16_t __p)
-{
-  return __arm_vldrwq_z_f32 (__base, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst2q_f32 (float32_t * __addr, float32x4x2_t __value)
-{
-  union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
-  __rv.__i = __value;
-  __builtin_mve_vst2qv4sf (__addr, __rv.__o);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_f32 (float32_t * __addr, float32x4_t __value, mve_pred16_t __p)
+__arm_vst2q_f32 (float32_t * __addr, float32x4x2_t __value)
  {
-  return __arm_vstrwq_p_f32 (__addr, __value, __p);
+  union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__i = __value;
+  __builtin_mve_vst2qv4sf (__addr, __rv.__o);
  }
__extension__ extern __inline float16x8_t
@@ -5283,48 +4704,6 @@ __arm_vstrbq_scatter_offset (uint8_t * __base, 
uint16x8_t __offset, uint16x8_t _
   __arm_vstrbq_scatter_offset_u16 (__base, __offset, __value);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (int8_t * __addr, int8x16_t __value)
-{
- __arm_vstrbq_s8 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (int8_t * __addr, int32x4_t __value)
-{
- __arm_vstrbq_s32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (int8_t * __addr, int16x8_t __value)
-{
- __arm_vstrbq_s16 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (uint8_t * __addr, uint8x16_t __value)
-{
- __arm_vstrbq_u8 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (uint8_t * __addr, uint32x4_t __value)
-{
- __arm_vstrbq_u32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (uint8_t * __addr, uint16x8_t __value)
-{
- __arm_vstrbq_u16 (__addr, __value);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vstrwq_scatter_base (uint32x4_t __addr, const int __offset, int32x4_t 
__value)
@@ -5381,48 +4760,6 @@ __arm_vldrbq_gather_offset (int8_t const * __base, 
uint32x4_t __offset)
   return __arm_vldrbq_gather_offset_s32 (__base, __offset);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (int8_t * __addr, int8x16_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_s8 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (int8_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_s32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (int8_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_s16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (uint8_t * __addr, uint8x16_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_u8 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (uint8_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_u32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (uint8_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_u16 (__addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vstrbq_scatter_offset_p (int8_t * __base, uint8x16_t __offset, 
int8x16_t __value, mve_pred16_t __p)
@@ -5857,90 +5194,6 @@ __arm_vstrhq_scatter_shifted_offset_p (uint16_t * 
__base, uint16x8_t __offset, u
   __arm_vstrhq_scatter_shifted_offset_p_u16 (__base, __offset, __value, __p);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq (int16_t * __addr, int32x4_t __value)
-{
- __arm_vstrhq_s32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq (int16_t * __addr, int16x8_t __value)
-{
- __arm_vstrhq_s16 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq (uint16_t * __addr, uint32x4_t __value)
-{
- __arm_vstrhq_u32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq (uint16_t * __addr, uint16x8_t __value)
-{
- __arm_vstrhq_u16 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p (int16_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrhq_p_s32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p (int16_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
- __arm_vstrhq_p_s16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p (uint16_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrhq_p_u32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p (uint16_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
- __arm_vstrhq_p_u16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq (int32_t * __addr, int32x4_t __value)
-{
- __arm_vstrwq_s32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq (uint32_t * __addr, uint32x4_t __value)
-{
- __arm_vstrwq_u32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p (int32_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_p_s32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p (uint32_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_p_u32 (__addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vstrdq_scatter_base_p (uint64x2_t __addr, const int __offset, int64x2_t 
__value, mve_pred16_t __p)
@@ -6837,20 +6090,6 @@ __arm_vsbcq_m (uint32x4_t __inactive, uint32x4_t __a, 
uint32x4_t __b, unsigned *
   return __arm_vsbcq_m_u32 (__inactive, __a, __b, __carry, __p);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (uint8_t * __addr, uint8x16_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_u8 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (int8_t * __addr, int8x16_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_s8 (__addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vst2q (int8_t * __addr, int8x16x2_t __value)
@@ -6865,20 +6104,6 @@ __arm_vst2q (uint8_t * __addr, uint8x16x2_t __value)
   __arm_vst2q_u8 (__addr, __value);
  }
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (uint8_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_u8 (__base, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (int8_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_s8 (__base, __p);
-}
-
  __extension__ extern __inline int8x16x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vld2q (int8_t const * __addr)
@@ -6907,20 +6132,6 @@ __arm_vld4q (uint8_t const * __addr)
   return __arm_vld4q_u8 (__addr);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (uint16_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_u16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (int16_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_s16 (__addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vst2q (int16_t * __addr, int16x8x2_t __value)
@@ -6935,20 +6146,6 @@ __arm_vst2q (uint16_t * __addr, uint16x8x2_t __value)
   __arm_vst2q_u16 (__addr, __value);
  }
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (uint16_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_u16 (__base, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (int16_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_s16 (__base, __p);
-}
-
  __extension__ extern __inline int16x8x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vld2q (int16_t const * __addr)
@@ -6977,20 +6174,6 @@ __arm_vld4q (uint16_t const * __addr)
   return __arm_vld4q_u16 (__addr);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (uint32_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_u32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (int32_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_s32 (__addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vst2q (int32_t * __addr, int32x4x2_t __value)
@@ -7005,20 +6188,6 @@ __arm_vst2q (uint32_t * __addr, uint32x4x2_t __value)
   __arm_vst2q_u32 (__addr, __value);
  }
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (uint32_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_u32 (__base, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (int32_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_s32 (__base, __p);
-}
-
  __extension__ extern __inline int32x4x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vld2q (int32_t const * __addr)
@@ -7651,34 +6820,6 @@ __arm_vldrwq_gather_shifted_offset_z (float32_t const * 
__base, uint32x4_t __off
   return __arm_vldrwq_gather_shifted_offset_z_f32 (__base, __offset, __p);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p (float32_t * __addr, float32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_p_f32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq (float32_t * __addr, float32x4_t __value)
-{
- __arm_vstrwq_f32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq (float16_t * __addr, float16x8_t __value)
-{
- __arm_vstrhq_f16 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p (float16_t * __addr, float16x8_t __value, mve_pred16_t __p)
-{
- __arm_vstrhq_p_f16 (__addr, __value, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vstrhq_scatter_offset (float16_t * __base, uint16x8_t __offset, 
float16x8_t __value)
@@ -7861,13 +7002,6 @@ __arm_vld2q (float16_t const * __addr)
   return __arm_vld2q_f16 (__addr);
  }
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (float16_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_f16 (__base, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vst2q (float16_t * __addr, float16x8x2_t __value)
@@ -7875,13 +7009,6 @@ __arm_vst2q (float16_t * __addr, float16x8x2_t __value)
   __arm_vst2q_f16 (__addr, __value);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (float16_t * __addr, float16x8_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_f16 (__addr, __value, __p);
-}
-
  __extension__ extern __inline float32x4x4_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vld4q (float32_t const * __addr)
@@ -7896,13 +7023,6 @@ __arm_vld2q (float32_t const * __addr)
   return __arm_vld2q_f32 (__addr);
  }
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (float32_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_f32 (__base, __p);
-}
-
  __extension__ extern __inline void
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vst2q (float32_t * __addr, float32x4x2_t __value)
@@ -7910,13 +7030,6 @@ __arm_vst2q (float32_t * __addr, float32x4x2_t __value)
   __arm_vst2q_f32 (__addr, __value);
  }
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (float32_t * __addr, float32x4_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_f32 (__addr, __value, __p);
-}
-
  __extension__ extern __inline float16x8_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  __arm_vsetq_lane (float16_t __a, float16x8_t __b, const int __idx)
@@ -8428,17 +7541,6 @@ extern void *__ARM_undef;
    int 
(*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]:
 __arm_vornq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, 
float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
    int 
(*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]:
 __arm_vornq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, 
float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-#define __arm_vld1q_z(p0,p1) ( \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
-  int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 
(__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \
-  int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 
(__ARM_mve_coerce_s16_ptr(p0, int16_t *), p1), \
-  int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 
(__ARM_mve_coerce_s32_ptr(p0, int32_t *), p1), \
-  int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 
(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), p1), \
-  int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 
(__ARM_mve_coerce_u16_ptr(p0, uint16_t *), p1), \
-  int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 
(__ARM_mve_coerce_u32_ptr(p0, uint32_t *), p1), \
-  int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16 
(__ARM_mve_coerce_f16_ptr(p0, float16_t *), p1), \
-  int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32 
(__ARM_mve_coerce_f32_ptr(p0, float32_t *), p1)))
-
  #define __arm_vld2q(p0) ( \
    _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
    int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 
(__ARM_mve_coerce_s8_ptr(p0, int8_t *)), \
@@ -8517,17 +7619,6 @@ extern void *__ARM_undef;
    int (*)[__ARM_mve_type_uint32_t_ptr]: 
__arm_vldrwq_gather_shifted_offset_z_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t 
*), p1, p2), \
    int (*)[__ARM_mve_type_float32_t_ptr]: 
__arm_vldrwq_gather_shifted_offset_z_f32 (__ARM_mve_coerce_f32_ptr(p0, 
float32_t *), p1, p2)))
-#define __arm_vst1q_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: 
__arm_vst1q_p_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, 
int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: 
__arm_vst1q_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), 
__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: 
__arm_vst1q_p_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), 
__ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: 
__arm_vst1q_p_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), 
__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: 
__arm_vst1q_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), 
__ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: 
__arm_vst1q_p_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), 
__ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8_t]: 
__arm_vst1q_p_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), 
__ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: 
__arm_vst1q_p_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), 
__ARM_mve_coerce(__p1, float32x4_t), p2));})
-
  #define __arm_vst2q(p0,p1) ({ __typeof(p1) __p1 = (p1); \
    _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
    int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16x2_t]: 
__arm_vst2q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, 
int8x16x2_t)), \
@@ -8933,15 +8024,6 @@ extern void *__ARM_undef;
    int (*)[__ARM_mve_type_int32_t_ptr]: 
__arm_vldrwq_gather_shifted_offset_z_s32 (__ARM_mve_coerce_s32_ptr(__p0, 
int32_t *), p1, p2), \
    int (*)[__ARM_mve_type_uint32_t_ptr]: 
__arm_vldrwq_gather_shifted_offset_z_u32 (__ARM_mve_coerce_u32_ptr(__p0, 
uint32_t *), p1, p2));})
-#define __arm_vst1q_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: 
__arm_vst1q_p_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, 
int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: 
__arm_vst1q_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), 
__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: 
__arm_vst1q_p_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), 
__ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: 
__arm_vst1q_p_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), 
__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: 
__arm_vst1q_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), 
__ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: 
__arm_vst1q_p_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), 
__ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
  #define __arm_vst2q(p0,p1) ({ __typeof(p1) __p1 = (p1); \
    _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
    int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16x2_t]: 
__arm_vst2q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, 
int8x16x2_t)), \
@@ -8951,20 +8033,6 @@ extern void *__ARM_undef;
    int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x2_t]: 
__arm_vst2q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), 
__ARM_mve_coerce(__p1, uint16x8x2_t)), \
    int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x2_t]: 
__arm_vst2q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), 
__ARM_mve_coerce(__p1, uint32x4x2_t)));})
-#define __arm_vstrhq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: 
__arm_vstrhq_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), 
__ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int32x4_t]: 
__arm_vstrhq_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), 
__ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: 
__arm_vstrhq_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), 
__ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: 
__arm_vstrhq_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), 
__ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vstrhq_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: 
__arm_vstrhq_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), 
__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int32x4_t]: 
__arm_vstrhq_p_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), 
__ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: 
__arm_vstrhq_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), 
__ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: 
__arm_vstrhq_p_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), 
__ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
  #define __arm_vstrhq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = 
(p1); \
    __typeof(p2) __p2 = (p2); \
    _Generic( (int 
(*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -8997,17 +8065,6 @@ extern void *__ARM_undef;
    int 
(*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
 __arm_vstrhq_scatter_shifted_offset_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t 
*), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
    int 
(*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
 __arm_vstrhq_scatter_shifted_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t 
*), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-
-#define __arm_vstrwq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: 
__arm_vstrwq_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), 
__ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: 
__arm_vstrwq_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), 
__ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vstrwq_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: 
__arm_vstrwq_p_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), 
__ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: 
__arm_vstrwq_p_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), 
__ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
  #define __arm_vstrdq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); 
\
    _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
    int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64 (p0, p1, 
__ARM_mve_coerce(__p2, int64x2_t), p3), \
@@ -9105,14 +8162,6 @@ extern void *__ARM_undef;
    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: 
__arm_vbicq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, 
uint16x8_t), p3), \
    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: 
__arm_vbicq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, 
uint32x4_t), p3));})
-#define __arm_vld1q_z(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
-  int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 
(__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \
-  int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 
(__ARM_mve_coerce_s16_ptr(p0, int16_t *), p1), \
-  int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 
(__ARM_mve_coerce_s32_ptr(p0, int32_t *), p1), \
-  int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 
(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), p1), \
-  int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 
(__ARM_mve_coerce_u16_ptr(p0, uint16_t *), p1), \
-  int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 
(__ARM_mve_coerce_u32_ptr(p0, uint32_t *), p1)))
-
  #define __arm_vld2q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
    int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 
(__ARM_mve_coerce_s8_ptr(p0, int8_t *)), \
    int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 
(__ARM_mve_coerce_s16_ptr(p0, int16_t *)), \
@@ -9428,25 +8477,6 @@ extern void *__ARM_undef;
    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 
(__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \
    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 
(__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));})
-#define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: 
__arm_vstrbq_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, 
int8x16_t)), \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: 
__arm_vstrbq_s16 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, 
int16x8_t)), \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: 
__arm_vstrbq_s32 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, 
int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: 
__arm_vstrbq_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, 
uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: 
__arm_vstrbq_u16 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), 
__ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: 
__arm_vstrbq_u32 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), 
__ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: 
__arm_vstrbq_p_s8 (__ARM_mve_coerce_s8_ptr(__p0, int8_t *), 
__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: 
__arm_vstrbq_p_s16 (__ARM_mve_coerce_s8_ptr(__p0, int8_t *), 
__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: 
__arm_vstrbq_p_s32 (__ARM_mve_coerce_s8_ptr(__p0, int8_t *), 
__ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: 
__arm_vstrbq_p_u8 (__ARM_mve_coerce_u8_ptr(__p0, uint8_t *), 
__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: 
__arm_vstrbq_p_u16 (__ARM_mve_coerce_u8_ptr(__p0, uint8_t *), 
__ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: 
__arm_vstrbq_p_u32 (__ARM_mve_coerce_u8_ptr(__p0, uint8_t *), 
__ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
  #define __arm_vstrdq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
    _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
    int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_s64 (p0, p1, 
__ARM_mve_coerce(__p2, int64x2_t)), \
diff --git a/gcc/config/arm/arm_mve_builtins.def 
b/gcc/config/arm/arm_mve_builtins.def
index f141aab816c..08ae37170b3 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -669,20 +669,14 @@ VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vandq_m_f, v8hf, 
v4sf)
  VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_n_f, v8hf, v4sf)
  VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_f, v8hf, v4sf)
  VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vabdq_m_f, v8hf, v4sf)
-VAR3 (STRS, vstrbq_s, v16qi, v8hi, v4si)
-VAR3 (STRU, vstrbq_u, v16qi, v8hi, v4si)
  VAR3 (STRSS, vstrbq_scatter_offset_s, v16qi, v8hi, v4si)
  VAR3 (STRSU, vstrbq_scatter_offset_u, v16qi, v8hi, v4si)
  VAR1 (STRSBS, vstrwq_scatter_base_s, v4si)
  VAR1 (STRSBU, vstrwq_scatter_base_u, v4si)
  VAR3 (LDRGU, vldrbq_gather_offset_u, v16qi, v8hi, v4si)
  VAR3 (LDRGS, vldrbq_gather_offset_s, v16qi, v8hi, v4si)
-VAR3 (LDRS, vldrbq_s, v16qi, v8hi, v4si)
-VAR3 (LDRU, vldrbq_u, v16qi, v8hi, v4si)
  VAR1 (LDRGBS, vldrwq_gather_base_s, v4si)
  VAR1 (LDRGBU, vldrwq_gather_base_u, v4si)
-VAR3 (STRS_P, vstrbq_p_s, v16qi, v8hi, v4si)
-VAR3 (STRU_P, vstrbq_p_u, v16qi, v8hi, v4si)
  VAR3 (STRSS_P, vstrbq_scatter_offset_p_s, v16qi, v8hi, v4si)
  VAR3 (STRSU_P, vstrbq_scatter_offset_p_u, v16qi, v8hi, v4si)
  VAR1 (STRSBS_P, vstrwq_scatter_base_p_s, v4si)
@@ -691,15 +685,6 @@ VAR1 (LDRGBS_Z, vldrwq_gather_base_z_s, v4si)
  VAR1 (LDRGBU_Z, vldrwq_gather_base_z_u, v4si)
  VAR3 (LDRGS_Z, vldrbq_gather_offset_z_s, v16qi, v8hi, v4si)
  VAR3 (LDRGU_Z, vldrbq_gather_offset_z_u, v16qi, v8hi, v4si)
-VAR3 (LDRS_Z, vldrbq_z_s, v16qi, v8hi, v4si)
-VAR3 (LDRU_Z, vldrbq_z_u, v16qi, v8hi, v4si)
-VAR3 (LDRU, vld1q_u, v16qi, v8hi, v4si)
-VAR3 (LDRS, vld1q_s, v16qi, v8hi, v4si)
-VAR2 (LDRU_Z, vldrhq_z_u, v8hi, v4si)
-VAR2 (LDRU, vldrhq_u, v8hi, v4si)
-VAR2 (LDRS_Z, vldrhq_z_s, v8hi, v4si)
-VAR2 (LDRS, vldrhq_s, v8hi, v4si)
-VAR2 (LDRS, vld1q_f, v8hf, v4sf)
  VAR2 (LDRGU_Z, vldrhq_gather_shifted_offset_z_u, v8hi, v4si)
  VAR2 (LDRGU_Z, vldrhq_gather_offset_z_u, v8hi, v4si)
  VAR2 (LDRGU, vldrhq_gather_shifted_offset_u, v8hi, v4si)
@@ -708,14 +693,6 @@ VAR2 (LDRGS_Z, vldrhq_gather_shifted_offset_z_s, v8hi, 
v4si)
  VAR2 (LDRGS_Z, vldrhq_gather_offset_z_s, v8hi, v4si)
  VAR2 (LDRGS, vldrhq_gather_shifted_offset_s, v8hi, v4si)
  VAR2 (LDRGS, vldrhq_gather_offset_s, v8hi, v4si)
-VAR1 (LDRS, vldrhq_f, v8hf)
-VAR1 (LDRS_Z, vldrhq_z_f, v8hf)
-VAR1 (LDRS, vldrwq_f, v4sf)
-VAR1 (LDRS, vldrwq_s, v4si)
-VAR1 (LDRU, vldrwq_u, v4si)
-VAR1 (LDRS_Z, vldrwq_z_f, v4sf)
-VAR1 (LDRS_Z, vldrwq_z_s, v4si)
-VAR1 (LDRU_Z, vldrwq_z_u, v4si)
  VAR1 (LDRGBS, vldrdq_gather_base_s, v2di)
  VAR1 (LDRGBS, vldrwq_gather_base_f, v4sf)
  VAR1 (LDRGBS_Z, vldrdq_gather_base_z_s, v2di)
@@ -746,13 +723,6 @@ VAR1 (LDRGU_Z, vldrdq_gather_offset_z_u, v2di)
  VAR1 (LDRGU_Z, vldrdq_gather_shifted_offset_z_u, v2di)
  VAR1 (LDRGU_Z, vldrwq_gather_offset_z_u, v4si)
  VAR1 (LDRGU_Z, vldrwq_gather_shifted_offset_z_u, v4si)
-VAR3 (STRU, vst1q_u, v16qi, v8hi, v4si)
-VAR3 (STRS, vst1q_s, v16qi, v8hi, v4si)
-VAR2 (STRU_P, vstrhq_p_u, v8hi, v4si)
-VAR2 (STRU, vstrhq_u, v8hi, v4si)
-VAR2 (STRS_P, vstrhq_p_s, v8hi, v4si)
-VAR2 (STRS, vstrhq_s, v8hi, v4si)
-VAR2 (STRS, vst1q_f, v8hf, v4sf)
  VAR2 (STRSU_P, vstrhq_scatter_shifted_offset_p_u, v8hi, v4si)
  VAR2 (STRSU_P, vstrhq_scatter_offset_p_u, v8hi, v4si)
  VAR2 (STRSU, vstrhq_scatter_shifted_offset_u, v8hi, v4si)
@@ -761,14 +731,6 @@ VAR2 (STRSS_P, vstrhq_scatter_shifted_offset_p_s, v8hi, 
v4si)
  VAR2 (STRSS_P, vstrhq_scatter_offset_p_s, v8hi, v4si)
  VAR2 (STRSS, vstrhq_scatter_shifted_offset_s, v8hi, v4si)
  VAR2 (STRSS, vstrhq_scatter_offset_s, v8hi, v4si)
-VAR1 (STRS, vstrhq_f, v8hf)
-VAR1 (STRS_P, vstrhq_p_f, v8hf)
-VAR1 (STRS, vstrwq_f, v4sf)
-VAR1 (STRS, vstrwq_s, v4si)
-VAR1 (STRU, vstrwq_u, v4si)
-VAR1 (STRS_P, vstrwq_p_f, v4sf)
-VAR1 (STRS_P, vstrwq_p_s, v4si)
-VAR1 (STRU_P, vstrwq_p_u, v4si)
  VAR1 (STRSBS, vstrdq_scatter_base_s, v2di)
  VAR1 (STRSBS, vstrwq_scatter_base_f, v4sf)
  VAR1 (STRSBS_P, vstrdq_scatter_base_p_s, v2di)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index b9ff01cb104..d67e0be1788 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -283,6 +283,14 @@ (define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI V2QI])
  (define_mode_iterator MVE_V8HF [V8HF])
  (define_mode_iterator MVE_V16QI [V16QI])
+;; Types for MVE truncating stores and widening loads
+(define_mode_iterator MVE_w_narrow_TYPE [V8QI V4QI V4HI])
+(define_mode_attr MVE_w_narrow_type [(V8QI "v8qi") (V4QI "v4qi") (V4HI 
"v4hi")])
+(define_mode_attr MVE_wide_n_TYPE [(V8QI "V8HI") (V4QI "V4SI") (V4HI "V4SI")])
+(define_mode_attr MVE_wide_n_type [(V8QI "v8hi") (V4QI "v4si") (V4HI "v4si")])
+(define_mode_attr MVE_wide_n_sz_elem [(V8QI "16") (V4QI "32") (V4HI "32")])
+(define_mode_attr MVE_wide_n_VPRED [(V8QI "V8BI") (V4QI "V4BI") (V4HI "V4BI")])
+
  ;;----------------------------------------------------------------------------
  ;; Code iterators
  ;;----------------------------------------------------------------------------
@@ -1769,6 +1777,10 @@ (define_mode_attr V_elem_ch [(V8QI "b")  (V16QI "b")
                             (V2SF "s") (V4SF  "s")
                             (V2SF "s") (V4SF  "s")])
+(define_mode_attr MVE_elem_ch [(V4QI "b") (V8QI "b") (V16QI "b")
+                              (V4HI "h") (V8HI "h") (V8HF "h")
+                              (V4SI "w") (V4SF "w")])
+
  (define_mode_attr VH_elem_ch [(V4HI "s") (V8HI  "s")
                              (V4HF "s") (V8HF  "s")
                              (HF "s")])
@@ -2472,19 +2484,16 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") 
(VREV16Q_S "s")
                       (VMLALDAVAXQ_P_S "s")
                       (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u")
                       (VSTRWQSB_S "s") (VSTRWQSB_U "u") (VSTRBQSO_S "s")
-                      (VSTRBQSO_U "u") (VSTRBQ_S "s") (VSTRBQ_U "u")
-                      (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRBQ_S "s")
-                      (VLDRBQ_U "u") (VLDRWQGB_S "s") (VLDRWQGB_U "u")
-                      (VLD1Q_S "s") (VLD1Q_U "u") (VLDRHQGO_S "s")
+                      (VSTRBQSO_U "u")
+                      (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRWQGB_S "s")
+                      (VLDRWQGB_U "u") (VLDRHQGO_S "s")
                       (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u")
-                      (VLDRHQ_S "s") (VLDRHQ_U "u") (VLDRWQ_S "s")
-                      (VLDRWQ_U "u") (VLDRDQGB_S "s") (VLDRDQGB_U "u")
+                      (VLDRDQGB_S "s") (VLDRDQGB_U "u")
                       (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s")
                       (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u")
-                      (VLDRWQGSO_S "s") (VLDRWQGSO_U "u") (VST1Q_S "s")
-                      (VST1Q_U "u") (VSTRHQSO_S "s") (VSTRHQSO_U "u")
-                      (VSTRHQSSO_S "s") (VSTRHQSSO_U "u") (VSTRHQ_S "s")
-                      (VSTRHQ_U "u") (VSTRWQ_S "s") (VSTRWQ_U "u")
+                      (VLDRWQGSO_S "s") (VLDRWQGSO_U "u")
+                      (VSTRHQSO_S "s") (VSTRHQSO_U "u")
+                      (VSTRHQSSO_S "s") (VSTRHQSSO_U "u")
                       (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VSTRDQSO_S "s")
                       (VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u")
                       (VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u")
@@ -2899,25 +2908,17 @@ (define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S 
VSHRNBQ_M_N_U])
  (define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U])
  (define_int_iterator VSTRWSBQ [VSTRWQSB_S VSTRWQSB_U])
  (define_int_iterator VSTRBSOQ [VSTRBQSO_S VSTRBQSO_U])
-(define_int_iterator VSTRBQ [VSTRBQ_S VSTRBQ_U])
  (define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U])
-(define_int_iterator VLDRBQ [VLDRBQ_S VLDRBQ_U])
  (define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U])
-(define_int_iterator VLD1Q [VLD1Q_S VLD1Q_U])
  (define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U])
  (define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U])
-(define_int_iterator VLDRHQ [VLDRHQ_S VLDRHQ_U])
-(define_int_iterator VLDRWQ [VLDRWQ_S VLDRWQ_U])
  (define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U])
  (define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U])
  (define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U])
  (define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U])
  (define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U])
-(define_int_iterator VST1Q [VST1Q_S VST1Q_U])
  (define_int_iterator VSTRHSOQ [VSTRHQSO_S VSTRHQSO_U])
  (define_int_iterator VSTRHSSOQ [VSTRHQSSO_S VSTRHQSSO_U])
-(define_int_iterator VSTRHQ [VSTRHQ_S VSTRHQ_U])
-(define_int_iterator VSTRWQ [VSTRWQ_S VSTRWQ_U])
  (define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U])
  (define_int_iterator VSTRDSOQ [VSTRDQSO_S VSTRDQSO_U])
  (define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 706a45c7d66..17fa4d0182e 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -3354,26 +3354,6 @@ (define_insn "mve_vornq_m_f<mode>"
    (set_attr "type" "mve_move")
     (set_attr "length""8")])
-;;
-;; [vstrbq_s vstrbq_u]
-;;
-(define_insn "mve_vstrbq_<supf><mode>"
-  [(set (match_operand:<MVE_B_ELEM> 0 "mve_memory_operand" "=Ux")
-       (unspec:<MVE_B_ELEM> [(match_operand:MVE_2 1 "s_register_operand" "w")]
-        VSTRBQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[1]);
-   ops[1] = gen_rtx_REG (TImode, regno);
-   ops[0]  = operands[0];
-   output_asm_insn("vstrb.<V_sz_elem>\t%q1, %E0",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vstrbq_<supf><mode>"))
-  (set_attr "length" "4")])
-
  ;;
  ;; [vstrbq_scatter_offset_s vstrbq_scatter_offset_u]
  ;;
@@ -3450,29 +3430,6 @@ (define_insn "mve_vldrbq_gather_offset_<supf><mode>"
   [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrbq_gather_offset_<supf><mode>"))
    (set_attr "length" "4")])
-;;
-;; [vldrbq_s vldrbq_u]
-;;
-(define_insn "mve_vldrbq_<supf><mode>"
-  [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
-       (unspec:MVE_2 [(match_operand:<MVE_B_ELEM> 1 "mve_memory_operand" "Ux")]
-        VLDRBQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[0]);
-   ops[0] = gen_rtx_REG (TImode, regno);
-   ops[1]  = operands[1];
-   if (<V_sz_elem> == 8)
-     output_asm_insn ("vldrb.<V_sz_elem>\t%q0, %E1",ops);
-   else
-     output_asm_insn ("vldrb.<supf><V_sz_elem>\t%q0, %E1",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrbq_<supf><mode>"))
-  (set_attr "length" "4")])
-
  ;;
  ;; [vldrwq_gather_base_s vldrwq_gather_base_u]
  ;;
@@ -3551,25 +3508,6 @@ (define_insn "mve_vstrwq_scatter_base_p_<supf>v4si"
   [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vstrwq_scatter_base_<supf>v4si"))
    (set_attr "length" "8")])
-(define_insn "mve_vstrbq_p_<supf><mode>"
-  [(set (match_operand:<MVE_B_ELEM> 0 "mve_memory_operand" "=Ux")
-       (unspec:<MVE_B_ELEM>
-        [(match_operand:MVE_2 1 "s_register_operand" "w")
-         (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")
-         (match_dup 0)]
-        VSTRBQ))]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[1]);
-   ops[1] = gen_rtx_REG (TImode, regno);
-   ops[0]  = operands[0];
-   output_asm_insn ("vpst\;vstrbt.<V_sz_elem>\t%q1, %E0",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vstrbq_<supf><mode>"))
-  (set_attr "length" "8")])
-
  ;;
  ;; [vldrbq_gather_offset_z_s vldrbq_gather_offset_z_u]
  ;;
@@ -3596,30 +3534,6 @@ (define_insn "mve_vldrbq_gather_offset_z_<supf><mode>"
   [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrbq_gather_offset_<supf><mode>"))
    (set_attr "length" "8")])
-;;
-;; [vldrbq_z_s vldrbq_z_u]
-;;
-(define_insn "mve_vldrbq_z_<supf><mode>"
-  [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
-       (unspec:MVE_2 [(match_operand:<MVE_B_ELEM> 1 "mve_memory_operand" "Ux")
-                      (match_operand:<MVE_VPRED> 2 "vpr_register_operand" 
"Up")]
-        VLDRBQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[0]);
-   ops[0] = gen_rtx_REG (TImode, regno);
-   ops[1]  = operands[1];
-   if (<V_sz_elem> == 8)
-     output_asm_insn ("vpst\;vldrbt.<V_sz_elem>\t%q0, %E1",ops);
-   else
-     output_asm_insn ("vpst\;vldrbt.<supf><V_sz_elem>\t%q0, %E1",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrbq_<supf><mode>"))
-  (set_attr "length" "8")])
-
  ;;
  ;; [vldrwq_gather_base_z_s vldrwq_gather_base_z_u]
  ;;
@@ -3642,26 +3556,6 @@ (define_insn "mve_vldrwq_gather_base_z_<supf>v4si"
   [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrwq_gather_base_<supf>v4si"))
    (set_attr "length" "8")])
-;;
-;; [vldrhq_f]
-;;
-(define_insn "mve_vldrhq_fv8hf"
-  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
-       (unspec:V8HF [(match_operand:V8HI 1 "mve_memory_operand" "Ux")]
-        VLDRHQ_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[0]);
-   ops[0] = gen_rtx_REG (TImode, regno);
-   ops[1]  = operands[1];
-   output_asm_insn ("vldrh.16\t%q0, %E1",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_fv8hf"))
-  (set_attr "length" "4")])
-
  ;;
  ;; [vldrhq_gather_offset_s vldrhq_gather_offset_u]
  ;;
@@ -3762,176 +3656,6 @@ (define_insn 
"mve_vldrhq_gather_shifted_offset_z_<supf><mode>"
   [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrhq_gather_shifted_offset_<supf><mode>"))
    (set_attr "length" "8")])
-;;
-;; [vldrhq_s, vldrhq_u]
-;;
-(define_insn "mve_vldrhq_<supf><mode>"
-  [(set (match_operand:MVE_5 0 "s_register_operand" "=w")
-       (unspec:MVE_5 [(match_operand:<MVE_H_ELEM> 1 "mve_memory_operand" "Ux")]
-        VLDRHQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[0]);
-   ops[0] = gen_rtx_REG (TImode, regno);
-   ops[1]  = operands[1];
-   if (<V_sz_elem> == 16)
-     output_asm_insn ("vldrh.16\t%q0, %E1",ops);
-   else
-     output_asm_insn ("vldrh.<supf><V_sz_elem>\t%q0, %E1",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrhq_<supf><mode>"))
-  (set_attr "length" "4")])
-
-;;
-;; [vldrhq_z_f]
-;;
-(define_insn "mve_vldrhq_z_fv8hf"
-  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
-       (unspec:V8HF [(match_operand:V8HI 1 "mve_memory_operand" "Ux")
-       (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")]
-        VLDRHQ_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[0]);
-   ops[0] = gen_rtx_REG (TImode, regno);
-   ops[1]  = operands[1];
-   output_asm_insn ("vpst\;vldrht.16\t%q0, %E1",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_fv8hf"))
-  (set_attr "length" "8")])
-
-;;
-;; [vldrhq_z_s vldrhq_z_u]
-;;
-(define_insn "mve_vldrhq_z_<supf><mode>"
-  [(set (match_operand:MVE_5 0 "s_register_operand" "=w")
-       (unspec:MVE_5 [(match_operand:<MVE_H_ELEM> 1 "mve_memory_operand" "Ux")
-       (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")]
-        VLDRHQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[0]);
-   ops[0] = gen_rtx_REG (TImode, regno);
-   ops[1]  = operands[1];
-   if (<V_sz_elem> == 16)
-     output_asm_insn ("vpst\;vldrht.16\t%q0, %E1",ops);
-   else
-     output_asm_insn ("vpst\;vldrht.<supf><V_sz_elem>\t%q0, %E1",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrhq_<supf><mode>"))
-  (set_attr "length" "8")])
-
-;;
-;; [vldrwq_f]
-;;
-(define_insn "mve_vldrwq_fv4sf"
-  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
-       (unspec:V4SF [(match_operand:V4SI 1 "mve_memory_operand" "Ux")]
-        VLDRWQ_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[0]);
-   ops[0] = gen_rtx_REG (TImode, regno);
-   ops[1]  = operands[1];
-   output_asm_insn ("vldrw.32\t%q0, %E1",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_fv4sf"))
-  (set_attr "length" "4")])
-
-;;
-;; [vldrwq_s vldrwq_u]
-;;
-(define_insn "mve_vldrwq_<supf>v4si"
-  [(set (match_operand:V4SI 0 "s_register_operand" "=w")
-       (unspec:V4SI [(match_operand:V4SI 1 "mve_memory_operand" "Ux")]
-        VLDRWQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[0]);
-   ops[0] = gen_rtx_REG (TImode, regno);
-   ops[1]  = operands[1];
-   output_asm_insn ("vldrw.32\t%q0, %E1",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrwq_<supf>v4si"))
-  (set_attr "length" "4")])
-
-;;
-;; [vldrwq_z_f]
-;;
-(define_insn "mve_vldrwq_z_fv4sf"
-  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
-       (unspec:V4SF [(match_operand:V4SI 1 "mve_memory_operand" "Ux")
-       (match_operand:V4BI 2 "vpr_register_operand" "Up")]
-        VLDRWQ_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[0]);
-   ops[0] = gen_rtx_REG (TImode, regno);
-   ops[1]  = operands[1];
-   output_asm_insn ("vpst\;vldrwt.32\t%q0, %E1",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_fv4sf"))
-  (set_attr "length" "8")])
-
-;;
-;; [vldrwq_z_s vldrwq_z_u]
-;;
-(define_insn "mve_vldrwq_z_<supf>v4si"
-  [(set (match_operand:V4SI 0 "s_register_operand" "=w")
-       (unspec:V4SI [(match_operand:V4SI 1 "mve_memory_operand" "Ux")
-       (match_operand:V4BI 2 "vpr_register_operand" "Up")]
-        VLDRWQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[0]);
-   ops[0] = gen_rtx_REG (TImode, regno);
-   ops[1]  = operands[1];
-   output_asm_insn ("vpst\;vldrwt.32\t%q0, %E1",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrwq_<supf>v4si"))
-  (set_attr "length" "8")])
-
-(define_expand "@mve_vld1q_f<mode>"
-  [(match_operand:MVE_0 0 "s_register_operand")
-   (unspec:MVE_0 [(match_operand:<MVE_CNVT> 1 "mve_memory_operand")] VLD1Q_F)
-  ]
-  "TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
-{
-  emit_insn (gen_mve_vldr<V_sz_elem1>q_f<mode>(operands[0],operands[1]));
-  DONE;
-})
-
-(define_expand "@mve_vld1q_<supf><mode>"
-  [(match_operand:MVE_2 0 "s_register_operand")
-   (unspec:MVE_2 [(match_operand:MVE_2 1 "mve_memory_operand")] VLD1Q)
-  ]
-  "TARGET_HAVE_MVE"
-{
-  emit_insn (gen_mve_vldr<V_sz_elem1>q_<supf><mode>(operands[0],operands[1]));
-  DONE;
-})
-
  ;;
  ;; [vldrdq_gather_base_s vldrdq_gather_base_u]
  ;;
@@ -4368,71 +4092,6 @@ (define_insn 
"mve_vldrwq_gather_shifted_offset_z_<supf>v4si"
   [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vldrwq_gather_shifted_offset_<supf>v4si"))
    (set_attr "length" "8")])
-;;
-;; [vstrhq_f]
-;;
-(define_insn "mve_vstrhq_fv8hf"
-  [(set (match_operand:V8HI 0 "mve_memory_operand" "=Ux")
-       (unspec:V8HI [(match_operand:V8HF 1 "s_register_operand" "w")]
-        VSTRHQ_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[1]);
-   ops[1] = gen_rtx_REG (TImode, regno);
-   ops[0]  = operands[0];
-   output_asm_insn ("vstrh.16\t%q1, %E0",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_fv8hf"))
-  (set_attr "length" "4")])
-
-;;
-;; [vstrhq_p_f]
-;;
-(define_insn "mve_vstrhq_p_fv8hf"
-  [(set (match_operand:V8HI 0 "mve_memory_operand" "=Ux")
-       (unspec:V8HI
-        [(match_operand:V8HF 1 "s_register_operand" "w")
-         (match_operand:V8BI 2 "vpr_register_operand" "Up")
-         (match_dup 0)]
-        VSTRHQ_F))]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[1]);
-   ops[1] = gen_rtx_REG (TImode, regno);
-   ops[0]  = operands[0];
-   output_asm_insn ("vpst\;vstrht.16\t%q1, %E0",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_fv8hf"))
-  (set_attr "length" "8")])
-
-;;
-;; [vstrhq_p_s vstrhq_p_u]
-;;
-(define_insn "mve_vstrhq_p_<supf><mode>"
-  [(set (match_operand:<MVE_H_ELEM> 0 "mve_memory_operand" "=Ux")
-       (unspec:<MVE_H_ELEM>
-        [(match_operand:MVE_5 1 "s_register_operand" "w")
-         (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")
-         (match_dup 0)]
-        VSTRHQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[1]);
-   ops[1] = gen_rtx_REG (TImode, regno);
-   ops[0]  = operands[0];
-   output_asm_insn ("vpst\;vstrht.<V_sz_elem>\t%q1, %E0",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vstrhq_<supf><mode>"))
-  (set_attr "length" "8")])
-
  ;;
  ;; [vstrhq_scatter_offset_p_s vstrhq_scatter_offset_p_u]
  ;;
@@ -4558,130 +4217,6 @@ (define_insn 
"mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn"
   [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vstrhq_scatter_shifted_offset_<supf><mode>_insn"))
    (set_attr "length" "4")])
-;;
-;; [vstrhq_s, vstrhq_u]
-;;
-(define_insn "mve_vstrhq_<supf><mode>"
-  [(set (match_operand:<MVE_H_ELEM> 0 "mve_memory_operand" "=Ux")
-       (unspec:<MVE_H_ELEM> [(match_operand:MVE_5 1 "s_register_operand" "w")]
-        VSTRHQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[1]);
-   ops[1] = gen_rtx_REG (TImode, regno);
-   ops[0]  = operands[0];
-   output_asm_insn ("vstrh.<V_sz_elem>\t%q1, %E0",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vstrhq_<supf><mode>"))
-  (set_attr "length" "4")])
-
-;;
-;; [vstrwq_f]
-;;
-(define_insn "mve_vstrwq_fv4sf"
-  [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
-       (unspec:V4SI [(match_operand:V4SF 1 "s_register_operand" "w")]
-        VSTRWQ_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[1]);
-   ops[1] = gen_rtx_REG (TImode, regno);
-   ops[0]  = operands[0];
-   output_asm_insn ("vstrw.32\t%q1, %E0",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_fv4sf"))
-  (set_attr "length" "4")])
-
-;;
-;; [vstrwq_p_f]
-;;
-(define_insn "mve_vstrwq_p_fv4sf"
-  [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
-       (unspec:V4SI
-        [(match_operand:V4SF 1 "s_register_operand" "w")
-         (match_operand:V4BI 2 "vpr_register_operand" "Up")
-         (match_dup 0)]
-        VSTRWQ_F))]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[1]);
-   ops[1] = gen_rtx_REG (TImode, regno);
-   ops[0]  = operands[0];
-   output_asm_insn ("vpst\;vstrwt.32\t%q1, %E0",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_fv4sf"))
-  (set_attr "length" "8")])
-
-;;
-;; [vstrwq_p_s vstrwq_p_u]
-;;
-(define_insn "mve_vstrwq_p_<supf>v4si"
-  [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
-       (unspec:V4SI
-        [(match_operand:V4SI 1 "s_register_operand" "w")
-         (match_operand:V4BI 2 "vpr_register_operand" "Up")
-         (match_dup 0)]
-        VSTRWQ))]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[1]);
-   ops[1] = gen_rtx_REG (TImode, regno);
-   ops[0]  = operands[0];
-   output_asm_insn ("vpst\;vstrwt.32\t%q1, %E0",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vstrwq_<supf>v4si"))
-  (set_attr "length" "8")])
-
-;;
-;; [vstrwq_s vstrwq_u]
-;;
-(define_insn "mve_vstrwq_<supf>v4si"
-  [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
-       (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")]
-        VSTRWQ))
-  ]
-  "TARGET_HAVE_MVE"
-{
-   rtx ops[2];
-   int regno = REGNO (operands[1]);
-   ops[1] = gen_rtx_REG (TImode, regno);
-   ops[0]  = operands[0];
-   output_asm_insn ("vstrw.32\t%q1, %E0",ops);
-   return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref 
"CODE_FOR_mve_vstrwq_<supf>v4si"))
-  (set_attr "length" "4")])
-
-(define_expand "@mve_vst1q_f<mode>"
-  [(match_operand:<MVE_CNVT> 0 "mve_memory_operand")
-   (unspec:<MVE_CNVT> [(match_operand:MVE_0 1 "s_register_operand")] VST1Q_F)
-  ]
-  "TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
-{
-  emit_insn (gen_mve_vstr<V_sz_elem1>q_f<mode>(operands[0],operands[1]));
-  DONE;
-})
-
-(define_expand "@mve_vst1q_<supf><mode>"
-  [(match_operand:MVE_2 0 "mve_memory_operand")
-   (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand")] VST1Q)
-  ]
-  "TARGET_HAVE_MVE"
-{
-  emit_insn (gen_mve_vstr<V_sz_elem1>q_<supf><mode>(operands[0],operands[1]));
-  DONE;
-})
-
  ;;
  ;; [vstrdq_scatter_base_p_s vstrdq_scatter_base_p_u]
  ;;
@@ -6931,6 +6466,7 @@ (define_expand "@arm_mve_reinterpret<mode>"
    }
  )
+
  ;; Originally expanded by 'predicated_doloop_end'.
  ;; In the rare situation where the branch is too far, we do also need to
  ;; revert FPSCR.LTPSIZE back to 0x100 after the last iteration.
@@ -6980,3 +6516,199 @@ (define_insn "dlstp<dlstp_elemsize>_insn"
    "TARGET_HAVE_MVE"
    "dlstp.<dlstp_elemsize>\t%|lr, %0"
    [(set_attr "type" "mve_misc")])
+
+
+;; Vector stores
+;; [vstrbq_s8, vstrhq_s16, vstrwq_s32,
+;;  vstrbq_u8, vstrhq_u16, vstrwq_u32,
+;;  vst1q ]
+(define_insn "@mve_vstrq_<mode>"
+  [(set (match_operand:MVE_VLD_ST 0 "mve_memory_operand" "=Ux")
+       (unspec:MVE_VLD_ST
+         [(match_operand:MVE_VLD_ST 1 "s_register_operand" "w")]
+         VSTRQ))
+  ]
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+  rtx ops[2];
+  int regno = REGNO (operands[1]);
+  ops[1] = gen_rtx_REG (TImode, regno);
+  ops[0]  = operands[0];
+  output_asm_insn ("vstr<MVE_elem_ch>.<V_sz_elem>\t%q1, %E0",ops);
+  return "";
+}
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_<mode>"))
+  (set_attr "length" "4")])
+
+;; Predicated vector stores
+;; [vstrbq_p_s8, vstrhq_p_s16, vstrwq_p_s32,
+;;  vstrbq_p_u8, vstrhq_p_u16, vstrwq_p_u32,
+;;  vst1q_p ]
+(define_insn "@mve_vstrq_p_<mode>"
+  [(set (match_operand:MVE_VLD_ST 0 "mve_memory_operand" "=Ux")
+       (unspec:MVE_VLD_ST [
+          (match_operand:MVE_VLD_ST 1 "s_register_operand" "w")
+          (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")
+          (match_dup 0)
+       ] VSTRQ_P))
+  ]
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+  rtx ops[2];
+  int regno = REGNO (operands[1]);
+  ops[1] = gen_rtx_REG (TImode, regno);
+  ops[0]  = operands[0];
+  output_asm_insn ("vpst\;vstr<MVE_elem_ch>t.<V_sz_elem>\t%q1, %E0",ops);
+  return "";
+}
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_<mode>"))
+  (set_attr "type" "mve_move")
+  (set_attr "length" "8")])
+
+;; Truncating vector stores
+;; [vstrbq_s16, vstrbq_s32, vstrhq_s32,
+;;  vstrbq_u16, vstrbq_u32, vstrhq_u32]
+(define_insn "@mve_vstrq_truncate_<mode>"
+  [(set (match_operand:MVE_w_narrow_TYPE 0 "mve_memory_operand" "=Ux")
+       (unspec:MVE_w_narrow_TYPE
+         [(truncate:MVE_w_narrow_TYPE
+           (match_operand:<MVE_wide_n_TYPE> 1 "s_register_operand" "w"))]
+         VSTRQ_TRUNC
+       ))]
+  "TARGET_HAVE_MVE"
+{
+  rtx ops[2];
+  int regno = REGNO (operands[1]);
+  ops[1] = gen_rtx_REG (TImode, regno);
+  ops[0]  = operands[0];
+  output_asm_insn ("vstr<MVE_elem_ch>.<MVE_wide_n_sz_elem>\t%q1, %E0",ops);
+  return "";
+}
+  [(set (attr "mve_unpredicated_insn")
+       (symbol_ref "CODE_FOR_mve_vstrq_truncate_<mode>"))
+   (set_attr "length" "4")])
+
+;; Predicated truncating vector stores
+;; [vstrbq_p_s16, vstrbq_p_s32, vstrhq_p_s32,
+;;  vstrbq_p_u16, vstrbq_p_u32, vstrhq_p_u32]
+(define_insn "@mve_vstrq_p_truncate_<mode>"
+  [(set (match_operand:MVE_w_narrow_TYPE 0 "mve_memory_operand" "=Ux")
+       (unspec:MVE_w_narrow_TYPE [
+         (truncate:MVE_w_narrow_TYPE
+           (match_operand:<MVE_wide_n_TYPE> 1 "s_register_operand" "w"))
+         (match_operand:<MVE_wide_n_VPRED> 2 "vpr_register_operand" "Up")
+         (match_dup 0)
+       ] VSTRQ_TRUNC_P))]
+  "TARGET_HAVE_MVE"
+{
+  rtx ops[2];
+  int regno = REGNO (operands[1]);
+  ops[1] = gen_rtx_REG (TImode, regno);
+  ops[0]  = operands[0];
+  output_asm_insn (
+    "vpst\;vstr<MVE_elem_ch>t.<MVE_wide_n_sz_elem>\t%q1, %E0",
+    ops
+  );
+  return "";
+}
+ [(set (attr "mve_unpredicated_insn")
+       (symbol_ref "CODE_FOR_mve_vstrq_truncate_<mode>"))
+  (set_attr "type" "mve_move")
+  (set_attr "length" "8")])
+
+;; Vector Loads
+;; [vldrbq_s8, vldrhq_s16, vldrwq_s32,
+;;  vldrbq_u8, vldrhq_u16, vldrwq_u32,
+;;  vld1q ]
+(define_insn "@mve_vldrq_<mode>"
+  [(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
+       (unspec:MVE_VLD_ST
+         [(match_operand:MVE_VLD_ST 1 "mve_memory_operand" "Ux")]
+         VLDRQ))]
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+  rtx ops[2];
+  int regno = REGNO (operands[0]);
+  ops[0] = gen_rtx_REG (TImode, regno);
+  ops[1]  = operands[1];
+  output_asm_insn ("vldr<MVE_elem_ch>.<V_sz_elem>\t%q0, %E1",ops);
+  return "";
+ }
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_<mode>"))
+  (set_attr "length" "4")])
+
+;; Predicated vector load
+;; [vldrbq_z_s8, vldrhq_z_s16, vldrwq_z_s32,
+;;  vldrbq_z_u8, vldrhq_z_u16, vldrwq_z_u32,
+;;  vld1q_z ]
+(define_insn "@mve_vldrq_z_<mode>"
+  [(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
+       (unspec:MVE_VLD_ST [
+          (match_operand:MVE_VLD_ST 1 "mve_memory_operand" "Ux")
+          (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")
+       ] VLDRQ_Z))]
+  "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+   || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+  rtx ops[2];
+  int regno = REGNO (operands[0]);
+  ops[0] = gen_rtx_REG (TImode, regno);
+  ops[1]  = operands[1];
+  output_asm_insn ("vpst\;vldr<MVE_elem_ch>t.<V_sz_elem>\t%q0, %E1",ops);
+  return "";
+}
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_<mode>"))
+  (set_attr "type" "mve_move")
+  (set_attr "length" "8")])
+
+;; Extending vector loads
+;; [vldrbq_s16, vldrbq_s32, vldrhq_s32,
+;;  vldrbq_u16, vldrbq_u32, vldrhq_u32]
+(define_insn "@mve_vldrq_extend_<mode><US>"
+  [(set (match_operand:<MVE_wide_n_TYPE> 0 "s_register_operand" "=w")
+       (unspec:<MVE_wide_n_TYPE>
+         [(SE:<MVE_wide_n_TYPE>
+           (match_operand:MVE_w_narrow_TYPE 1 "mve_memory_operand" "Ux"))]
+         VLDRQ_EXT))]
+  "TARGET_HAVE_MVE"
+{
+  rtx ops[2];
+  int regno = REGNO (operands[0]);
+  ops[0] = gen_rtx_REG (TImode, regno);
+  ops[1]  = operands[1];
+  output_asm_insn ("vldr<MVE_elem_ch>.<US><MVE_wide_n_sz_elem>\t%q0, %E1",ops);
+  return "";
+}
+ [(set (attr "mve_unpredicated_insn")
+       (symbol_ref "CODE_FOR_mve_vldrq_extend_<mode><US>"))
+  (set_attr "length" "4")])
+
+;; Predicated extending vector loads
+;; [vldrbq_z_s16, vldrbq_z_s32, vldrhq_z_s32,
+;;  vldrbq_z_u16, vldrbq_z_u32, vldrhq_z_u32]
+(define_insn "@mve_vldrq_z_extend_<mode><US>"
+  [(set (match_operand:<MVE_wide_n_TYPE> 0 "s_register_operand" "=w")
+         (unspec:<MVE_wide_n_TYPE> [
+             (SE:<MVE_wide_n_TYPE>
+               (match_operand:MVE_w_narrow_TYPE 1 "mve_memory_operand" "Ux"))
+             (match_operand:<MVE_wide_n_VPRED> 2 "vpr_register_operand" "Up")
+         ] VLDRQ_EXT_Z))]
+  "TARGET_HAVE_MVE"
+{
+  rtx ops[2];
+  int regno = REGNO (operands[0]);
+  ops[0] = gen_rtx_REG (TImode, regno);
+  ops[1]  = operands[1];
+  output_asm_insn (
+    "vpst\;vldr<MVE_elem_ch>t.<US><MVE_wide_n_sz_elem>\t%q0, %E1",
+    ops
+  );
+  return "";
+}
+ [(set (attr "mve_unpredicated_insn")
+       (symbol_ref "CODE_FOR_mve_vldrq_extend_<mode><US>"))
+  (set_attr "type" "mve_move")
+  (set_attr "length" "8")])
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index f5f4d154364..01963d54cd4 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -1150,27 +1150,18 @@ (define_c_enum "unspec" [
    VSTRWQSB_U
    VSTRBQSO_S
    VSTRBQSO_U
-  VSTRBQ_S
-  VSTRBQ_U
+  VLDRQ
+  VLDRQ_Z
+  VLDRQ_EXT
+  VLDRQ_EXT_Z
    VLDRBQGO_S
    VLDRBQGO_U
-  VLDRBQ_S
-  VLDRBQ_U
    VLDRWQGB_S
    VLDRWQGB_U
-  VLD1Q_F
-  VLD1Q_S
-  VLD1Q_U
-  VLDRHQ_F
    VLDRHQGO_S
    VLDRHQGO_U
    VLDRHQGSO_S
    VLDRHQGSO_U
-  VLDRHQ_S
-  VLDRHQ_U
-  VLDRWQ_F
-  VLDRWQ_S
-  VLDRWQ_U
    VLDRDQGB_S
    VLDRDQGB_U
    VLDRDQGO_S
@@ -1186,15 +1177,11 @@ (define_c_enum "unspec" [
    VLDRWQGSO_F
    VLDRWQGSO_S
    VLDRWQGSO_U
-  VSTRHQ_F
-  VST1Q_S
-  VST1Q_U
+  VSTRQ
+  VSTRQ_P
+  VSTRQ_TRUNC
+  VSTRQ_TRUNC_P
    VSTRHQSO_S
-  VSTRHQ_U
-  VSTRWQ_S
-  VSTRWQ_U
-  VSTRWQ_F
-  VST1Q_F
    VSTRDQSB_S
    VSTRDQSB_U
    VSTRDQSO_S

Reply via email to