Jennifer Schmitz <jschm...@nvidia.com> writes:
> [...]
> @@ -54,25 +56,121 @@ TEST_UNIFORM_ZX (mul_w0_s16_m_untied, svint16_t, int16_t,
>                z0 = svmul_m (p0, z1, x0))
>  
>  /*
> -** mul_2_s16_m_tied1:
> -**   mov     (z[0-9]+\.h), #2
> +** mul_4dupop1_s16_m_tied1:
> +**   mov     (z[0-9]+)\.h, #4
> +**   mov     (z[0-9]+)\.d, z0\.d
> +**   movprfx z0, \1
> +**   mul     z0\.h, p0/m, z0\.h, \2\.h
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s16_m_tied1, svint16_t,
> +             z0 = svmul_m (p0, svdup_s16 (4), z0),
> +             z0 = svmul_m (p0, svdup_s16 (4), z0))

Sorry for only noticing this now, but: the naming scheme was intended
to be that "tied1" meant "the result is in the same register as op1/
the first data argument" and that "tied2" meant "the result is in the
same register as op2/the second data argument".  This isn't documented
anywhere, so there was no way of knowing. :(

So I think this should be tied2 rather than tied1.

> +
> +/*
> +** mul_4dupop1ptrue_s16_m_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s16_m_tied1, svint16_t,
> +             z0 = svmul_m (svptrue_b16 (), svdup_s16 (4), z0),
> +             z0 = svmul_m (svptrue_b16 (), svdup_s16 (4), z0))

Similarly here, for the z and x variants, and for the corresponding
tests in other files.

OK for trunk with that change, thanks (no need for another review).

Richard

> +
> +/*
> +** mul_4dupop2_s16_m_tied1:
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s16_m_tied1, svint16_t,
> +             z0 = svmul_m (p0, z0, svdup_s16 (4)),
> +             z0 = svmul_m (p0, z0, svdup_s16 (4)))
> +
> +/*
> +** mul_4nop2_s16_m_tied1:
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s16_m_tied1, svint16_t,
> +             z0 = svmul_n_s16_m (p0, z0, 4),
> +             z0 = svmul_m (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s16_m_tied1:
> +**   lsl     z0\.h, p0/m, z0\.h, #14
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s16_m_tied1, svint16_t,
> +             z0 = svmul_n_s16_m (p0, z0, MAXPOW),
> +             z0 = svmul_m (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s16_m_tied1:
> +**   lsl     z0\.h, p0/m, z0\.h, #15
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s16_m_tied1, svint16_t,
> +             z0 = svmul_n_s16_m (p0, z0, INT16_MIN),
> +             z0 = svmul_m (p0, z0, INT16_MIN))
> +
> +/*
> +** mul_1_s16_m_tied1:
> +**   sel     z0\.h, p0, z0\.h, z0\.h
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s16_m_tied1, svint16_t,
> +             z0 = svmul_n_s16_m (p0, z0, 1),
> +             z0 = svmul_m (p0, z0, 1))
> +
> +/*
> +** mul_3_s16_m_tied1:
> +**   mov     (z[0-9]+\.h), #3
>  **   mul     z0\.h, p0/m, z0\.h, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s16_m_tied1, svint16_t,
> -             z0 = svmul_n_s16_m (p0, z0, 2),
> -             z0 = svmul_m (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_s16_m_tied1, svint16_t,
> +             z0 = svmul_n_s16_m (p0, z0, 3),
> +             z0 = svmul_m (p0, z0, 3))
>  
>  /*
> -** mul_2_s16_m_untied:
> -**   mov     (z[0-9]+\.h), #2
> +** mul_4dupop2_s16_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s16_m_untied, svint16_t,
> +             z0 = svmul_m (p0, z1, svdup_s16 (4)),
> +             z0 = svmul_m (p0, z1, svdup_s16 (4)))
> +
> +/*
> +** mul_4nop2_s16_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s16_m_untied, svint16_t,
> +             z0 = svmul_n_s16_m (p0, z1, 4),
> +             z0 = svmul_m (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s16_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.h, p0/m, z0\.h, #14
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s16_m_untied, svint16_t,
> +             z0 = svmul_n_s16_m (p0, z1, MAXPOW),
> +             z0 = svmul_m (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_s16_m_untied:
> +**   mov     (z[0-9]+\.h), #3
>  **   movprfx z0, z1
>  **   mul     z0\.h, p0/m, z0\.h, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s16_m_untied, svint16_t,
> -             z0 = svmul_n_s16_m (p0, z1, 2),
> -             z0 = svmul_m (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s16_m_untied, svint16_t,
> +             z0 = svmul_n_s16_m (p0, z1, 3),
> +             z0 = svmul_m (p0, z1, 3))
>  
>  /*
>  ** mul_m1_s16_m:
> @@ -147,19 +245,119 @@ TEST_UNIFORM_ZX (mul_w0_s16_z_untied, svint16_t, 
> int16_t,
>                z0 = svmul_z (p0, z1, x0))
>  
>  /*
> -** mul_2_s16_z_tied1:
> -**   mov     (z[0-9]+\.h), #2
> +** mul_4dupop1_s16_z_tied1:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s16_z_tied1, svint16_t,
> +             z0 = svmul_z (p0, svdup_s16 (4), z0),
> +             z0 = svmul_z (p0, svdup_s16 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s16_z_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s16_z_tied1, svint16_t,
> +             z0 = svmul_z (svptrue_b16 (), svdup_s16 (4), z0),
> +             z0 = svmul_z (svptrue_b16 (), svdup_s16 (4), z0))
> +
> +/*
> +** mul_4dupop2_s16_z_tied1:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s16_z_tied1, svint16_t,
> +             z0 = svmul_z (p0, z0, svdup_s16 (4)),
> +             z0 = svmul_z (p0, z0, svdup_s16 (4)))
> +
> +/*
> +** mul_4nop2_s16_z_tied1:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s16_z_tied1, svint16_t,
> +             z0 = svmul_n_s16_z (p0, z0, 4),
> +             z0 = svmul_z (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s16_z_tied1:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #14
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s16_z_tied1, svint16_t,
> +             z0 = svmul_n_s16_z (p0, z0, MAXPOW),
> +             z0 = svmul_z (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s16_z_tied1:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #15
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s16_z_tied1, svint16_t,
> +             z0 = svmul_n_s16_z (p0, z0, INT16_MIN),
> +             z0 = svmul_z (p0, z0, INT16_MIN))
> +
> +/*
> +** mul_1_s16_z_tied1:
> +**   mov     z31.h, #1
> +**   movprfx z0.h, p0/z, z0.h
> +**   mul     z0.h, p0/m, z0.h, z31.h
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s16_z_tied1, svint16_t,
> +             z0 = svmul_n_s16_z (p0, z0, 1),
> +             z0 = svmul_z (p0, z0, 1))
> +
> +/*
> +** mul_3_s16_z_tied1:
> +**   mov     (z[0-9]+\.h), #3
>  **   movprfx z0\.h, p0/z, z0\.h
>  **   mul     z0\.h, p0/m, z0\.h, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s16_z_tied1, svint16_t,
> -             z0 = svmul_n_s16_z (p0, z0, 2),
> -             z0 = svmul_z (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_s16_z_tied1, svint16_t,
> +             z0 = svmul_n_s16_z (p0, z0, 3),
> +             z0 = svmul_z (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_s16_z_untied:
> +**   movprfx z0\.h, p0/z, z1\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s16_z_untied, svint16_t,
> +             z0 = svmul_z (p0, z1, svdup_s16 (4)),
> +             z0 = svmul_z (p0, z1, svdup_s16 (4)))
> +
> +/*
> +** mul_4nop2_s16_z_untied:
> +**   movprfx z0\.h, p0/z, z1\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s16_z_untied, svint16_t,
> +             z0 = svmul_n_s16_z (p0, z1, 4),
> +             z0 = svmul_z (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s16_z_untied:
> +**   movprfx z0\.h, p0/z, z1\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #14
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s16_z_untied, svint16_t,
> +             z0 = svmul_n_s16_z (p0, z1, MAXPOW),
> +             z0 = svmul_z (p0, z1, MAXPOW))
>  
>  /*
> -** mul_2_s16_z_untied:
> -**   mov     (z[0-9]+\.h), #2
> +** mul_3_s16_z_untied:
> +**   mov     (z[0-9]+\.h), #3
>  ** (
>  **   movprfx z0\.h, p0/z, z1\.h
>  **   mul     z0\.h, p0/m, z0\.h, \1
> @@ -169,9 +367,9 @@ TEST_UNIFORM_Z (mul_2_s16_z_tied1, svint16_t,
>  ** )
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s16_z_untied, svint16_t,
> -             z0 = svmul_n_s16_z (p0, z1, 2),
> -             z0 = svmul_z (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s16_z_untied, svint16_t,
> +             z0 = svmul_n_s16_z (p0, z1, 3),
> +             z0 = svmul_z (p0, z1, 3))
>  
>  /*
>  ** mul_s16_x_tied1:
> @@ -227,23 +425,112 @@ TEST_UNIFORM_ZX (mul_w0_s16_x_untied, svint16_t, 
> int16_t,
>                z0 = svmul_x (p0, z1, x0))
>  
>  /*
> -** mul_2_s16_x_tied1:
> -**   mul     z0\.h, z0\.h, #2
> +** mul_4dupop1_s16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s16_x_tied1, svint16_t,
> +             z0 = svmul_x (p0, svdup_s16 (4), z0),
> +             z0 = svmul_x (p0, svdup_s16 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s16_x_tied1, svint16_t,
> +             z0 = svmul_x (svptrue_b16 (), svdup_s16 (4), z0),
> +             z0 = svmul_x (svptrue_b16 (), svdup_s16 (4), z0))
> +
> +/*
> +** mul_4dupop2_s16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #2
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s16_x_tied1, svint16_t,
> -             z0 = svmul_n_s16_x (p0, z0, 2),
> -             z0 = svmul_x (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_4dupop2_s16_x_tied1, svint16_t,
> +             z0 = svmul_x (p0, z0, svdup_s16 (4)),
> +             z0 = svmul_x (p0, z0, svdup_s16 (4)))
>  
>  /*
> -** mul_2_s16_x_untied:
> +** mul_4nop2_s16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s16_x_tied1, svint16_t,
> +             z0 = svmul_n_s16_x (p0, z0, 4),
> +             z0 = svmul_x (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #14
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s16_x_tied1, svint16_t,
> +             z0 = svmul_n_s16_x (p0, z0, MAXPOW),
> +             z0 = svmul_x (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #15
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s16_x_tied1, svint16_t,
> +             z0 = svmul_n_s16_x (p0, z0, INT16_MIN),
> +             z0 = svmul_x (p0, z0, INT16_MIN))
> +
> +/*
> +** mul_1_s16_x_tied1:
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s16_x_tied1, svint16_t,
> +             z0 = svmul_n_s16_x (p0, z0, 1),
> +             z0 = svmul_x (p0, z0, 1))
> +
> +/*
> +** mul_3_s16_x_tied1:
> +**   mul     z0\.h, z0\.h, #3
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_s16_x_tied1, svint16_t,
> +             z0 = svmul_n_s16_x (p0, z0, 3),
> +             z0 = svmul_x (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_s16_x_untied:
> +**   lsl     z0\.h, z1\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s16_x_untied, svint16_t,
> +             z0 = svmul_x (p0, z1, svdup_s16 (4)),
> +             z0 = svmul_x (p0, z1, svdup_s16 (4)))
> +
> +/*
> +** mul_4nop2_s16_x_untied:
> +**   lsl     z0\.h, z1\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s16_x_untied, svint16_t,
> +             z0 = svmul_n_s16_x (p0, z1, 4),
> +             z0 = svmul_x (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s16_x_untied:
> +**   lsl     z0\.h, z1\.h, #14
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s16_x_untied, svint16_t,
> +             z0 = svmul_n_s16_x (p0, z1, MAXPOW),
> +             z0 = svmul_x (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_s16_x_untied:
>  **   movprfx z0, z1
> -**   mul     z0\.h, z0\.h, #2
> +**   mul     z0\.h, z0\.h, #3
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s16_x_untied, svint16_t,
> -             z0 = svmul_n_s16_x (p0, z1, 2),
> -             z0 = svmul_x (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s16_x_untied, svint16_t,
> +             z0 = svmul_n_s16_x (p0, z1, 3),
> +             z0 = svmul_x (p0, z1, 3))
>  
>  /*
>  ** mul_127_s16_x:
> @@ -256,8 +543,7 @@ TEST_UNIFORM_Z (mul_127_s16_x, svint16_t,
>  
>  /*
>  ** mul_128_s16_x:
> -**   mov     (z[0-9]+\.h), #128
> -**   mul     z0\.h, p0/m, z0\.h, \1
> +**   lsl     z0\.h, z0\.h, #7
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_128_s16_x, svint16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c
> index 01c224932d9..aa91824a30d 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c
> @@ -2,6 +2,8 @@
>  
>  #include "test_sve_acle.h"
>  
> +#define MAXPOW 1ULL<<30
> +
>  /*
>  ** mul_s32_m_tied1:
>  **   mul     z0\.s, p0/m, z0\.s, z1\.s
> @@ -54,25 +56,121 @@ TEST_UNIFORM_ZX (mul_w0_s32_m_untied, svint32_t, int32_t,
>                z0 = svmul_m (p0, z1, x0))
>  
>  /*
> -** mul_2_s32_m_tied1:
> -**   mov     (z[0-9]+\.s), #2
> +** mul_4dupop1_s32_m_tied1:
> +**   mov     (z[0-9]+)\.s, #4
> +**   mov     (z[0-9]+)\.d, z0\.d
> +**   movprfx z0, \1
> +**   mul     z0\.s, p0/m, z0\.s, \2\.s
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s32_m_tied1, svint32_t,
> +             z0 = svmul_m (p0, svdup_s32 (4), z0),
> +             z0 = svmul_m (p0, svdup_s32 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s32_m_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s32_m_tied1, svint32_t,
> +             z0 = svmul_m (svptrue_b32 (), svdup_s32 (4), z0),
> +             z0 = svmul_m (svptrue_b32 (), svdup_s32 (4), z0))
> +
> +/*
> +** mul_4dupop2_s32_m_tied1:
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s32_m_tied1, svint32_t,
> +             z0 = svmul_m (p0, z0, svdup_s32 (4)),
> +             z0 = svmul_m (p0, z0, svdup_s32 (4)))
> +
> +/*
> +** mul_4nop2_s32_m_tied1:
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s32_m_tied1, svint32_t,
> +             z0 = svmul_n_s32_m (p0, z0, 4),
> +             z0 = svmul_m (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s32_m_tied1:
> +**   lsl     z0\.s, p0/m, z0\.s, #30
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s32_m_tied1, svint32_t,
> +             z0 = svmul_n_s32_m (p0, z0, MAXPOW),
> +             z0 = svmul_m (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s32_m_tied1:
> +**   lsl     z0\.s, p0/m, z0\.s, #31
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s32_m_tied1, svint32_t,
> +             z0 = svmul_n_s32_m (p0, z0, INT32_MIN),
> +             z0 = svmul_m (p0, z0, INT32_MIN))
> +
> +/*
> +** mul_1_s32_m_tied1:
> +**   sel     z0\.s, p0, z0\.s, z0\.s
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s32_m_tied1, svint32_t,
> +             z0 = svmul_n_s32_m (p0, z0, 1),
> +             z0 = svmul_m (p0, z0, 1))
> +
> +/*
> +** mul_3_s32_m_tied1:
> +**   mov     (z[0-9]+\.s), #3
>  **   mul     z0\.s, p0/m, z0\.s, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s32_m_tied1, svint32_t,
> -             z0 = svmul_n_s32_m (p0, z0, 2),
> -             z0 = svmul_m (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_s32_m_tied1, svint32_t,
> +             z0 = svmul_n_s32_m (p0, z0, 3),
> +             z0 = svmul_m (p0, z0, 3))
>  
>  /*
> -** mul_2_s32_m_untied:
> -**   mov     (z[0-9]+\.s), #2
> +** mul_4dupop2_s32_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s32_m_untied, svint32_t,
> +             z0 = svmul_m (p0, z1, svdup_s32 (4)),
> +             z0 = svmul_m (p0, z1, svdup_s32 (4)))
> +
> +/*
> +** mul_4nop2_s32_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s32_m_untied, svint32_t,
> +             z0 = svmul_n_s32_m (p0, z1, 4),
> +             z0 = svmul_m (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s32_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.s, p0/m, z0\.s, #30
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s32_m_untied, svint32_t,
> +             z0 = svmul_n_s32_m (p0, z1, MAXPOW),
> +             z0 = svmul_m (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_s32_m_untied:
> +**   mov     (z[0-9]+\.s), #3
>  **   movprfx z0, z1
>  **   mul     z0\.s, p0/m, z0\.s, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s32_m_untied, svint32_t,
> -             z0 = svmul_n_s32_m (p0, z1, 2),
> -             z0 = svmul_m (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s32_m_untied, svint32_t,
> +             z0 = svmul_n_s32_m (p0, z1, 3),
> +             z0 = svmul_m (p0, z1, 3))
>  
>  /*
>  ** mul_m1_s32_m:
> @@ -147,19 +245,119 @@ TEST_UNIFORM_ZX (mul_w0_s32_z_untied, svint32_t, 
> int32_t,
>                z0 = svmul_z (p0, z1, x0))
>  
>  /*
> -** mul_2_s32_z_tied1:
> -**   mov     (z[0-9]+\.s), #2
> +** mul_4dupop1_s32_z_tied1:
> +**   movprfx z0\.s, p0/z, z0\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s32_z_tied1, svint32_t,
> +             z0 = svmul_z (p0, svdup_s32 (4), z0),
> +             z0 = svmul_z (p0, svdup_s32 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s32_z_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s32_z_tied1, svint32_t,
> +             z0 = svmul_z (svptrue_b32 (), svdup_s32 (4), z0),
> +             z0 = svmul_z (svptrue_b32 (), svdup_s32 (4), z0))
> +
> +/*
> +** mul_4dupop2_s32_z_tied1:
> +**   movprfx z0\.s, p0/z, z0\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s32_z_tied1, svint32_t,
> +             z0 = svmul_z (p0, z0, svdup_s32 (4)),
> +             z0 = svmul_z (p0, z0, svdup_s32 (4)))
> +
> +/*
> +** mul_4nop2_s32_z_tied1:
> +**   movprfx z0\.s, p0/z, z0\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s32_z_tied1, svint32_t,
> +             z0 = svmul_n_s32_z (p0, z0, 4),
> +             z0 = svmul_z (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s32_z_tied1:
> +**   movprfx z0\.s, p0/z, z0\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #30
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s32_z_tied1, svint32_t,
> +             z0 = svmul_n_s32_z (p0, z0, MAXPOW),
> +             z0 = svmul_z (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s32_z_tied1:
> +**   movprfx z0\.s, p0/z, z0\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #31
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s32_z_tied1, svint32_t,
> +             z0 = svmul_n_s32_z (p0, z0, INT32_MIN),
> +             z0 = svmul_z (p0, z0, INT32_MIN))
> +
> +/*
> +** mul_1_s32_z_tied1:
> +**   mov     z31.s, #1
> +**   movprfx z0.s, p0/z, z0.s
> +**   mul     z0.s, p0/m, z0.s, z31.s
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s32_z_tied1, svint32_t,
> +             z0 = svmul_n_s32_z (p0, z0, 1),
> +             z0 = svmul_z (p0, z0, 1))
> +
> +/*
> +** mul_3_s32_z_tied1:
> +**   mov     (z[0-9]+\.s), #3
>  **   movprfx z0\.s, p0/z, z0\.s
>  **   mul     z0\.s, p0/m, z0\.s, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s32_z_tied1, svint32_t,
> -             z0 = svmul_n_s32_z (p0, z0, 2),
> -             z0 = svmul_z (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_s32_z_tied1, svint32_t,
> +             z0 = svmul_n_s32_z (p0, z0, 3),
> +             z0 = svmul_z (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_s32_z_untied:
> +**   movprfx z0\.s, p0/z, z1\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s32_z_untied, svint32_t,
> +             z0 = svmul_z (p0, z1, svdup_s32 (4)),
> +             z0 = svmul_z (p0, z1, svdup_s32 (4)))
> +
> +/*
> +** mul_4nop2_s32_z_untied:
> +**   movprfx z0\.s, p0/z, z1\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s32_z_untied, svint32_t,
> +             z0 = svmul_n_s32_z (p0, z1, 4),
> +             z0 = svmul_z (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s32_z_untied:
> +**   movprfx z0\.s, p0/z, z1\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #30
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s32_z_untied, svint32_t,
> +             z0 = svmul_n_s32_z (p0, z1, MAXPOW),
> +             z0 = svmul_z (p0, z1, MAXPOW))
>  
>  /*
> -** mul_2_s32_z_untied:
> -**   mov     (z[0-9]+\.s), #2
> +** mul_3_s32_z_untied:
> +**   mov     (z[0-9]+\.s), #3
>  ** (
>  **   movprfx z0\.s, p0/z, z1\.s
>  **   mul     z0\.s, p0/m, z0\.s, \1
> @@ -169,9 +367,9 @@ TEST_UNIFORM_Z (mul_2_s32_z_tied1, svint32_t,
>  ** )
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s32_z_untied, svint32_t,
> -             z0 = svmul_n_s32_z (p0, z1, 2),
> -             z0 = svmul_z (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s32_z_untied, svint32_t,
> +             z0 = svmul_n_s32_z (p0, z1, 3),
> +             z0 = svmul_z (p0, z1, 3))
>  
>  /*
>  ** mul_s32_x_tied1:
> @@ -227,23 +425,112 @@ TEST_UNIFORM_ZX (mul_w0_s32_x_untied, svint32_t, 
> int32_t,
>                z0 = svmul_x (p0, z1, x0))
>  
>  /*
> -** mul_2_s32_x_tied1:
> -**   mul     z0\.s, z0\.s, #2
> +** mul_4dupop1_s32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s32_x_tied1, svint32_t,
> +             z0 = svmul_x (p0, svdup_s32 (4), z0),
> +             z0 = svmul_x (p0, svdup_s32 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s32_x_tied1, svint32_t,
> +             z0 = svmul_x (svptrue_b32 (), svdup_s32 (4), z0),
> +             z0 = svmul_x (svptrue_b32 (), svdup_s32 (4), z0))
> +
> +/*
> +** mul_4dupop2_s32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #2
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s32_x_tied1, svint32_t,
> -             z0 = svmul_n_s32_x (p0, z0, 2),
> -             z0 = svmul_x (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_4dupop2_s32_x_tied1, svint32_t,
> +             z0 = svmul_x (p0, z0, svdup_s32 (4)),
> +             z0 = svmul_x (p0, z0, svdup_s32 (4)))
>  
>  /*
> -** mul_2_s32_x_untied:
> +** mul_4nop2_s32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s32_x_tied1, svint32_t,
> +             z0 = svmul_n_s32_x (p0, z0, 4),
> +             z0 = svmul_x (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #30
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s32_x_tied1, svint32_t,
> +             z0 = svmul_n_s32_x (p0, z0, MAXPOW),
> +             z0 = svmul_x (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #31
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s32_x_tied1, svint32_t,
> +             z0 = svmul_n_s32_x (p0, z0, INT32_MIN),
> +             z0 = svmul_x (p0, z0, INT32_MIN))
> +
> +/*
> +** mul_1_s32_x_tied1:
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s32_x_tied1, svint32_t,
> +             z0 = svmul_n_s32_x (p0, z0, 1),
> +             z0 = svmul_x (p0, z0, 1))
> +
> +/*
> +** mul_3_s32_x_tied1:
> +**   mul     z0\.s, z0\.s, #3
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_s32_x_tied1, svint32_t,
> +             z0 = svmul_n_s32_x (p0, z0, 3),
> +             z0 = svmul_x (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_s32_x_untied:
> +**   lsl     z0\.s, z1\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s32_x_untied, svint32_t,
> +             z0 = svmul_x (p0, z1, svdup_s32 (4)),
> +             z0 = svmul_x (p0, z1, svdup_s32 (4)))
> +
> +/*
> +** mul_4nop2_s32_x_untied:
> +**   lsl     z0\.s, z1\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s32_x_untied, svint32_t,
> +             z0 = svmul_n_s32_x (p0, z1, 4),
> +             z0 = svmul_x (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s32_x_untied:
> +**   lsl     z0\.s, z1\.s, #30
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s32_x_untied, svint32_t,
> +             z0 = svmul_n_s32_x (p0, z1, MAXPOW),
> +             z0 = svmul_x (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_s32_x_untied:
>  **   movprfx z0, z1
> -**   mul     z0\.s, z0\.s, #2
> +**   mul     z0\.s, z0\.s, #3
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s32_x_untied, svint32_t,
> -             z0 = svmul_n_s32_x (p0, z1, 2),
> -             z0 = svmul_x (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s32_x_untied, svint32_t,
> +             z0 = svmul_n_s32_x (p0, z1, 3),
> +             z0 = svmul_x (p0, z1, 3))
>  
>  /*
>  ** mul_127_s32_x:
> @@ -256,8 +543,7 @@ TEST_UNIFORM_Z (mul_127_s32_x, svint32_t,
>  
>  /*
>  ** mul_128_s32_x:
> -**   mov     (z[0-9]+\.s), #128
> -**   mul     z0\.s, p0/m, z0\.s, \1
> +**   lsl     z0\.s, z0\.s, #7
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_128_s32_x, svint32_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c
> index c3cf581a0a4..f82725973f8 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c
> @@ -2,6 +2,8 @@
>  
>  #include "test_sve_acle.h"
>  
> +#define MAXPOW 1ULL<<62
> +
>  /*
>  ** mul_s64_m_tied1:
>  **   mul     z0\.d, p0/m, z0\.d, z1\.d
> @@ -53,10 +55,75 @@ TEST_UNIFORM_ZX (mul_x0_s64_m_untied, svint64_t, int64_t,
>                z0 = svmul_n_s64_m (p0, z1, x0),
>                z0 = svmul_m (p0, z1, x0))
>  
> +/*
> +** mul_4dupop1_s64_m_tied1:
> +**   mov     (z[0-9]+)\.d, #4
> +**   mov     (z[0-9]+\.d), z0\.d
> +**   movprfx z0, \1
> +**   mul     z0\.d, p0/m, z0\.d, \2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s64_m_tied1, svint64_t,
> +             z0 = svmul_m (p0, svdup_s64 (4), z0),
> +             z0 = svmul_m (p0, svdup_s64 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s64_m_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s64_m_tied1, svint64_t,
> +             z0 = svmul_m (svptrue_b64 (), svdup_s64 (4), z0),
> +             z0 = svmul_m (svptrue_b64 (), svdup_s64 (4), z0))
> +
> +/*
> +** mul_4dupop2_s64_m_tied1:
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s64_m_tied1, svint64_t,
> +             z0 = svmul_m (p0, z0, svdup_s64 (4)),
> +             z0 = svmul_m (p0, z0, svdup_s64 (4)))
> +
> +/*
> +** mul_4nop2_s64_m_tied1:
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s64_m_tied1, svint64_t,
> +             z0 = svmul_n_s64_m (p0, z0, 4),
> +             z0 = svmul_m (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s64_m_tied1:
> +**   lsl     z0\.d, p0/m, z0\.d, #62
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s64_m_tied1, svint64_t,
> +             z0 = svmul_n_s64_m (p0, z0, MAXPOW),
> +             z0 = svmul_m (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s64_m_tied1:
> +**   lsl     z0\.d, p0/m, z0\.d, #63
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s64_m_tied1, svint64_t,
> +             z0 = svmul_n_s64_m (p0, z0, INT64_MIN),
> +             z0 = svmul_m (p0, z0, INT64_MIN))
> +
> +/*
> +** mul_1_s64_m_tied1:
> +**   sel     z0\.d, p0, z0\.d, z0\.d
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s64_m_tied1, svint64_t,
> +             z0 = svmul_n_s64_m (p0, z0, 1),
> +             z0 = svmul_m (p0, z0, 1))
> +
>  /*
>  ** mul_2_s64_m_tied1:
> -**   mov     (z[0-9]+\.d), #2
> -**   mul     z0\.d, p0/m, z0\.d, \1
> +**   lsl     z0\.d, p0/m, z0\.d, #1
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_2_s64_m_tied1, svint64_t,
> @@ -64,15 +131,55 @@ TEST_UNIFORM_Z (mul_2_s64_m_tied1, svint64_t,
>               z0 = svmul_m (p0, z0, 2))
>  
>  /*
> -** mul_2_s64_m_untied:
> -**   mov     (z[0-9]+\.d), #2
> +** mul_3_s64_m_tied1:
> +**   mov     (z[0-9]+\.d), #3
> +**   mul     z0\.d, p0/m, z0\.d, \1
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_s64_m_tied1, svint64_t,
> +             z0 = svmul_n_s64_m (p0, z0, 3),
> +             z0 = svmul_m (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_s64_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s64_m_untied, svint64_t,
> +             z0 = svmul_m (p0, z1, svdup_s64 (4)),
> +             z0 = svmul_m (p0, z1, svdup_s64 (4)))
> +
> +/*
> +** mul_4nop2_s64_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s64_m_untied, svint64_t,
> +             z0 = svmul_n_s64_m (p0, z1, 4),
> +             z0 = svmul_m (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s64_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.d, p0/m, z0\.d, #62
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s64_m_untied, svint64_t,
> +             z0 = svmul_n_s64_m (p0, z1, MAXPOW),
> +             z0 = svmul_m (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_s64_m_untied:
> +**   mov     (z[0-9]+\.d), #3
>  **   movprfx z0, z1
>  **   mul     z0\.d, p0/m, z0\.d, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s64_m_untied, svint64_t,
> -             z0 = svmul_n_s64_m (p0, z1, 2),
> -             z0 = svmul_m (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s64_m_untied, svint64_t,
> +             z0 = svmul_n_s64_m (p0, z1, 3),
> +             z0 = svmul_m (p0, z1, 3))
>  
>  /*
>  ** mul_m1_s64_m:
> @@ -147,10 +254,79 @@ TEST_UNIFORM_ZX (mul_x0_s64_z_untied, svint64_t, 
> int64_t,
>                z0 = svmul_z (p0, z1, x0))
>  
>  /*
> -** mul_2_s64_z_tied1:
> -**   mov     (z[0-9]+\.d), #2
> +** mul_4dupop1_s64_z_tied1:
>  **   movprfx z0\.d, p0/z, z0\.d
> -**   mul     z0\.d, p0/m, z0\.d, \1
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s64_z_tied1, svint64_t,
> +             z0 = svmul_z (p0, svdup_s64 (4), z0),
> +             z0 = svmul_z (p0, svdup_s64 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s64_z_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s64_z_tied1, svint64_t,
> +             z0 = svmul_z (svptrue_b64 (), svdup_s64 (4), z0),
> +             z0 = svmul_z (svptrue_b64 (), svdup_s64 (4), z0))
> +
> +/*
> +** mul_4dupop2_s64_z_tied1:
> +**   movprfx z0\.d, p0/z, z0\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s64_z_tied1, svint64_t,
> +             z0 = svmul_z (p0, z0, svdup_s64 (4)),
> +             z0 = svmul_z (p0, z0, svdup_s64 (4)))
> +
> +/*
> +** mul_4nop2_s64_z_tied1:
> +**   movprfx z0\.d, p0/z, z0\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s64_z_tied1, svint64_t,
> +             z0 = svmul_n_s64_z (p0, z0, 4),
> +             z0 = svmul_z (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s64_z_tied1:
> +**   movprfx z0\.d, p0/z, z0\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #62
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s64_z_tied1, svint64_t,
> +             z0 = svmul_n_s64_z (p0, z0, MAXPOW),
> +             z0 = svmul_z (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s64_z_tied1:
> +**   movprfx z0\.d, p0/z, z0\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #63
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s64_z_tied1, svint64_t,
> +             z0 = svmul_n_s64_z (p0, z0, INT64_MIN),
> +             z0 = svmul_z (p0, z0, INT64_MIN))
> +
> +/*
> +** mul_1_s64_z_tied1:
> +**   mov     z31.d, #1
> +**   movprfx z0.d, p0/z, z0.d
> +**   mul     z0.d, p0/m, z0.d, z31.d
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s64_z_tied1, svint64_t,
> +             z0 = svmul_n_s64_z (p0, z0, 1),
> +             z0 = svmul_z (p0, z0, 1))
> +
> +/*
> +** mul_2_s64_z_tied1:
> +**   movprfx z0.d, p0/z, z0.d
> +**   lsl     z0.d, p0/m, z0.d, #1
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_2_s64_z_tied1, svint64_t,
> @@ -158,8 +334,49 @@ TEST_UNIFORM_Z (mul_2_s64_z_tied1, svint64_t,
>               z0 = svmul_z (p0, z0, 2))
>  
>  /*
> -** mul_2_s64_z_untied:
> -**   mov     (z[0-9]+\.d), #2
> +** mul_3_s64_z_tied1:
> +**   mov     (z[0-9]+\.d), #3
> +**   movprfx z0\.d, p0/z, z0\.d
> +**   mul     z0\.d, p0/m, z0\.d, \1
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_s64_z_tied1, svint64_t,
> +             z0 = svmul_n_s64_z (p0, z0, 3),
> +             z0 = svmul_z (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_s64_z_untied:
> +**   movprfx z0\.d, p0/z, z1\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s64_z_untied, svint64_t,
> +             z0 = svmul_z (p0, z1, svdup_s64 (4)),
> +             z0 = svmul_z (p0, z1, svdup_s64 (4)))
> +
> +/*
> +** mul_4nop2_s64_z_untied:
> +**   movprfx z0\.d, p0/z, z1\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s64_z_untied, svint64_t,
> +             z0 = svmul_n_s64_z (p0, z1, 4),
> +             z0 = svmul_z (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s64_z_untied:
> +**   movprfx z0\.d, p0/z, z1\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #62
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s64_z_untied, svint64_t,
> +             z0 = svmul_n_s64_z (p0, z1, MAXPOW),
> +             z0 = svmul_z (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_s64_z_untied:
> +**   mov     (z[0-9]+\.d), #3
>  ** (
>  **   movprfx z0\.d, p0/z, z1\.d
>  **   mul     z0\.d, p0/m, z0\.d, \1
> @@ -169,9 +386,9 @@ TEST_UNIFORM_Z (mul_2_s64_z_tied1, svint64_t,
>  ** )
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s64_z_untied, svint64_t,
> -             z0 = svmul_n_s64_z (p0, z1, 2),
> -             z0 = svmul_z (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s64_z_untied, svint64_t,
> +             z0 = svmul_n_s64_z (p0, z1, 3),
> +             z0 = svmul_z (p0, z1, 3))
>  
>  /*
>  ** mul_s64_x_tied1:
> @@ -226,9 +443,71 @@ TEST_UNIFORM_ZX (mul_x0_s64_x_untied, svint64_t, int64_t,
>                z0 = svmul_n_s64_x (p0, z1, x0),
>                z0 = svmul_x (p0, z1, x0))
>  
> +/*
> +** mul_4dupop1_s64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s64_x_tied1, svint64_t,
> +             z0 = svmul_x (p0, svdup_s64 (4), z0),
> +             z0 = svmul_x (p0, svdup_s64 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s64_x_tied1, svint64_t,
> +             z0 = svmul_x (svptrue_b64 (), svdup_s64 (4), z0),
> +             z0 = svmul_x (svptrue_b64 (), svdup_s64 (4), z0))
> +
> +/*
> +** mul_4dupop2_s64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s64_x_tied1, svint64_t,
> +             z0 = svmul_x (p0, z0, svdup_s64 (4)),
> +             z0 = svmul_x (p0, z0, svdup_s64 (4)))
> +
> +/*
> +** mul_4nop2_s64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s64_x_tied1, svint64_t,
> +             z0 = svmul_n_s64_x (p0, z0, 4),
> +             z0 = svmul_x (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #62
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s64_x_tied1, svint64_t,
> +             z0 = svmul_n_s64_x (p0, z0, MAXPOW),
> +             z0 = svmul_x (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #63
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s64_x_tied1, svint64_t,
> +             z0 = svmul_n_s64_x (p0, z0, INT64_MIN),
> +             z0 = svmul_x (p0, z0, INT64_MIN))
> +
> +/*
> +** mul_1_s64_x_tied1:
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s64_x_tied1, svint64_t,
> +             z0 = svmul_n_s64_x (p0, z0, 1),
> +             z0 = svmul_x (p0, z0, 1))
> +
>  /*
>  ** mul_2_s64_x_tied1:
> -**   mul     z0\.d, z0\.d, #2
> +**   add     z0\.d, z0\.d, z0\.d
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_2_s64_x_tied1, svint64_t,
> @@ -236,14 +515,50 @@ TEST_UNIFORM_Z (mul_2_s64_x_tied1, svint64_t,
>               z0 = svmul_x (p0, z0, 2))
>  
>  /*
> -** mul_2_s64_x_untied:
> +** mul_3_s64_x_tied1:
> +**   mul     z0\.d, z0\.d, #3
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_s64_x_tied1, svint64_t,
> +             z0 = svmul_n_s64_x (p0, z0, 3),
> +             z0 = svmul_x (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_s64_x_untied:
> +**   lsl     z0\.d, z1\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s64_x_untied, svint64_t,
> +             z0 = svmul_x (p0, z1, svdup_s64 (4)),
> +             z0 = svmul_x (p0, z1, svdup_s64 (4)))
> +
> +/*
> +** mul_4nop2_s64_x_untied:
> +**   lsl     z0\.d, z1\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s64_x_untied, svint64_t,
> +             z0 = svmul_n_s64_x (p0, z1, 4),
> +             z0 = svmul_x (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s64_x_untied:
> +**   lsl     z0\.d, z1\.d, #62
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s64_x_untied, svint64_t,
> +             z0 = svmul_n_s64_x (p0, z1, MAXPOW),
> +             z0 = svmul_x (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_s64_x_untied:
>  **   movprfx z0, z1
> -**   mul     z0\.d, z0\.d, #2
> +**   mul     z0\.d, z0\.d, #3
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s64_x_untied, svint64_t,
> -             z0 = svmul_n_s64_x (p0, z1, 2),
> -             z0 = svmul_x (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s64_x_untied, svint64_t,
> +             z0 = svmul_n_s64_x (p0, z1, 3),
> +             z0 = svmul_x (p0, z1, 3))
>  
>  /*
>  ** mul_127_s64_x:
> @@ -256,8 +571,7 @@ TEST_UNIFORM_Z (mul_127_s64_x, svint64_t,
>  
>  /*
>  ** mul_128_s64_x:
> -**   mov     (z[0-9]+\.d), #128
> -**   mul     z0\.d, p0/m, z0\.d, \1
> +**   lsl     z0\.d, z0\.d, #7
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_128_s64_x, svint64_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c
> index 4ac4c8eeb2a..ee06e73f87f 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c
> @@ -2,6 +2,8 @@
>  
>  #include "test_sve_acle.h"
>  
> +#define MAXPOW 1<<6
> +
>  /*
>  ** mul_s8_m_tied1:
>  **   mul     z0\.b, p0/m, z0\.b, z1\.b
> @@ -54,30 +56,126 @@ TEST_UNIFORM_ZX (mul_w0_s8_m_untied, svint8_t, int8_t,
>                z0 = svmul_m (p0, z1, x0))
>  
>  /*
> -** mul_2_s8_m_tied1:
> -**   mov     (z[0-9]+\.b), #2
> +** mul_4dupop1_s8_m_tied1:
> +**   mov     (z[0-9]+)\.b, #4
> +**   mov     (z[0-9]+)\.d, z0\.d
> +**   movprfx z0, \1
> +**   mul     z0\.b, p0/m, z0\.b, \2\.b
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s8_m_tied1, svint8_t,
> +             z0 = svmul_m (p0, svdup_s8 (4), z0),
> +             z0 = svmul_m (p0, svdup_s8 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s8_m_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s8_m_tied1, svint8_t,
> +             z0 = svmul_m (svptrue_b8 (), svdup_s8 (4), z0),
> +             z0 = svmul_m (svptrue_b8 (), svdup_s8 (4), z0))
> +
> +/*
> +** mul_4dupop2_s8_m_tied1:
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s8_m_tied1, svint8_t,
> +             z0 = svmul_m (p0, z0, svdup_s8 (4)),
> +             z0 = svmul_m (p0, z0, svdup_s8 (4)))
> +
> +/*
> +** mul_4nop2_s8_m_tied1:
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s8_m_tied1, svint8_t,
> +             z0 = svmul_n_s8_m (p0, z0, 4),
> +             z0 = svmul_m (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s8_m_tied1:
> +**   lsl     z0\.b, p0/m, z0\.b, #6
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s8_m_tied1, svint8_t,
> +             z0 = svmul_n_s8_m (p0, z0, MAXPOW),
> +             z0 = svmul_m (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s8_m_tied1:
> +**   lsl     z0\.b, p0/m, z0\.b, #7
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s8_m_tied1, svint8_t,
> +             z0 = svmul_n_s8_m (p0, z0, INT8_MIN),
> +             z0 = svmul_m (p0, z0, INT8_MIN))
> +
> +/*
> +** mul_1_s8_m_tied1:
> +**   sel     z0\.b, p0, z0\.b, z0\.b
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s8_m_tied1, svint8_t,
> +             z0 = svmul_n_s8_m (p0, z0, 1),
> +             z0 = svmul_m (p0, z0, 1))
> +
> +/*
> +** mul_3_s8_m_tied1:
> +**   mov     (z[0-9]+\.b), #3
>  **   mul     z0\.b, p0/m, z0\.b, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s8_m_tied1, svint8_t,
> -             z0 = svmul_n_s8_m (p0, z0, 2),
> -             z0 = svmul_m (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_s8_m_tied1, svint8_t,
> +             z0 = svmul_n_s8_m (p0, z0, 3),
> +             z0 = svmul_m (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_s8_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s8_m_untied, svint8_t,
> +             z0 = svmul_m (p0, z1, svdup_s8 (4)),
> +             z0 = svmul_m (p0, z1, svdup_s8 (4)))
>  
>  /*
> -** mul_2_s8_m_untied:
> -**   mov     (z[0-9]+\.b), #2
> +** mul_4nop2_s8_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s8_m_untied, svint8_t,
> +             z0 = svmul_n_s8_m (p0, z1, 4),
> +             z0 = svmul_m (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s8_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.b, p0/m, z0\.b, #6
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s8_m_untied, svint8_t,
> +             z0 = svmul_n_s8_m (p0, z1, MAXPOW),
> +             z0 = svmul_m (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_s8_m_untied:
> +**   mov     (z[0-9]+\.b), #3
>  **   movprfx z0, z1
>  **   mul     z0\.b, p0/m, z0\.b, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s8_m_untied, svint8_t,
> -             z0 = svmul_n_s8_m (p0, z1, 2),
> -             z0 = svmul_m (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s8_m_untied, svint8_t,
> +             z0 = svmul_n_s8_m (p0, z1, 3),
> +             z0 = svmul_m (p0, z1, 3))
>  
>  /*
>  ** mul_m1_s8_m:
> -**   mov     (z[0-9]+\.b), #-1
> -**   mul     z0\.b, p0/m, z0\.b, \1
> +**   mov     (z[0-9]+)\.b, #-1
> +**   mul     z0\.b, p0/m, z0\.b, \1\.b
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_m1_s8_m, svint8_t,
> @@ -147,19 +245,119 @@ TEST_UNIFORM_ZX (mul_w0_s8_z_untied, svint8_t, int8_t,
>                z0 = svmul_z (p0, z1, x0))
>  
>  /*
> -** mul_2_s8_z_tied1:
> -**   mov     (z[0-9]+\.b), #2
> +** mul_4dupop1_s8_z_tied1:
> +**   movprfx z0\.b, p0/z, z0\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s8_z_tied1, svint8_t,
> +             z0 = svmul_z (p0, svdup_s8 (4), z0),
> +             z0 = svmul_z (p0, svdup_s8 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s8_z_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s8_z_tied1, svint8_t,
> +             z0 = svmul_z (svptrue_b8 (), svdup_s8 (4), z0),
> +             z0 = svmul_z (svptrue_b8 (), svdup_s8 (4), z0))
> +
> +/*
> +** mul_4dupop2_s8_z_tied1:
> +**   movprfx z0\.b, p0/z, z0\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s8_z_tied1, svint8_t,
> +             z0 = svmul_z (p0, z0, svdup_s8 (4)),
> +             z0 = svmul_z (p0, z0, svdup_s8 (4)))
> +
> +/*
> +** mul_4nop2_s8_z_tied1:
> +**   movprfx z0\.b, p0/z, z0\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s8_z_tied1, svint8_t,
> +             z0 = svmul_n_s8_z (p0, z0, 4),
> +             z0 = svmul_z (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s8_z_tied1:
> +**   movprfx z0\.b, p0/z, z0\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #6
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s8_z_tied1, svint8_t,
> +             z0 = svmul_n_s8_z (p0, z0, MAXPOW),
> +             z0 = svmul_z (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s8_z_tied1:
> +**   movprfx z0\.b, p0/z, z0\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #7
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s8_z_tied1, svint8_t,
> +             z0 = svmul_n_s8_z (p0, z0, INT8_MIN),
> +             z0 = svmul_z (p0, z0, INT8_MIN))
> +
> +/*
> +** mul_1_s8_z_tied1:
> +**   mov     z31.b, #1
> +**   movprfx z0.b, p0/z, z0.b
> +**   mul     z0.b, p0/m, z0.b, z31.b
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s8_z_tied1, svint8_t,
> +             z0 = svmul_n_s8_z (p0, z0, 1),
> +             z0 = svmul_z (p0, z0, 1))
> +
> +/*
> +** mul_3_s8_z_tied1:
> +**   mov     (z[0-9]+\.b), #3
>  **   movprfx z0\.b, p0/z, z0\.b
>  **   mul     z0\.b, p0/m, z0\.b, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s8_z_tied1, svint8_t,
> -             z0 = svmul_n_s8_z (p0, z0, 2),
> -             z0 = svmul_z (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_s8_z_tied1, svint8_t,
> +             z0 = svmul_n_s8_z (p0, z0, 3),
> +             z0 = svmul_z (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_s8_z_untied:
> +**   movprfx z0\.b, p0/z, z1\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s8_z_untied, svint8_t,
> +             z0 = svmul_z (p0, z1, svdup_s8 (4)),
> +             z0 = svmul_z (p0, z1, svdup_s8 (4)))
>  
>  /*
> -** mul_2_s8_z_untied:
> -**   mov     (z[0-9]+\.b), #2
> +** mul_4nop2_s8_z_untied:
> +**   movprfx z0\.b, p0/z, z1\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s8_z_untied, svint8_t,
> +             z0 = svmul_n_s8_z (p0, z1, 4),
> +             z0 = svmul_z (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s8_z_untied:
> +**   movprfx z0\.b, p0/z, z1\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #6
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s8_z_untied, svint8_t,
> +             z0 = svmul_n_s8_z (p0, z1, MAXPOW),
> +             z0 = svmul_z (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_s8_z_untied:
> +**   mov     (z[0-9]+\.b), #3
>  ** (
>  **   movprfx z0\.b, p0/z, z1\.b
>  **   mul     z0\.b, p0/m, z0\.b, \1
> @@ -169,9 +367,9 @@ TEST_UNIFORM_Z (mul_2_s8_z_tied1, svint8_t,
>  ** )
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s8_z_untied, svint8_t,
> -             z0 = svmul_n_s8_z (p0, z1, 2),
> -             z0 = svmul_z (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s8_z_untied, svint8_t,
> +             z0 = svmul_n_s8_z (p0, z1, 3),
> +             z0 = svmul_z (p0, z1, 3))
>  
>  /*
>  ** mul_s8_x_tied1:
> @@ -227,23 +425,112 @@ TEST_UNIFORM_ZX (mul_w0_s8_x_untied, svint8_t, int8_t,
>                z0 = svmul_x (p0, z1, x0))
>  
>  /*
> -** mul_2_s8_x_tied1:
> -**   mul     z0\.b, z0\.b, #2
> +** mul_4dupop1_s8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_s8_x_tied1, svint8_t,
> +             z0 = svmul_x (p0, svdup_s8 (4), z0),
> +             z0 = svmul_x (p0, svdup_s8 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_s8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_s8_x_tied1, svint8_t,
> +             z0 = svmul_x (svptrue_b8 (), svdup_s8 (4), z0),
> +             z0 = svmul_x (svptrue_b8 (), svdup_s8 (4), z0))
> +
> +/*
> +** mul_4dupop2_s8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s8_x_tied1, svint8_t,
> +             z0 = svmul_x (p0, z0, svdup_s8 (4)),
> +             z0 = svmul_x (p0, z0, svdup_s8 (4)))
> +
> +/*
> +** mul_4nop2_s8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s8_x_tied1, svint8_t,
> +             z0 = svmul_n_s8_x (p0, z0, 4),
> +             z0 = svmul_x (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_s8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #6
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_s8_x_tied1, svint8_t,
> +             z0 = svmul_n_s8_x (p0, z0, MAXPOW),
> +             z0 = svmul_x (p0, z0, MAXPOW))
> +
> +/*
> +** mul_intminnop2_s8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #7
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_intminnop2_s8_x_tied1, svint8_t,
> +             z0 = svmul_n_s8_x (p0, z0, INT8_MIN),
> +             z0 = svmul_x (p0, z0, INT8_MIN))
> +
> +/*
> +** mul_1_s8_x_tied1:
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_s8_x_tied1, svint8_t,
> +             z0 = svmul_n_s8_x (p0, z0, 1),
> +             z0 = svmul_x (p0, z0, 1))
> +
> +/*
> +** mul_3_s8_x_tied1:
> +**   mul     z0\.b, z0\.b, #3
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_s8_x_tied1, svint8_t,
> +             z0 = svmul_n_s8_x (p0, z0, 3),
> +             z0 = svmul_x (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_s8_x_untied:
> +**   lsl     z0\.b, z1\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_s8_x_untied, svint8_t,
> +             z0 = svmul_x (p0, z1, svdup_s8 (4)),
> +             z0 = svmul_x (p0, z1, svdup_s8 (4)))
> +
> +/*
> +** mul_4nop2_s8_x_untied:
> +**   lsl     z0\.b, z1\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_s8_x_untied, svint8_t,
> +             z0 = svmul_n_s8_x (p0, z1, 4),
> +             z0 = svmul_x (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_s8_x_untied:
> +**   lsl     z0\.b, z1\.b, #6
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s8_x_tied1, svint8_t,
> -             z0 = svmul_n_s8_x (p0, z0, 2),
> -             z0 = svmul_x (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_maxpownop2_s8_x_untied, svint8_t,
> +             z0 = svmul_n_s8_x (p0, z1, MAXPOW),
> +             z0 = svmul_x (p0, z1, MAXPOW))
>  
>  /*
> -** mul_2_s8_x_untied:
> +** mul_3_s8_x_untied:
>  **   movprfx z0, z1
> -**   mul     z0\.b, z0\.b, #2
> +**   mul     z0\.b, z0\.b, #3
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_s8_x_untied, svint8_t,
> -             z0 = svmul_n_s8_x (p0, z1, 2),
> -             z0 = svmul_x (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_s8_x_untied, svint8_t,
> +             z0 = svmul_n_s8_x (p0, z1, 3),
> +             z0 = svmul_x (p0, z1, 3))
>  
>  /*
>  ** mul_127_s8_x:
> @@ -256,7 +543,7 @@ TEST_UNIFORM_Z (mul_127_s8_x, svint8_t,
>  
>  /*
>  ** mul_128_s8_x:
> -**   mul     z0\.b, z0\.b, #-128
> +**   lsl     z0\.b, z0\.b, #7
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_128_s8_x, svint8_t,
> @@ -292,7 +579,7 @@ TEST_UNIFORM_Z (mul_m127_s8_x, svint8_t,
>  
>  /*
>  ** mul_m128_s8_x:
> -**   mul     z0\.b, z0\.b, #-128
> +**   lsl     z0\.b, z0\.b, #7
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_m128_s8_x, svint8_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c
> index affee965005..39e1afc83f9 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c
> @@ -2,6 +2,8 @@
>  
>  #include "test_sve_acle.h"
>  
> +#define MAXPOW 1ULL<<15
> +
>  /*
>  ** mul_u16_m_tied1:
>  **   mul     z0\.h, p0/m, z0\.h, z1\.h
> @@ -54,25 +56,112 @@ TEST_UNIFORM_ZX (mul_w0_u16_m_untied, svuint16_t, uint16_t,
>                z0 = svmul_m (p0, z1, x0))
>  
>  /*
> -** mul_2_u16_m_tied1:
> -**   mov     (z[0-9]+\.h), #2
> +** mul_4dupop1_u16_m_tied1:
> +**   mov     (z[0-9]+)\.h, #4
> +**   mov     (z[0-9]+)\.d, z0\.d
> +**   movprfx z0, \1
> +**   mul     z0\.h, p0/m, z0\.h, \2\.h
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u16_m_tied1, svuint16_t,
> +             z0 = svmul_m (p0, svdup_u16 (4), z0),
> +             z0 = svmul_m (p0, svdup_u16 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u16_m_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u16_m_tied1, svuint16_t,
> +             z0 = svmul_m (svptrue_b16 (), svdup_u16 (4), z0),
> +             z0 = svmul_m (svptrue_b16 (), svdup_u16 (4), z0))
> +
> +/*
> +** mul_4dupop2_u16_m_tied1:
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u16_m_tied1, svuint16_t,
> +             z0 = svmul_m (p0, z0, svdup_u16 (4)),
> +             z0 = svmul_m (p0, z0, svdup_u16 (4)))
> +
> +/*
> +** mul_4nop2_u16_m_tied1:
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u16_m_tied1, svuint16_t,
> +             z0 = svmul_n_u16_m (p0, z0, 4),
> +             z0 = svmul_m (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u16_m_tied1:
> +**   lsl     z0\.h, p0/m, z0\.h, #15
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u16_m_tied1, svuint16_t,
> +             z0 = svmul_n_u16_m (p0, z0, MAXPOW),
> +             z0 = svmul_m (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u16_m_tied1:
> +**   sel     z0\.h, p0, z0\.h, z0\.h
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u16_m_tied1, svuint16_t,
> +             z0 = svmul_n_u16_m (p0, z0, 1),
> +             z0 = svmul_m (p0, z0, 1))
> +
> +/*
> +** mul_3_u16_m_tied1:
> +**   mov     (z[0-9]+\.h), #3
>  **   mul     z0\.h, p0/m, z0\.h, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u16_m_tied1, svuint16_t,
> -             z0 = svmul_n_u16_m (p0, z0, 2),
> -             z0 = svmul_m (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_u16_m_tied1, svuint16_t,
> +             z0 = svmul_n_u16_m (p0, z0, 3),
> +             z0 = svmul_m (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u16_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u16_m_untied, svuint16_t,
> +             z0 = svmul_m (p0, z1, svdup_u16 (4)),
> +             z0 = svmul_m (p0, z1, svdup_u16 (4)))
>  
>  /*
> -** mul_2_u16_m_untied:
> -**   mov     (z[0-9]+\.h), #2
> +** mul_4nop2_u16_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u16_m_untied, svuint16_t,
> +             z0 = svmul_n_u16_m (p0, z1, 4),
> +             z0 = svmul_m (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u16_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.h, p0/m, z0\.h, #15
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u16_m_untied, svuint16_t,
> +             z0 = svmul_n_u16_m (p0, z1, MAXPOW),
> +             z0 = svmul_m (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_u16_m_untied:
> +**   mov     (z[0-9]+\.h), #3
>  **   movprfx z0, z1
>  **   mul     z0\.h, p0/m, z0\.h, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u16_m_untied, svuint16_t,
> -             z0 = svmul_n_u16_m (p0, z1, 2),
> -             z0 = svmul_m (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u16_m_untied, svuint16_t,
> +             z0 = svmul_n_u16_m (p0, z1, 3),
> +             z0 = svmul_m (p0, z1, 3))
>  
>  /*
>  ** mul_m1_u16_m:
> @@ -147,19 +236,109 @@ TEST_UNIFORM_ZX (mul_w0_u16_z_untied, svuint16_t, uint16_t,
>                z0 = svmul_z (p0, z1, x0))
>  
>  /*
> -** mul_2_u16_z_tied1:
> -**   mov     (z[0-9]+\.h), #2
> +** mul_4dupop1_u16_z_tied1:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u16_z_tied1, svuint16_t,
> +             z0 = svmul_z (p0, svdup_u16 (4), z0),
> +             z0 = svmul_z (p0, svdup_u16 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u16_z_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u16_z_tied1, svuint16_t,
> +             z0 = svmul_z (svptrue_b16 (), svdup_u16 (4), z0),
> +             z0 = svmul_z (svptrue_b16 (), svdup_u16 (4), z0))
> +
> +/*
> +** mul_4dupop2_u16_z_tied1:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u16_z_tied1, svuint16_t,
> +             z0 = svmul_z (p0, z0, svdup_u16 (4)),
> +             z0 = svmul_z (p0, z0, svdup_u16 (4)))
> +
> +/*
> +** mul_4nop2_u16_z_tied1:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u16_z_tied1, svuint16_t,
> +             z0 = svmul_n_u16_z (p0, z0, 4),
> +             z0 = svmul_z (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u16_z_tied1:
> +**   movprfx z0\.h, p0/z, z0\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #15
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u16_z_tied1, svuint16_t,
> +             z0 = svmul_n_u16_z (p0, z0, MAXPOW),
> +             z0 = svmul_z (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u16_z_tied1:
> +**   mov     z31.h, #1
> +**   movprfx z0.h, p0/z, z0.h
> +**   mul     z0.h, p0/m, z0.h, z31.h
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u16_z_tied1, svuint16_t,
> +             z0 = svmul_n_u16_z (p0, z0, 1),
> +             z0 = svmul_z (p0, z0, 1))
> +
> +/*
> +** mul_3_u16_z_tied1:
> +**   mov     (z[0-9]+\.h), #3
>  **   movprfx z0\.h, p0/z, z0\.h
>  **   mul     z0\.h, p0/m, z0\.h, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u16_z_tied1, svuint16_t,
> -             z0 = svmul_n_u16_z (p0, z0, 2),
> -             z0 = svmul_z (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_u16_z_tied1, svuint16_t,
> +             z0 = svmul_n_u16_z (p0, z0, 3),
> +             z0 = svmul_z (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u16_z_untied:
> +**   movprfx z0\.h, p0/z, z1\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u16_z_untied, svuint16_t,
> +             z0 = svmul_z (p0, z1, svdup_u16 (4)),
> +             z0 = svmul_z (p0, z1, svdup_u16 (4)))
> +
> +/*
> +** mul_4nop2_u16_z_untied:
> +**   movprfx z0\.h, p0/z, z1\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u16_z_untied, svuint16_t,
> +             z0 = svmul_n_u16_z (p0, z1, 4),
> +             z0 = svmul_z (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u16_z_untied:
> +**   movprfx z0\.h, p0/z, z1\.h
> +**   lsl     z0\.h, p0/m, z0\.h, #15
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u16_z_untied, svuint16_t,
> +             z0 = svmul_n_u16_z (p0, z1, MAXPOW),
> +             z0 = svmul_z (p0, z1, MAXPOW))
>  
>  /*
> -** mul_2_u16_z_untied:
> -**   mov     (z[0-9]+\.h), #2
> +** mul_3_u16_z_untied:
> +**   mov     (z[0-9]+\.h), #3
>  ** (
>  **   movprfx z0\.h, p0/z, z1\.h
>  **   mul     z0\.h, p0/m, z0\.h, \1
> @@ -169,9 +348,9 @@ TEST_UNIFORM_Z (mul_2_u16_z_tied1, svuint16_t,
>  ** )
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u16_z_untied, svuint16_t,
> -             z0 = svmul_n_u16_z (p0, z1, 2),
> -             z0 = svmul_z (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u16_z_untied, svuint16_t,
> +             z0 = svmul_n_u16_z (p0, z1, 3),
> +             z0 = svmul_z (p0, z1, 3))
>  
>  /*
>  ** mul_u16_x_tied1:
> @@ -227,23 +406,103 @@ TEST_UNIFORM_ZX (mul_w0_u16_x_untied, svuint16_t, uint16_t,
>                z0 = svmul_x (p0, z1, x0))
>  
>  /*
> -** mul_2_u16_x_tied1:
> -**   mul     z0\.h, z0\.h, #2
> +** mul_4dupop1_u16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u16_x_tied1, svuint16_t,
> +             z0 = svmul_x (p0, svdup_u16 (4), z0),
> +             z0 = svmul_x (p0, svdup_u16 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u16_x_tied1, svuint16_t,
> +             z0 = svmul_x (svptrue_b16 (), svdup_u16 (4), z0),
> +             z0 = svmul_x (svptrue_b16 (), svdup_u16 (4), z0))
> +
> +/*
> +** mul_4dupop2_u16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u16_x_tied1, svuint16_t,
> +             z0 = svmul_x (p0, z0, svdup_u16 (4)),
> +             z0 = svmul_x (p0, z0, svdup_u16 (4)))
> +
> +/*
> +** mul_4nop2_u16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u16_x_tied1, svuint16_t,
> +             z0 = svmul_n_u16_x (p0, z0, 4),
> +             z0 = svmul_x (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u16_x_tied1:
> +**   lsl     z0\.h, z0\.h, #15
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u16_x_tied1, svuint16_t,
> +             z0 = svmul_n_u16_x (p0, z0, MAXPOW),
> +             z0 = svmul_x (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u16_x_tied1:
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u16_x_tied1, svuint16_t,
> +             z0 = svmul_n_u16_x (p0, z0, 1),
> +             z0 = svmul_x (p0, z0, 1))
> +
> +/*
> +** mul_3_u16_x_tied1:
> +**   mul     z0\.h, z0\.h, #3
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_u16_x_tied1, svuint16_t,
> +             z0 = svmul_n_u16_x (p0, z0, 3),
> +             z0 = svmul_x (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u16_x_untied:
> +**   lsl     z0\.h, z1\.h, #2
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u16_x_tied1, svuint16_t,
> -             z0 = svmul_n_u16_x (p0, z0, 2),
> -             z0 = svmul_x (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_4dupop2_u16_x_untied, svuint16_t,
> +             z0 = svmul_x (p0, z1, svdup_u16 (4)),
> +             z0 = svmul_x (p0, z1, svdup_u16 (4)))
>  
>  /*
> -** mul_2_u16_x_untied:
> +** mul_4nop2_u16_x_untied:
> +**   lsl     z0\.h, z1\.h, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u16_x_untied, svuint16_t,
> +             z0 = svmul_n_u16_x (p0, z1, 4),
> +             z0 = svmul_x (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u16_x_untied:
> +**   lsl     z0\.h, z1\.h, #15
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u16_x_untied, svuint16_t,
> +             z0 = svmul_n_u16_x (p0, z1, MAXPOW),
> +             z0 = svmul_x (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_u16_x_untied:
>  **   movprfx z0, z1
> -**   mul     z0\.h, z0\.h, #2
> +**   mul     z0\.h, z0\.h, #3
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u16_x_untied, svuint16_t,
> -             z0 = svmul_n_u16_x (p0, z1, 2),
> -             z0 = svmul_x (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u16_x_untied, svuint16_t,
> +             z0 = svmul_n_u16_x (p0, z1, 3),
> +             z0 = svmul_x (p0, z1, 3))
>  
>  /*
>  ** mul_127_u16_x:
> @@ -256,8 +515,7 @@ TEST_UNIFORM_Z (mul_127_u16_x, svuint16_t,
>  
>  /*
>  ** mul_128_u16_x:
> -**   mov     (z[0-9]+\.h), #128
> -**   mul     z0\.h, p0/m, z0\.h, \1
> +**   lsl     z0\.h, z0\.h, #7
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_128_u16_x, svuint16_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c
> index 38b4bc71b40..5f685c07d11 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c
> @@ -2,6 +2,8 @@
>  
>  #include "test_sve_acle.h"
>  
> +#define MAXPOW 1ULL<<31
> +
>  /*
>  ** mul_u32_m_tied1:
>  **   mul     z0\.s, p0/m, z0\.s, z1\.s
> @@ -54,25 +56,112 @@ TEST_UNIFORM_ZX (mul_w0_u32_m_untied, svuint32_t, uint32_t,
>                z0 = svmul_m (p0, z1, x0))
>  
>  /*
> -** mul_2_u32_m_tied1:
> -**   mov     (z[0-9]+\.s), #2
> +** mul_4dupop1_u32_m_tied1:
> +**   mov     (z[0-9]+)\.s, #4
> +**   mov     (z[0-9]+)\.d, z0\.d
> +**   movprfx z0, \1
> +**   mul     z0\.s, p0/m, z0\.s, \2\.s
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u32_m_tied1, svuint32_t,
> +             z0 = svmul_m (p0, svdup_u32 (4), z0),
> +             z0 = svmul_m (p0, svdup_u32 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u32_m_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u32_m_tied1, svuint32_t,
> +             z0 = svmul_m (svptrue_b32 (), svdup_u32 (4), z0),
> +             z0 = svmul_m (svptrue_b32 (), svdup_u32 (4), z0))
> +
> +/*
> +** mul_4dupop2_u32_m_tied1:
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u32_m_tied1, svuint32_t,
> +             z0 = svmul_m (p0, z0, svdup_u32 (4)),
> +             z0 = svmul_m (p0, z0, svdup_u32 (4)))
> +
> +/*
> +** mul_4nop2_u32_m_tied1:
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u32_m_tied1, svuint32_t,
> +             z0 = svmul_n_u32_m (p0, z0, 4),
> +             z0 = svmul_m (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u32_m_tied1:
> +**   lsl     z0\.s, p0/m, z0\.s, #31
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u32_m_tied1, svuint32_t,
> +             z0 = svmul_n_u32_m (p0, z0, MAXPOW),
> +             z0 = svmul_m (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u32_m_tied1:
> +**   sel     z0\.s, p0, z0\.s, z0\.s
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u32_m_tied1, svuint32_t,
> +             z0 = svmul_n_u32_m (p0, z0, 1),
> +             z0 = svmul_m (p0, z0, 1))
> +
> +/*
> +** mul_3_u32_m_tied1:
> +**   mov     (z[0-9]+\.s), #3
>  **   mul     z0\.s, p0/m, z0\.s, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u32_m_tied1, svuint32_t,
> -             z0 = svmul_n_u32_m (p0, z0, 2),
> -             z0 = svmul_m (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_u32_m_tied1, svuint32_t,
> +             z0 = svmul_n_u32_m (p0, z0, 3),
> +             z0 = svmul_m (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u32_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u32_m_untied, svuint32_t,
> +             z0 = svmul_m (p0, z1, svdup_u32 (4)),
> +             z0 = svmul_m (p0, z1, svdup_u32 (4)))
>  
>  /*
> -** mul_2_u32_m_untied:
> -**   mov     (z[0-9]+\.s), #2
> +** mul_4nop2_u32_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u32_m_untied, svuint32_t,
> +             z0 = svmul_n_u32_m (p0, z1, 4),
> +             z0 = svmul_m (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u32_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.s, p0/m, z0\.s, #31
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u32_m_untied, svuint32_t,
> +             z0 = svmul_n_u32_m (p0, z1, MAXPOW),
> +             z0 = svmul_m (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_u32_m_untied:
> +**   mov     (z[0-9]+\.s), #3
>  **   movprfx z0, z1
>  **   mul     z0\.s, p0/m, z0\.s, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u32_m_untied, svuint32_t,
> -             z0 = svmul_n_u32_m (p0, z1, 2),
> -             z0 = svmul_m (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u32_m_untied, svuint32_t,
> +             z0 = svmul_n_u32_m (p0, z1, 3),
> +             z0 = svmul_m (p0, z1, 3))
>  
>  /*
>  ** mul_m1_u32_m:
> @@ -147,19 +236,109 @@ TEST_UNIFORM_ZX (mul_w0_u32_z_untied, svuint32_t, uint32_t,
>                z0 = svmul_z (p0, z1, x0))
>  
>  /*
> -** mul_2_u32_z_tied1:
> -**   mov     (z[0-9]+\.s), #2
> +** mul_4dupop1_u32_z_tied1:
> +**   movprfx z0\.s, p0/z, z0\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u32_z_tied1, svuint32_t,
> +             z0 = svmul_z (p0, svdup_u32 (4), z0),
> +             z0 = svmul_z (p0, svdup_u32 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u32_z_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u32_z_tied1, svuint32_t,
> +             z0 = svmul_z (svptrue_b32 (), svdup_u32 (4), z0),
> +             z0 = svmul_z (svptrue_b32 (), svdup_u32 (4), z0))
> +
> +/*
> +** mul_4dupop2_u32_z_tied1:
> +**   movprfx z0\.s, p0/z, z0\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u32_z_tied1, svuint32_t,
> +             z0 = svmul_z (p0, z0, svdup_u32 (4)),
> +             z0 = svmul_z (p0, z0, svdup_u32 (4)))
> +
> +/*
> +** mul_4nop2_u32_z_tied1:
> +**   movprfx z0\.s, p0/z, z0\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u32_z_tied1, svuint32_t,
> +             z0 = svmul_n_u32_z (p0, z0, 4),
> +             z0 = svmul_z (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u32_z_tied1:
> +**   movprfx z0\.s, p0/z, z0\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #31
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u32_z_tied1, svuint32_t,
> +             z0 = svmul_n_u32_z (p0, z0, MAXPOW),
> +             z0 = svmul_z (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u32_z_tied1:
> +**   mov     z31.s, #1
> +**   movprfx z0.s, p0/z, z0.s
> +**   mul     z0.s, p0/m, z0.s, z31.s
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u32_z_tied1, svuint32_t,
> +             z0 = svmul_n_u32_z (p0, z0, 1),
> +             z0 = svmul_z (p0, z0, 1))
> +
> +/*
> +** mul_3_u32_z_tied1:
> +**   mov     (z[0-9]+\.s), #3
>  **   movprfx z0\.s, p0/z, z0\.s
>  **   mul     z0\.s, p0/m, z0\.s, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u32_z_tied1, svuint32_t,
> -             z0 = svmul_n_u32_z (p0, z0, 2),
> -             z0 = svmul_z (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_u32_z_tied1, svuint32_t,
> +             z0 = svmul_n_u32_z (p0, z0, 3),
> +             z0 = svmul_z (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u32_z_untied:
> +**   movprfx z0\.s, p0/z, z1\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u32_z_untied, svuint32_t,
> +             z0 = svmul_z (p0, z1, svdup_u32 (4)),
> +             z0 = svmul_z (p0, z1, svdup_u32 (4)))
> +
> +/*
> +** mul_4nop2_u32_z_untied:
> +**   movprfx z0\.s, p0/z, z1\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u32_z_untied, svuint32_t,
> +             z0 = svmul_n_u32_z (p0, z1, 4),
> +             z0 = svmul_z (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u32_z_untied:
> +**   movprfx z0\.s, p0/z, z1\.s
> +**   lsl     z0\.s, p0/m, z0\.s, #31
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u32_z_untied, svuint32_t,
> +             z0 = svmul_n_u32_z (p0, z1, MAXPOW),
> +             z0 = svmul_z (p0, z1, MAXPOW))
>  
>  /*
> -** mul_2_u32_z_untied:
> -**   mov     (z[0-9]+\.s), #2
> +** mul_3_u32_z_untied:
> +**   mov     (z[0-9]+\.s), #3
>  ** (
>  **   movprfx z0\.s, p0/z, z1\.s
>  **   mul     z0\.s, p0/m, z0\.s, \1
> @@ -169,9 +348,9 @@ TEST_UNIFORM_Z (mul_2_u32_z_tied1, svuint32_t,
>  ** )
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u32_z_untied, svuint32_t,
> -             z0 = svmul_n_u32_z (p0, z1, 2),
> -             z0 = svmul_z (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u32_z_untied, svuint32_t,
> +             z0 = svmul_n_u32_z (p0, z1, 3),
> +             z0 = svmul_z (p0, z1, 3))
>  
>  /*
>  ** mul_u32_x_tied1:
> @@ -227,23 +406,103 @@ TEST_UNIFORM_ZX (mul_w0_u32_x_untied, svuint32_t, uint32_t,
>                z0 = svmul_x (p0, z1, x0))
>  
>  /*
> -** mul_2_u32_x_tied1:
> -**   mul     z0\.s, z0\.s, #2
> +** mul_4dupop1_u32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u32_x_tied1, svuint32_t,
> +             z0 = svmul_x (p0, svdup_u32 (4), z0),
> +             z0 = svmul_x (p0, svdup_u32 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u32_x_tied1, svuint32_t,
> +             z0 = svmul_x (svptrue_b32 (), svdup_u32 (4), z0),
> +             z0 = svmul_x (svptrue_b32 (), svdup_u32 (4), z0))
> +
> +/*
> +** mul_4dupop2_u32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u32_x_tied1, svuint32_t,
> +             z0 = svmul_x (p0, z0, svdup_u32 (4)),
> +             z0 = svmul_x (p0, z0, svdup_u32 (4)))
> +
> +/*
> +** mul_4nop2_u32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u32_x_tied1, svuint32_t,
> +             z0 = svmul_n_u32_x (p0, z0, 4),
> +             z0 = svmul_x (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u32_x_tied1:
> +**   lsl     z0\.s, z0\.s, #31
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u32_x_tied1, svuint32_t,
> +             z0 = svmul_n_u32_x (p0, z0, MAXPOW),
> +             z0 = svmul_x (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u32_x_tied1:
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u32_x_tied1, svuint32_t,
> +             z0 = svmul_n_u32_x (p0, z0, 1),
> +             z0 = svmul_x (p0, z0, 1))
> +
> +/*
> +** mul_3_u32_x_tied1:
> +**   mul     z0\.s, z0\.s, #3
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_u32_x_tied1, svuint32_t,
> +             z0 = svmul_n_u32_x (p0, z0, 3),
> +             z0 = svmul_x (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u32_x_untied:
> +**   lsl     z0\.s, z1\.s, #2
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u32_x_tied1, svuint32_t,
> -             z0 = svmul_n_u32_x (p0, z0, 2),
> -             z0 = svmul_x (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_4dupop2_u32_x_untied, svuint32_t,
> +             z0 = svmul_x (p0, z1, svdup_u32 (4)),
> +             z0 = svmul_x (p0, z1, svdup_u32 (4)))
>  
>  /*
> -** mul_2_u32_x_untied:
> +** mul_4nop2_u32_x_untied:
> +**   lsl     z0\.s, z1\.s, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u32_x_untied, svuint32_t,
> +             z0 = svmul_n_u32_x (p0, z1, 4),
> +             z0 = svmul_x (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u32_x_untied:
> +**   lsl     z0\.s, z1\.s, #31
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u32_x_untied, svuint32_t,
> +             z0 = svmul_n_u32_x (p0, z1, MAXPOW),
> +             z0 = svmul_x (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_u32_x_untied:
>  **   movprfx z0, z1
> -**   mul     z0\.s, z0\.s, #2
> +**   mul     z0\.s, z0\.s, #3
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u32_x_untied, svuint32_t,
> -             z0 = svmul_n_u32_x (p0, z1, 2),
> -             z0 = svmul_x (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u32_x_untied, svuint32_t,
> +             z0 = svmul_n_u32_x (p0, z1, 3),
> +             z0 = svmul_x (p0, z1, 3))
>  
>  /*
>  ** mul_127_u32_x:
> @@ -256,8 +515,7 @@ TEST_UNIFORM_Z (mul_127_u32_x, svuint32_t,
>  
>  /*
>  ** mul_128_u32_x:
> -**   mov     (z[0-9]+\.s), #128
> -**   mul     z0\.s, p0/m, z0\.s, \1
> +**   lsl     z0\.s, z0\.s, #7
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_128_u32_x, svuint32_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c
> index ab655554db7..1302975ef43 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c
> @@ -2,6 +2,8 @@
>  
>  #include "test_sve_acle.h"
>  
> +#define MAXPOW 1ULL<<63
> +
>  /*
>  ** mul_u64_m_tied1:
>  **   mul     z0\.d, p0/m, z0\.d, z1\.d
> @@ -53,10 +55,66 @@ TEST_UNIFORM_ZX (mul_x0_u64_m_untied, svuint64_t, uint64_t,
>                z0 = svmul_n_u64_m (p0, z1, x0),
>                z0 = svmul_m (p0, z1, x0))
>  
> +/*
> +** mul_4dupop1_u64_m_tied1:
> +**   mov     (z[0-9]+)\.d, #4
> +**   mov     (z[0-9]+\.d), z0\.d
> +**   movprfx z0, \1
> +**   mul     z0\.d, p0/m, z0\.d, \2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u64_m_tied1, svuint64_t,
> +             z0 = svmul_m (p0, svdup_u64 (4), z0),
> +             z0 = svmul_m (p0, svdup_u64 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u64_m_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u64_m_tied1, svuint64_t,
> +             z0 = svmul_m (svptrue_b64 (), svdup_u64 (4), z0),
> +             z0 = svmul_m (svptrue_b64 (), svdup_u64 (4), z0))
> +
> +/*
> +** mul_4dupop2_u64_m_tied1:
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u64_m_tied1, svuint64_t,
> +             z0 = svmul_m (p0, z0, svdup_u64 (4)),
> +             z0 = svmul_m (p0, z0, svdup_u64 (4)))
> +
> +/*
> +** mul_4nop2_u64_m_tied1:
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u64_m_tied1, svuint64_t,
> +             z0 = svmul_n_u64_m (p0, z0, 4),
> +             z0 = svmul_m (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u64_m_tied1:
> +**   lsl     z0\.d, p0/m, z0\.d, #63
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u64_m_tied1, svuint64_t,
> +             z0 = svmul_n_u64_m (p0, z0, MAXPOW),
> +             z0 = svmul_m (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u64_m_tied1:
> +**   sel     z0\.d, p0, z0\.d, z0\.d
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u64_m_tied1, svuint64_t,
> +             z0 = svmul_n_u64_m (p0, z0, 1),
> +             z0 = svmul_m (p0, z0, 1))
> +
>  /*
>  ** mul_2_u64_m_tied1:
> -**   mov     (z[0-9]+\.d), #2
> -**   mul     z0\.d, p0/m, z0\.d, \1
> +**   lsl     z0\.d, p0/m, z0\.d, #1
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_2_u64_m_tied1, svuint64_t,
> @@ -64,15 +122,55 @@ TEST_UNIFORM_Z (mul_2_u64_m_tied1, svuint64_t,
>               z0 = svmul_m (p0, z0, 2))
>  
>  /*
> -** mul_2_u64_m_untied:
> -**   mov     (z[0-9]+\.d), #2
> +** mul_3_u64_m_tied1:
> +**   mov     (z[0-9]+\.d), #3
> +**   mul     z0\.d, p0/m, z0\.d, \1
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_u64_m_tied1, svuint64_t,
> +             z0 = svmul_n_u64_m (p0, z0, 3),
> +             z0 = svmul_m (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u64_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u64_m_untied, svuint64_t,
> +             z0 = svmul_m (p0, z1, svdup_u64 (4)),
> +             z0 = svmul_m (p0, z1, svdup_u64 (4)))
> +
> +/*
> +** mul_4nop2_u64_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u64_m_untied, svuint64_t,
> +             z0 = svmul_n_u64_m (p0, z1, 4),
> +             z0 = svmul_m (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u64_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.d, p0/m, z0\.d, #63
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u64_m_untied, svuint64_t,
> +             z0 = svmul_n_u64_m (p0, z1, MAXPOW),
> +             z0 = svmul_m (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_u64_m_untied:
> +**   mov     (z[0-9]+\.d), #3
>  **   movprfx z0, z1
>  **   mul     z0\.d, p0/m, z0\.d, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u64_m_untied, svuint64_t,
> -             z0 = svmul_n_u64_m (p0, z1, 2),
> -             z0 = svmul_m (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u64_m_untied, svuint64_t,
> +             z0 = svmul_n_u64_m (p0, z1, 3),
> +             z0 = svmul_m (p0, z1, 3))
>  
>  /*
>  ** mul_m1_u64_m:
> @@ -147,10 +245,69 @@ TEST_UNIFORM_ZX (mul_x0_u64_z_untied, svuint64_t, uint64_t,
>                z0 = svmul_z (p0, z1, x0))
>  
>  /*
> -** mul_2_u64_z_tied1:
> -**   mov     (z[0-9]+\.d), #2
> +** mul_4dupop1_u64_z_tied1:
>  **   movprfx z0\.d, p0/z, z0\.d
> -**   mul     z0\.d, p0/m, z0\.d, \1
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u64_z_tied1, svuint64_t,
> +             z0 = svmul_z (p0, svdup_u64 (4), z0),
> +             z0 = svmul_z (p0, svdup_u64 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u64_z_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u64_z_tied1, svuint64_t,
> +             z0 = svmul_z (svptrue_b64 (), svdup_u64 (4), z0),
> +             z0 = svmul_z (svptrue_b64 (), svdup_u64 (4), z0))
> +
> +/*
> +** mul_4dupop2_u64_z_tied1:
> +**   movprfx z0\.d, p0/z, z0\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u64_z_tied1, svuint64_t,
> +             z0 = svmul_z (p0, z0, svdup_u64 (4)),
> +             z0 = svmul_z (p0, z0, svdup_u64 (4)))
> +
> +/*
> +** mul_4nop2_u64_z_tied1:
> +**   movprfx z0\.d, p0/z, z0\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u64_z_tied1, svuint64_t,
> +             z0 = svmul_n_u64_z (p0, z0, 4),
> +             z0 = svmul_z (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u64_z_tied1:
> +**   movprfx z0\.d, p0/z, z0\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #63
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u64_z_tied1, svuint64_t,
> +             z0 = svmul_n_u64_z (p0, z0, MAXPOW),
> +             z0 = svmul_z (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u64_z_tied1:
> +**   mov     z31.d, #1
> +**   movprfx z0.d, p0/z, z0.d
> +**   mul     z0.d, p0/m, z0.d, z31.d
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u64_z_tied1, svuint64_t,
> +             z0 = svmul_n_u64_z (p0, z0, 1),
> +             z0 = svmul_z (p0, z0, 1))
> +
> +/*
> +** mul_2_u64_z_tied1:
> +**   movprfx z0.d, p0/z, z0.d
> +**   lsl     z0.d, p0/m, z0.d, #1
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_2_u64_z_tied1, svuint64_t,
> @@ -158,8 +315,49 @@ TEST_UNIFORM_Z (mul_2_u64_z_tied1, svuint64_t,
>               z0 = svmul_z (p0, z0, 2))
>  
>  /*
> -** mul_2_u64_z_untied:
> -**   mov     (z[0-9]+\.d), #2
> +** mul_3_u64_z_tied1:
> +**   mov     (z[0-9]+\.d), #3
> +**   movprfx z0\.d, p0/z, z0\.d
> +**   mul     z0\.d, p0/m, z0\.d, \1
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_u64_z_tied1, svuint64_t,
> +             z0 = svmul_n_u64_z (p0, z0, 3),
> +             z0 = svmul_z (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u64_z_untied:
> +**   movprfx z0\.d, p0/z, z1\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u64_z_untied, svuint64_t,
> +             z0 = svmul_z (p0, z1, svdup_u64 (4)),
> +             z0 = svmul_z (p0, z1, svdup_u64 (4)))
> +
> +/*
> +** mul_4nop2_u64_z_untied:
> +**   movprfx z0\.d, p0/z, z1\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u64_z_untied, svuint64_t,
> +             z0 = svmul_n_u64_z (p0, z1, 4),
> +             z0 = svmul_z (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u64_z_untied:
> +**   movprfx z0\.d, p0/z, z1\.d
> +**   lsl     z0\.d, p0/m, z0\.d, #63
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u64_z_untied, svuint64_t,
> +             z0 = svmul_n_u64_z (p0, z1, MAXPOW),
> +             z0 = svmul_z (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_u64_z_untied:
> +**   mov     (z[0-9]+\.d), #3
>  ** (
>  **   movprfx z0\.d, p0/z, z1\.d
>  **   mul     z0\.d, p0/m, z0\.d, \1
> @@ -169,9 +367,9 @@ TEST_UNIFORM_Z (mul_2_u64_z_tied1, svuint64_t,
>  ** )
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u64_z_untied, svuint64_t,
> -             z0 = svmul_n_u64_z (p0, z1, 2),
> -             z0 = svmul_z (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u64_z_untied, svuint64_t,
> +             z0 = svmul_n_u64_z (p0, z1, 3),
> +             z0 = svmul_z (p0, z1, 3))
>  
>  /*
>  ** mul_u64_x_tied1:
> @@ -226,9 +424,62 @@ TEST_UNIFORM_ZX (mul_x0_u64_x_untied, svuint64_t, uint64_t,
>                z0 = svmul_n_u64_x (p0, z1, x0),
>                z0 = svmul_x (p0, z1, x0))
>  
> +/*
> +** mul_4dupop1_u64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u64_x_tied1, svuint64_t,
> +             z0 = svmul_x (p0, svdup_u64 (4), z0),
> +             z0 = svmul_x (p0, svdup_u64 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u64_x_tied1, svuint64_t,
> +             z0 = svmul_x (svptrue_b64 (), svdup_u64 (4), z0),
> +             z0 = svmul_x (svptrue_b64 (), svdup_u64 (4), z0))
> +
> +/*
> +** mul_4dupop2_u64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u64_x_tied1, svuint64_t,
> +             z0 = svmul_x (p0, z0, svdup_u64 (4)),
> +             z0 = svmul_x (p0, z0, svdup_u64 (4)))
> +
> +/*
> +** mul_4nop2_u64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u64_x_tied1, svuint64_t,
> +             z0 = svmul_n_u64_x (p0, z0, 4),
> +             z0 = svmul_x (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u64_x_tied1:
> +**   lsl     z0\.d, z0\.d, #63
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u64_x_tied1, svuint64_t,
> +             z0 = svmul_n_u64_x (p0, z0, MAXPOW),
> +             z0 = svmul_x (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u64_x_tied1:
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u64_x_tied1, svuint64_t,
> +             z0 = svmul_n_u64_x (p0, z0, 1),
> +             z0 = svmul_x (p0, z0, 1))
> +
>  /*
>  ** mul_2_u64_x_tied1:
> -**   mul     z0\.d, z0\.d, #2
> +**   add     z0\.d, z0\.d, z0\.d
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_2_u64_x_tied1, svuint64_t,
> @@ -236,14 +487,50 @@ TEST_UNIFORM_Z (mul_2_u64_x_tied1, svuint64_t,
>               z0 = svmul_x (p0, z0, 2))
>  
>  /*
> -** mul_2_u64_x_untied:
> +** mul_3_u64_x_tied1:
> +**   mul     z0\.d, z0\.d, #3
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_u64_x_tied1, svuint64_t,
> +             z0 = svmul_n_u64_x (p0, z0, 3),
> +             z0 = svmul_x (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u64_x_untied:
> +**   lsl     z0\.d, z1\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u64_x_untied, svuint64_t,
> +             z0 = svmul_x (p0, z1, svdup_u64 (4)),
> +             z0 = svmul_x (p0, z1, svdup_u64 (4)))
> +
> +/*
> +** mul_4nop2_u64_x_untied:
> +**   lsl     z0\.d, z1\.d, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u64_x_untied, svuint64_t,
> +             z0 = svmul_n_u64_x (p0, z1, 4),
> +             z0 = svmul_x (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u64_x_untied:
> +**   lsl     z0\.d, z1\.d, #63
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u64_x_untied, svuint64_t,
> +             z0 = svmul_n_u64_x (p0, z1, MAXPOW),
> +             z0 = svmul_x (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_u64_x_untied:
>  **   movprfx z0, z1
> -**   mul     z0\.d, z0\.d, #2
> +**   mul     z0\.d, z0\.d, #3
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u64_x_untied, svuint64_t,
> -             z0 = svmul_n_u64_x (p0, z1, 2),
> -             z0 = svmul_x (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u64_x_untied, svuint64_t,
> +             z0 = svmul_n_u64_x (p0, z1, 3),
> +             z0 = svmul_x (p0, z1, 3))
>  
>  /*
>  ** mul_127_u64_x:
> @@ -256,8 +543,7 @@ TEST_UNIFORM_Z (mul_127_u64_x, svuint64_t,
>  
>  /*
>  ** mul_128_u64_x:
> -**   mov     (z[0-9]+\.d), #128
> -**   mul     z0\.d, p0/m, z0\.d, \1
> +**   lsl     z0\.d, z0\.d, #7
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_128_u64_x, svuint64_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c
> index ef0a5220dc0..ed74742f36d 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c
> @@ -2,6 +2,8 @@
>  
>  #include "test_sve_acle.h"
>  
> +#define MAXPOW 1<<7
> +
>  /*
>  ** mul_u8_m_tied1:
>  **   mul     z0\.b, p0/m, z0\.b, z1\.b
> @@ -54,30 +56,117 @@ TEST_UNIFORM_ZX (mul_w0_u8_m_untied, svuint8_t, uint8_t,
>                z0 = svmul_m (p0, z1, x0))
>  
>  /*
> -** mul_2_u8_m_tied1:
> -**   mov     (z[0-9]+\.b), #2
> +** mul_4dupop1_u8_m_tied1:
> +**   mov     (z[0-9]+)\.b, #4
> +**   mov     (z[0-9]+)\.d, z0\.d
> +**   movprfx z0, \1
> +**   mul     z0\.b, p0/m, z0\.b, \2\.b
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u8_m_tied1, svuint8_t,
> +             z0 = svmul_m (p0, svdup_u8 (4), z0),
> +             z0 = svmul_m (p0, svdup_u8 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u8_m_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u8_m_tied1, svuint8_t,
> +             z0 = svmul_m (svptrue_b8 (), svdup_u8 (4), z0),
> +             z0 = svmul_m (svptrue_b8 (), svdup_u8 (4), z0))
> +
> +/*
> +** mul_4dupop2_u8_m_tied1:
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u8_m_tied1, svuint8_t,
> +             z0 = svmul_m (p0, z0, svdup_u8 (4)),
> +             z0 = svmul_m (p0, z0, svdup_u8 (4)))
> +
> +/*
> +** mul_4nop2_u8_m_tied1:
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u8_m_tied1, svuint8_t,
> +             z0 = svmul_n_u8_m (p0, z0, 4),
> +             z0 = svmul_m (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u8_m_tied1:
> +**   lsl     z0\.b, p0/m, z0\.b, #7
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u8_m_tied1, svuint8_t,
> +             z0 = svmul_n_u8_m (p0, z0, MAXPOW),
> +             z0 = svmul_m (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u8_m_tied1:
> +**   sel     z0\.b, p0, z0\.b, z0\.b
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u8_m_tied1, svuint8_t,
> +             z0 = svmul_n_u8_m (p0, z0, 1),
> +             z0 = svmul_m (p0, z0, 1))
> +
> +/*
> +** mul_3_u8_m_tied1:
> +**   mov     (z[0-9]+\.b), #3
>  **   mul     z0\.b, p0/m, z0\.b, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u8_m_tied1, svuint8_t,
> -             z0 = svmul_n_u8_m (p0, z0, 2),
> -             z0 = svmul_m (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_u8_m_tied1, svuint8_t,
> +             z0 = svmul_n_u8_m (p0, z0, 3),
> +             z0 = svmul_m (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u8_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u8_m_untied, svuint8_t,
> +             z0 = svmul_m (p0, z1, svdup_u8 (4)),
> +             z0 = svmul_m (p0, z1, svdup_u8 (4)))
>  
>  /*
> -** mul_2_u8_m_untied:
> -**   mov     (z[0-9]+\.b), #2
> +** mul_4nop2_u8_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u8_m_untied, svuint8_t,
> +             z0 = svmul_n_u8_m (p0, z1, 4),
> +             z0 = svmul_m (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u8_m_untied:
> +**   movprfx z0, z1
> +**   lsl     z0\.b, p0/m, z0\.b, #7
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u8_m_untied, svuint8_t,
> +             z0 = svmul_n_u8_m (p0, z1, MAXPOW),
> +             z0 = svmul_m (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_u8_m_untied:
> +**   mov     (z[0-9]+\.b), #3
>  **   movprfx z0, z1
>  **   mul     z0\.b, p0/m, z0\.b, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u8_m_untied, svuint8_t,
> -             z0 = svmul_n_u8_m (p0, z1, 2),
> -             z0 = svmul_m (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u8_m_untied, svuint8_t,
> +             z0 = svmul_n_u8_m (p0, z1, 3),
> +             z0 = svmul_m (p0, z1, 3))
>  
>  /*
>  ** mul_m1_u8_m:
> -**   mov     (z[0-9]+\.b), #-1
> -**   mul     z0\.b, p0/m, z0\.b, \1
> +**   mov     (z[0-9]+)\.b, #-1
> +**   mul     z0\.b, p0/m, z0\.b, \1\.b
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_m1_u8_m, svuint8_t,
> @@ -147,19 +236,109 @@ TEST_UNIFORM_ZX (mul_w0_u8_z_untied, svuint8_t, uint8_t,
>                z0 = svmul_z (p0, z1, x0))
>  
>  /*
> -** mul_2_u8_z_tied1:
> -**   mov     (z[0-9]+\.b), #2
> +** mul_4dupop1_u8_z_tied1:
> +**   movprfx z0\.b, p0/z, z0\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u8_z_tied1, svuint8_t,
> +             z0 = svmul_z (p0, svdup_u8 (4), z0),
> +             z0 = svmul_z (p0, svdup_u8 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u8_z_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u8_z_tied1, svuint8_t,
> +             z0 = svmul_z (svptrue_b8 (), svdup_u8 (4), z0),
> +             z0 = svmul_z (svptrue_b8 (), svdup_u8 (4), z0))
> +
> +/*
> +** mul_4dupop2_u8_z_tied1:
> +**   movprfx z0\.b, p0/z, z0\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u8_z_tied1, svuint8_t,
> +             z0 = svmul_z (p0, z0, svdup_u8 (4)),
> +             z0 = svmul_z (p0, z0, svdup_u8 (4)))
> +
> +/*
> +** mul_4nop2_u8_z_tied1:
> +**   movprfx z0\.b, p0/z, z0\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u8_z_tied1, svuint8_t,
> +             z0 = svmul_n_u8_z (p0, z0, 4),
> +             z0 = svmul_z (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u8_z_tied1:
> +**   movprfx z0\.b, p0/z, z0\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #7
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u8_z_tied1, svuint8_t,
> +             z0 = svmul_n_u8_z (p0, z0, MAXPOW),
> +             z0 = svmul_z (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u8_z_tied1:
> +**   mov     z31.b, #1
> +**   movprfx z0.b, p0/z, z0.b
> +**   mul     z0.b, p0/m, z0.b, z31.b
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u8_z_tied1, svuint8_t,
> +             z0 = svmul_n_u8_z (p0, z0, 1),
> +             z0 = svmul_z (p0, z0, 1))
> +
> +/*
> +** mul_3_u8_z_tied1:
> +**   mov     (z[0-9]+\.b), #3
>  **   movprfx z0\.b, p0/z, z0\.b
>  **   mul     z0\.b, p0/m, z0\.b, \1
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u8_z_tied1, svuint8_t,
> -             z0 = svmul_n_u8_z (p0, z0, 2),
> -             z0 = svmul_z (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_3_u8_z_tied1, svuint8_t,
> +             z0 = svmul_n_u8_z (p0, z0, 3),
> +             z0 = svmul_z (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u8_z_untied:
> +**   movprfx z0\.b, p0/z, z1\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u8_z_untied, svuint8_t,
> +             z0 = svmul_z (p0, z1, svdup_u8 (4)),
> +             z0 = svmul_z (p0, z1, svdup_u8 (4)))
>  
>  /*
> -** mul_2_u8_z_untied:
> -**   mov     (z[0-9]+\.b), #2
> +** mul_4nop2_u8_z_untied:
> +**   movprfx z0\.b, p0/z, z1\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u8_z_untied, svuint8_t,
> +             z0 = svmul_n_u8_z (p0, z1, 4),
> +             z0 = svmul_z (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u8_z_untied:
> +**   movprfx z0\.b, p0/z, z1\.b
> +**   lsl     z0\.b, p0/m, z0\.b, #7
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u8_z_untied, svuint8_t,
> +             z0 = svmul_n_u8_z (p0, z1, MAXPOW),
> +             z0 = svmul_z (p0, z1, MAXPOW))
> +
> +/*
> +** mul_3_u8_z_untied:
> +**   mov     (z[0-9]+\.b), #3
>  ** (
>  **   movprfx z0\.b, p0/z, z1\.b
>  **   mul     z0\.b, p0/m, z0\.b, \1
> @@ -169,9 +348,9 @@ TEST_UNIFORM_Z (mul_2_u8_z_tied1, svuint8_t,
>  ** )
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u8_z_untied, svuint8_t,
> -             z0 = svmul_n_u8_z (p0, z1, 2),
> -             z0 = svmul_z (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u8_z_untied, svuint8_t,
> +             z0 = svmul_n_u8_z (p0, z1, 3),
> +             z0 = svmul_z (p0, z1, 3))
>  
>  /*
>  ** mul_u8_x_tied1:
> @@ -227,23 +406,103 @@ TEST_UNIFORM_ZX (mul_w0_u8_x_untied, svuint8_t, uint8_t,
>                z0 = svmul_x (p0, z1, x0))
>  
>  /*
> -** mul_2_u8_x_tied1:
> -**   mul     z0\.b, z0\.b, #2
> +** mul_4dupop1_u8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1_u8_x_tied1, svuint8_t,
> +             z0 = svmul_x (p0, svdup_u8 (4), z0),
> +             z0 = svmul_x (p0, svdup_u8 (4), z0))
> +
> +/*
> +** mul_4dupop1ptrue_u8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop1ptrue_u8_x_tied1, svuint8_t,
> +             z0 = svmul_x (svptrue_b8 (), svdup_u8 (4), z0),
> +             z0 = svmul_x (svptrue_b8 (), svdup_u8 (4), z0))
> +
> +/*
> +** mul_4dupop2_u8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u8_x_tied1, svuint8_t,
> +             z0 = svmul_x (p0, z0, svdup_u8 (4)),
> +             z0 = svmul_x (p0, z0, svdup_u8 (4)))
> +
> +/*
> +** mul_4nop2_u8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u8_x_tied1, svuint8_t,
> +             z0 = svmul_n_u8_x (p0, z0, 4),
> +             z0 = svmul_x (p0, z0, 4))
> +
> +/*
> +** mul_maxpownop2_u8_x_tied1:
> +**   lsl     z0\.b, z0\.b, #7
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_maxpownop2_u8_x_tied1, svuint8_t,
> +             z0 = svmul_n_u8_x (p0, z0, MAXPOW),
> +             z0 = svmul_x (p0, z0, MAXPOW))
> +
> +/*
> +** mul_1_u8_x_tied1:
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_1_u8_x_tied1, svuint8_t,
> +             z0 = svmul_n_u8_x (p0, z0, 1),
> +             z0 = svmul_x (p0, z0, 1))
> +
> +/*
> +** mul_3_u8_x_tied1:
> +**   mul     z0\.b, z0\.b, #3
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_3_u8_x_tied1, svuint8_t,
> +             z0 = svmul_n_u8_x (p0, z0, 3),
> +             z0 = svmul_x (p0, z0, 3))
> +
> +/*
> +** mul_4dupop2_u8_x_untied:
> +**   lsl     z0\.b, z1\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4dupop2_u8_x_untied, svuint8_t,
> +             z0 = svmul_x (p0, z1, svdup_u8 (4)),
> +             z0 = svmul_x (p0, z1, svdup_u8 (4)))
> +
> +/*
> +** mul_4nop2_u8_x_untied:
> +**   lsl     z0\.b, z1\.b, #2
> +**   ret
> +*/
> +TEST_UNIFORM_Z (mul_4nop2_u8_x_untied, svuint8_t,
> +             z0 = svmul_n_u8_x (p0, z1, 4),
> +             z0 = svmul_x (p0, z1, 4))
> +
> +/*
> +** mul_maxpownop2_u8_x_untied:
> +**   lsl     z0\.b, z1\.b, #7
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u8_x_tied1, svuint8_t,
> -             z0 = svmul_n_u8_x (p0, z0, 2),
> -             z0 = svmul_x (p0, z0, 2))
> +TEST_UNIFORM_Z (mul_maxpownop2_u8_x_untied, svuint8_t,
> +             z0 = svmul_n_u8_x (p0, z1, MAXPOW),
> +             z0 = svmul_x (p0, z1, MAXPOW))
>  
>  /*
> -** mul_2_u8_x_untied:
> +** mul_3_u8_x_untied:
>  **   movprfx z0, z1
> -**   mul     z0\.b, z0\.b, #2
> +**   mul     z0\.b, z0\.b, #3
>  **   ret
>  */
> -TEST_UNIFORM_Z (mul_2_u8_x_untied, svuint8_t,
> -             z0 = svmul_n_u8_x (p0, z1, 2),
> -             z0 = svmul_x (p0, z1, 2))
> +TEST_UNIFORM_Z (mul_3_u8_x_untied, svuint8_t,
> +             z0 = svmul_n_u8_x (p0, z1, 3),
> +             z0 = svmul_x (p0, z1, 3))
>  
>  /*
>  ** mul_127_u8_x:
> @@ -256,7 +515,7 @@ TEST_UNIFORM_Z (mul_127_u8_x, svuint8_t,
>  
>  /*
>  ** mul_128_u8_x:
> -**   mul     z0\.b, z0\.b, #-128
> +**   lsl     z0\.b, z0\.b, #7
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_128_u8_x, svuint8_t,
> @@ -292,7 +551,7 @@ TEST_UNIFORM_Z (mul_m127_u8_x, svuint8_t,
>  
>  /*
>  ** mul_m128_u8_x:
> -**   mul     z0\.b, z0\.b, #-128
> +**   lsl     z0\.b, z0\.b, #7
>  **   ret
>  */
>  TEST_UNIFORM_Z (mul_m128_u8_x, svuint8_t,
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c b/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c
> new file mode 100644
> index 00000000000..6af00439e39
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c
> @@ -0,0 +1,101 @@
> +/* { dg-do run { target aarch64_sve128_hw } } */
> +/* { dg-options "-O2 -msve-vector-bits=128" } */
> +
> +#include <arm_sve.h>
> +#include <stdint.h>
> +
> +typedef svbool_t pred __attribute__((arm_sve_vector_bits(128)));
> +typedef svfloat16_t svfloat16_ __attribute__((arm_sve_vector_bits(128)));
> +typedef svfloat32_t svfloat32_ __attribute__((arm_sve_vector_bits(128)));
> +typedef svfloat64_t svfloat64_ __attribute__((arm_sve_vector_bits(128)));
> +typedef svint32_t svint32_ __attribute__((arm_sve_vector_bits(128)));
> +typedef svint64_t svint64_ __attribute__((arm_sve_vector_bits(128)));
> +typedef svuint32_t svuint32_ __attribute__((arm_sve_vector_bits(128)));
> +typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
> +
> +#define F(T, TS, P, OP1, OP2)                                        \
> +{                                                                    \
> +  T##_t op1 = (T##_t) OP1;                                           \
> +  T##_t op2 = (T##_t) OP2;                                           \
> +  sv##T##_ res = svmul_##P (pg, svdup_##TS (op1), svdup_##TS (op2)); \
> +  sv##T##_ exp = svdup_##TS (op1 * op2);                             \
> +  if (svptest_any (pg, svcmpne (pg, exp, res)))                      \
> +    __builtin_abort ();                                              \
> +                                                                     \
> +  sv##T##_ res_n = svmul_##P (pg, svdup_##TS (op1), op2);            \
> +  if (svptest_any (pg, svcmpne (pg, exp, res_n)))                    \
> +    __builtin_abort ();                                              \
> +}
> +
> +#define TEST_TYPES_1(T, TS)                                          \
> +  F (T, TS, m, 79, 16)                                               \
> +  F (T, TS, z, 79, 16)                                               \
> +  F (T, TS, x, 79, 16)
> +
> +#define TEST_TYPES                                                   \
> +  TEST_TYPES_1 (float16, f16)                                        \
> +  TEST_TYPES_1 (float32, f32)                                        \
> +  TEST_TYPES_1 (float64, f64)                                        \
> +  TEST_TYPES_1 (int32, s32)                                          \
> +  TEST_TYPES_1 (int64, s64)                                          \
> +  TEST_TYPES_1 (uint32, u32)                                         \
> +  TEST_TYPES_1 (uint64, u64)
> +
> +#define TEST_VALUES_S_1(B, OP1, OP2)                                 \
> +  F (int##B, s##B, x, OP1, OP2)
> +
> +#define TEST_VALUES_S                                                \
> +  TEST_VALUES_S_1 (32, INT32_MIN, INT32_MIN)                         \
> +  TEST_VALUES_S_1 (64, INT64_MIN, INT64_MIN)                         \
> +  TEST_VALUES_S_1 (32, 4, 4)                                         \
> +  TEST_VALUES_S_1 (32, -7, 4)                                        \
> +  TEST_VALUES_S_1 (32, 4, -7)                                        \
> +  TEST_VALUES_S_1 (64, 4, 4)                                         \
> +  TEST_VALUES_S_1 (64, -7, 4)                                        \
> +  TEST_VALUES_S_1 (64, 4, -7)                                        \
> +  TEST_VALUES_S_1 (32, INT32_MAX, (1 << 30))                         \
> +  TEST_VALUES_S_1 (32, (1 << 30), INT32_MAX)                         \
> +  TEST_VALUES_S_1 (64, INT64_MAX, (1ULL << 62))                      \
> +  TEST_VALUES_S_1 (64, (1ULL << 62), INT64_MAX)                      \
> +  TEST_VALUES_S_1 (32, INT32_MIN, (1 << 30))                         \
> +  TEST_VALUES_S_1 (64, INT64_MIN, (1ULL << 62))                      \
> +  TEST_VALUES_S_1 (32, INT32_MAX, 1)                                 \
> +  TEST_VALUES_S_1 (32, INT32_MAX, 1)                                 \
> +  TEST_VALUES_S_1 (64, 1, INT64_MAX)                                 \
> +  TEST_VALUES_S_1 (64, 1, INT64_MAX)                                 \
> +  TEST_VALUES_S_1 (32, INT32_MIN, 16)                                \
> +  TEST_VALUES_S_1 (64, INT64_MIN, 16)                                \
> +  TEST_VALUES_S_1 (32, INT32_MAX, -5)                                \
> +  TEST_VALUES_S_1 (64, INT64_MAX, -5)                                \
> +  TEST_VALUES_S_1 (32, INT32_MIN, -4)                                \
> +  TEST_VALUES_S_1 (64, INT64_MIN, -4)
> +
> +#define TEST_VALUES_U_1(B, OP1, OP2)                                 \
> +  F (uint##B, u##B, x, OP1, OP2)
> +
> +#define TEST_VALUES_U                                                \
> +  TEST_VALUES_U_1 (32, UINT32_MAX, UINT32_MAX)                       \
> +  TEST_VALUES_U_1 (64, UINT64_MAX, UINT64_MAX)                       \
> +  TEST_VALUES_U_1 (32, UINT32_MAX, (1 << 31))                        \
> +  TEST_VALUES_U_1 (64, UINT64_MAX, (1ULL << 63))                     \
> +  TEST_VALUES_U_1 (32, 7, 4)                                         \
> +  TEST_VALUES_U_1 (32, 4, 7)                                         \
> +  TEST_VALUES_U_1 (64, 7, 4)                                         \
> +  TEST_VALUES_U_1 (64, 4, 7)                                         \
> +  TEST_VALUES_U_1 (32, 7, 3)                                         \
> +  TEST_VALUES_U_1 (64, 7, 3)                                         \
> +  TEST_VALUES_U_1 (32, 11, 1)                                        \
> +  TEST_VALUES_U_1 (64, 11, 1)
> +
> +#define TEST_VALUES                                                  \
> +  TEST_VALUES_S                                                      \
> +  TEST_VALUES_U
> +
> +int
> +main (void)
> +{
> +  const pred pg = svptrue_b8 ();
> +  TEST_TYPES
> +  TEST_VALUES
> +  return 0;
> +}

Reply via email to