Re: [PATCH v2 2/4] aarch64: Add basic svmfloat8_t support to arm_sve.h

Richard Sandiford Mon, 11 Nov 2024 08:04:33 -0800

Claudio Bantaloukas <claudio.bantalou...@arm.com> writes:
> [...]
> @@ -231,12 +231,12 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
>  #define TYPES_all_arith(S, D) \
>    TYPES_all_float (S, D), TYPES_all_integer (S, D)
>  
> -/*     _bf16
> +/* _mf8 _bf16
>       _f16 _f32 _f64
>     _s8  _s16 _s32 _s64
>     _u8  _u16 _u32 _u64.  */
>  #define TYPES_all_data(S, D) \
> -  S (bf16), TYPES_all_arith (S, D)
> +  S(mf8), S (bf16), TYPES_all_arith (S, D)


Sorry for the clash, but I've since pushed the SVE2p1 patches, which
redefine all_data in terms of separate b_data, h_data, s_data, and d_data.
The mf8 entry would now be part of b_data, and we should then get things
like svluti2_lane_zt_mf8 for free.

We should probably also add mf8 to things like za_bhsd_data, but that
can be a separate follow-on patch.

> +/*
> +** caller_0:
> +**   ...
> +**   mov     (z[0-9]+\.b), w2
> +**   ...
> +**   st1b    \1, p[0-7], \[x1\]
> +**   ...
> +**   ret
> +*/
> +void __attribute__((noipa))
> +caller_0 (mfloat8_t *ptr, mfloat8_t in)
> +{
> +  callee_0 (ptr, svdup_mf8 (in));
> +}

w2 isn't a meaningful register here, since the data should be passed in via b0.
I suppose for now we should match the move into w2 as well, with a note
to say that this should be optimised away later.  (Although the hard-coded
w2 should be replaced with (w[0-9]+) for the move in and \1 for the move out.)
Same for the other callers in this file.

Looks good otherwise, thanks!

Richard

> +
> [...]
> +/*
> +** caller_1:
> +**   ...
> +**   mov     (z[0-9]+\.b), w3
> +**   ...
> +**   st1b    \1, p[0-7], \[x2\]
> +**   ...
> +**   ret
> +*/
> +void __attribute__((noipa))
> +caller_1 (mfloat8_t *ptr, mfloat8_t in)
> +{
> +  callee_1 (ptr, 1, svdup_mf8 (in));
> +}
> +
> +/*
> +** callee_7:
> +**   ...
> +**   ld1b    (z[0-9]+\.b), (p[0-7])/z, \[x7\]
> +**   ...
> +**   st1b    \1, p[0-7], \[x0\]
> +**   ...
> +**   ret
> +*/
> +void __attribute__((noipa))
> +callee_7 (mfloat8_t *ptr, ...)
> +{
> +  va_list va;
> +  svmfloat8_t vec;
> +
> +  va_start (va, ptr);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  vec = va_arg (va, svmfloat8_t);
> +  va_end (va);
> +  svst1 (svptrue_b8 (), ptr, vec);
> +}
> +
> +/*
> +** caller_7:
> +**   ...
> +**   mov     (z[0-9]+\.b), w8
> +**   ...
> +**   st1b    \1, p[0-7], \[x7\]
> +**   ...
> +**   ret
> +*/
> +void __attribute__((noipa))
> +caller_7 (mfloat8_t *ptr, mfloat8_t in)
> +{
> +  callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_mf8 (in));
> +}
> +
> +/* FIXME: We should be able to get rid of the va_list object.  */
> +/*
> +** callee_8:
> +**   sub     sp, sp, #([0-9]+)
> +**   ...
> +**   ldr     (x[0-9]+), \[sp, \1\]
> +**   ...
> +**   ld1b    (z[0-9]+\.b), (p[0-7])/z, \[\2\]
> +**   ...
> +**   st1b    \3, \4, \[x0\]
> +**   ...
> +**   ret
> +*/
> +void __attribute__((noipa))
> +callee_8 (mfloat8_t *ptr, ...)
> +{
> +  va_list va;
> +  svmfloat8_t vec;
> +
> +  va_start (va, ptr);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  va_arg (va, int);
> +  vec = va_arg (va, svmfloat8_t);
> +  va_end (va);
> +  svst1 (svptrue_b8 (), ptr, vec);
> +}
> +
> +/*
> +** caller_8:
> +**   ...
> +**   mov     (z[0-9]+\.b), w1
> +**   ...
> +**   st1b    \1, p[0-7], \[(x[0-9]+)\]
> +**   ...
> +**   str     \2, \[sp\]
> +**   ...
> +**   ret
> +*/
> +void __attribute__((noipa))
> +caller_8 (mfloat8_t *ptr, mfloat8_t in)
> +{
> +  callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_mf8 (in));
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbl2_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbl2_mf8.c
> new file mode 100644
> index 00000000000..19cc739e7ab
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbl2_mf8.c
> @@ -0,0 +1,31 @@
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +
> +/*
> +** tbl2_mf8_tied1:
> +**   tbl     z0\.b, {z0\.b(?:, | - )z1\.b}, z4\.b
> +**   ret
> +*/
> +TEST_TBL2 (tbl2_mf8_tied1, svmfloat8x2_t, svmfloat8_t, svuint8_t,
> +        z0_res = svtbl2_mf8 (z0, z4),
> +        z0_res = svtbl2 (z0, z4))
> +
> +/*
> +** tbl2_mf8_tied2:
> +**   tbl     z0\.b, {z1\.b(?:, | - )z2\.b}, z0\.b
> +**   ret
> +*/
> +TEST_TBL2_REV (tbl2_mf8_tied2, svmfloat8x2_t, svmfloat8_t, svuint8_t,
> +            z0_res = svtbl2_mf8 (z1, z0),
> +            z0_res = svtbl2 (z1, z0))
> +
> +/*
> +** tbl2_mf8_untied:
> +**   tbl     z0\.b, {z2\.b(?:, | - )z3\.b}, z4\.b
> +**   ret
> +*/
> +TEST_TBL2 (tbl2_mf8_untied, svmfloat8x2_t, svmfloat8_t, svuint8_t,
> +        z0_res = svtbl2_mf8 (z2, z4),
> +        z0_res = svtbl2 (z2, z4))
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbx_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbx_mf8.c
> new file mode 100644
> index 00000000000..ba0fef3934b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbx_mf8.c
> @@ -0,0 +1,37 @@
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include "test_sve_acle.h"
> +
> +/*
> +** tbx_mf8_tied1:
> +**   tbx     z0\.b, z1\.b, z4\.b
> +**   ret
> +*/
> +TEST_DUAL_Z (tbx_mf8_tied1, svmfloat8_t, svuint8_t,
> +          z0 = svtbx_mf8 (z0, z1, z4),
> +          z0 = svtbx (z0, z1, z4))
> +
> +/* Bad RA choice: no preferred output sequence.  */
> +TEST_DUAL_Z (tbx_mf8_tied2, svmfloat8_t, svuint8_t,
> +          z0 = svtbx_mf8 (z1, z0, z4),
> +          z0 = svtbx (z1, z0, z4))
> +
> +/* Bad RA choice: no preferred output sequence.  */
> +TEST_DUAL_Z_REV (tbx_mf8_tied3, svmfloat8_t, svuint8_t,
> +              z0_res = svtbx_mf8 (z4, z5, z0),
> +              z0_res = svtbx (z4, z5, z0))
> +
> +/*
> +** tbx_mf8_untied:
> +** (
> +**   mov     z0\.d, z1\.d
> +**   tbx     z0\.b, z2\.b, z4\.b
> +** |
> +**   tbx     z1\.b, z2\.b, z4\.b
> +**   mov     z0\.d, z1\.d
> +** )
> +**   ret
> +*/
> +TEST_DUAL_Z (tbx_mf8_untied, svmfloat8_t, svuint8_t,
> +          z0 = svtbx_mf8 (z1, z2, z4),
> +          z0 = svtbx (z1, z2, z4))
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilerw_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilerw_mf8.c
> new file mode 100644
> index 00000000000..12cf0d2c365
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilerw_mf8.c
> @@ -0,0 +1,50 @@
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
> +
> +#include "test_sve_acle.h"
> +
> +/*
> +** whilerw_rr_mf8:
> +**   whilerw p0\.b, x0, x1
> +**   ret
> +*/
> +TEST_COMPARE_S (whilerw_rr_mf8, const mfloat8_t *,
> +             p0 = svwhilerw_mf8 (x0, x1),
> +             p0 = svwhilerw (x0, x1))
> +
> +/*
> +** whilerw_0r_mf8:
> +**   whilerw p0\.b, xzr, x1
> +**   ret
> +*/
> +TEST_COMPARE_S (whilerw_0r_mf8, const mfloat8_t *,
> +             p0 = svwhilerw_mf8 ((const mfloat8_t *) 0, x1),
> +             p0 = svwhilerw ((const mfloat8_t *) 0, x1))
> +
> +/*
> +** whilerw_cr_mf8:
> +**   mov     (x[0-9]+), #?1073741824
> +**   whilerw p0\.b, \1, x1
> +**   ret
> +*/
> +TEST_COMPARE_S (whilerw_cr_mf8, const mfloat8_t *,
> +             p0 = svwhilerw_mf8 ((const mfloat8_t *) 1073741824, x1),
> +             p0 = svwhilerw ((const mfloat8_t *) 1073741824, x1))
> +
> +/*
> +** whilerw_r0_mf8:
> +**   whilerw p0\.b, x0, xzr
> +**   ret
> +*/
> +TEST_COMPARE_S (whilerw_r0_mf8, const mfloat8_t *,
> +             p0 = svwhilerw_mf8 (x0, (const mfloat8_t *) 0),
> +             p0 = svwhilerw (x0, (const mfloat8_t *) 0))
> +
> +/*
> +** whilerw_rc_mf8:
> +**   mov     (x[0-9]+), #?1073741824
> +**   whilerw p0\.b, x0, \1
> +**   ret
> +*/
> +TEST_COMPARE_S (whilerw_rc_mf8, const mfloat8_t *,
> +             p0 = svwhilerw_mf8 (x0, (const mfloat8_t *) 1073741824),
> +             p0 = svwhilerw (x0, (const mfloat8_t *) 1073741824))
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilewr_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilewr_mf8.c
> new file mode 100644
> index 00000000000..c4023a2fbff
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilewr_mf8.c
> @@ -0,0 +1,50 @@
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
> +
> +#include "test_sve_acle.h"
> +
> +/*
> +** whilewr_rr_mf8:
> +**   whilewr p0\.b, x0, x1
> +**   ret
> +*/
> +TEST_COMPARE_S (whilewr_rr_mf8, const mfloat8_t *,
> +             p0 = svwhilewr_mf8 (x0, x1),
> +             p0 = svwhilewr (x0, x1))
> +
> +/*
> +** whilewr_0r_mf8:
> +**   whilewr p0\.b, xzr, x1
> +**   ret
> +*/
> +TEST_COMPARE_S (whilewr_0r_mf8, const mfloat8_t *,
> +             p0 = svwhilewr_mf8 ((const mfloat8_t *) 0, x1),
> +             p0 = svwhilewr ((const mfloat8_t *) 0, x1))
> +
> +/*
> +** whilewr_cr_mf8:
> +**   mov     (x[0-9]+), #?1073741824
> +**   whilewr p0\.b, \1, x1
> +**   ret
> +*/
> +TEST_COMPARE_S (whilewr_cr_mf8, const mfloat8_t *,
> +             p0 = svwhilewr_mf8 ((const mfloat8_t *) 1073741824, x1),
> +             p0 = svwhilewr ((const mfloat8_t *) 1073741824, x1))
> +
> +/*
> +** whilewr_r0_mf8:
> +**   whilewr p0\.b, x0, xzr
> +**   ret
> +*/
> +TEST_COMPARE_S (whilewr_r0_mf8, const mfloat8_t *,
> +             p0 = svwhilewr_mf8 (x0, (const mfloat8_t *) 0),
> +             p0 = svwhilewr (x0, (const mfloat8_t *) 0))
> +
> +/*
> +** whilewr_rc_mf8:
> +**   mov     (x[0-9]+), #?1073741824
> +**   whilewr p0\.b, x0, \1
> +**   ret
> +*/
> +TEST_COMPARE_S (whilewr_rc_mf8, const mfloat8_t *,
> +             p0 = svwhilewr_mf8 (x0, (const mfloat8_t *) 1073741824),
> +             p0 = svwhilewr (x0, (const mfloat8_t *) 1073741824))

Re: [PATCH v2 2/4] aarch64: Add basic svmfloat8_t support to arm_sve.h

Reply via email to