Dear Richard, Thanks for the feedback. Great to see this patch approved! I made the changes as suggested. Best, Jennifer
0001-SVE-intrinsics-Add-strength-reduction-for-division-b.patch
Description: Binary data
> On 29 Jul 2024, at 22:55, Richard Sandiford <richard.sandif...@arm.com> wrote: > > External email: Use caution opening links or attachments > > > Thanks for doing this. > > Jennifer Schmitz <jschm...@nvidia.com> writes: >> [...] >> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c >> b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c >> index c49ca1aa524..6500b64c41b 100644 >> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c >> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c >> @@ -1,6 +1,9 @@ >> /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ >> >> #include "test_sve_acle.h" >> +#include <stdint.h> >> + > > I think it'd better to drop the explicit include of stdint.h. arm_sve.h > is defined to include stdint.h itself, and we rely on that elsewhere. > > Same for div_s64.c. Done. > >> +#define MAXPOW 1<<30 >> >> /* >> ** div_s32_m_tied1: >> @@ -53,10 +56,27 @@ TEST_UNIFORM_ZX (div_w0_s32_m_untied, svint32_t, int32_t, >> z0 = svdiv_n_s32_m (p0, z1, x0), >> z0 = svdiv_m (p0, z1, x0)) >> >> +/* >> +** div_1_s32_m_tied1: >> +** sel z0\.s, p0, z0\.s, z0\.s >> +** ret >> +*/ >> +TEST_UNIFORM_Z (div_1_s32_m_tied1, svint32_t, >> + z0 = svdiv_n_s32_m (p0, z0, 1), >> + z0 = svdiv_m (p0, z0, 1)) >> + >> +/* >> +** div_1_s32_m_untied: >> +** sel z0\.s, p0, z1\.s, z1\.s >> +** ret >> +*/ >> +TEST_UNIFORM_Z (div_1_s32_m_untied, svint32_t, >> + z0 = svdiv_n_s32_m (p0, z1, 1), >> + z0 = svdiv_m (p0, z1, 1)) >> + > > [ Thanks for adding the tests (which look good to me). If the output > ever improves in future, we can "defend" the improvement by changing > the test. But in the meantime, the above defends something that is > known to work. ] > >> [...] >> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c >> b/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c >> new file mode 100644 >> index 00000000000..1a3c25b817d >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c >> @@ -0,0 +1,91 @@ >> +/* { dg-do run { target aarch64_sve128_hw } } */ >> +/* { dg-options "-O2 -msve-vector-bits=128" } */ >> + >> +#include <arm_sve.h> >> +#include <stdint.h> >> + >> +typedef svbool_t pred __attribute__((arm_sve_vector_bits(128))); >> +typedef svfloat16_t svfloat16_ __attribute__((arm_sve_vector_bits(128))); >> +typedef svfloat32_t svfloat32_ __attribute__((arm_sve_vector_bits(128))); >> +typedef svfloat64_t svfloat64_ __attribute__((arm_sve_vector_bits(128))); >> +typedef svint32_t svint32_ __attribute__((arm_sve_vector_bits(128))); >> +typedef svint64_t svint64_ __attribute__((arm_sve_vector_bits(128))); >> +typedef svuint32_t svuint32_ __attribute__((arm_sve_vector_bits(128))); >> +typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128))); >> + >> +#define F(T, TS, P, OP1, OP2) >> \ >> +{ \ >> + T##_t op1 = (T##_t) OP1; \ >> + T##_t op2 = (T##_t) OP2; \ >> + sv##T##_ res = svdiv_##P (pg, svdup_##TS (op1), svdup_##TS (op2)); \ >> + sv##T##_ exp = svdup_##TS (op1 / op2); \ >> + if (svptest_any (pg, svcmpne (pg, exp, res))) >> \ >> + __builtin_abort (); >> \ >> + \ >> + sv##T##_ res_n = svdiv_##P (pg, svdup_##TS (op1), op2); \ >> + if (svptest_any (pg, svcmpne (pg, exp, res_n))) \ >> + __builtin_abort (); >> \ >> +} >> + >> +#define TEST_TYPES_1(T, TS) \ >> + F (T, TS, m, 79, 16) >> \ >> + F (T, TS, z, 79, 16) >> \ >> + F (T, TS, x, 79, 16) >> + >> +#define TEST_TYPES \ >> + TEST_TYPES_1 (float16, f16) >> \ >> + TEST_TYPES_1 (float32, f32) >> \ >> + TEST_TYPES_1 (float64, f64) >> \ >> + TEST_TYPES_1 (int32, s32) \ >> + TEST_TYPES_1 (int64, s64) \ >> + TEST_TYPES_1 (uint32, u32) \ >> + TEST_TYPES_1 (uint64, u64) >> + >> +#define TEST_VALUES_S_1(B, OP1, OP2) \ >> + F (int##B, s##B, x, OP1, OP2) >> + >> +#define TEST_VALUES_S >> \ >> + TEST_VALUES_S_1 (32, INT32_MIN, INT32_MIN) \ >> + TEST_VALUES_S_1 (64, INT64_MIN, INT64_MIN) \ >> + TEST_VALUES_S_1 (32, -7, 4) >> \ >> + TEST_VALUES_S_1 (64, -7, 4) >> \ >> + TEST_VALUES_S_1 (32, INT32_MAX, (1 << 30)) \ >> + TEST_VALUES_S_1 (64, INT64_MAX, (1ULL << 62)) >> \ >> + TEST_VALUES_S_1 (32, INT32_MIN, (1 << 30)) \ >> + TEST_VALUES_S_1 (64, INT64_MIN, (1ULL << 62)) >> \ >> + TEST_VALUES_S_1 (32, INT32_MAX, 1) \ >> + TEST_VALUES_S_1 (64, INT64_MAX, 1) \ >> + TEST_VALUES_S_1 (32, INT32_MIN, 16) >> \ >> + TEST_VALUES_S_1 (64, INT64_MIN, 16) >> \ >> + TEST_VALUES_S_1 (32, INT32_MAX, -5) >> \ >> + TEST_VALUES_S_1 (64, INT64_MAX, -5) >> \ >> + TEST_VALUES_S_1 (32, INT32_MIN, -4) >> \ >> + TEST_VALUES_S_1 (64, INT64_MIN, -4) >> + >> +#define TEST_VALUES_U_1(B, OP1, OP2) \ >> + F (uint##B, u##B, x, OP1, OP2) >> + >> +#define TEST_VALUES_U >> \ >> + TEST_VALUES_U_1 (32, UINT32_MAX, UINT32_MAX) >> \ >> + TEST_VALUES_U_1 (64, UINT64_MAX, UINT64_MAX) >> \ >> + TEST_VALUES_U_1 (32, UINT32_MAX, (1 << 31)) >> \ >> + TEST_VALUES_U_1 (64, UINT64_MAX, (1ULL << 63)) \ >> + TEST_VALUES_U_1 (32, 7, 4) \ >> + TEST_VALUES_U_1 (64, 7, 4) \ >> + TEST_VALUES_U_1 (32, 7, 3) \ >> + TEST_VALUES_U_1 (64, 7, 3) \ >> + TEST_VALUES_U_1 (32, 11, 1) >> \ >> + TEST_VALUES_U_1 (64, 11, 1) >> + >> +#define TEST_VALUES \ >> + TEST_VALUES_S >> \ >> + TEST_VALUES_U >> + >> +int >> +main (void) >> +{ >> + const pred pg = svptrue_b64 (); > > I think this should svptrue_b8 instead. As it stands, the: > > if (svptest_any (pg, svcmpne (pg, ...))) > __builtin_abort (); > > tests will only check the first element in each 64-bit chunk. Done. > > OK with those changes from my POV, but please give others 24 hours > to comment. > > Thanks, > Richard > >> + TEST_TYPES >> + TEST_VALUES >> + return 0; >> +}
smime.p7s
Description: S/MIME cryptographic signature