On 28/08/18 22:58, James Greenhalgh wrote:
On Tue, Aug 28, 2018 at 03:59:25AM -0500, Vlad Lazar wrote:
Gentle ping.

On 08/08/18 17:38, Vlad Lazar wrote:
On 01/08/18 18:35, James Greenhalgh wrote:
On Wed, Aug 01, 2018 at 07:13:53AM -0500, Vlad Lazar wrote:
On 31/07/18 22:48, James Greenhalgh wrote:
On Fri, Jul 20, 2018 at 04:37:34AM -0500, Vlad Lazar wrote:
Hi,

The patch adds implementations for the NEON intrinsics vabsd_s64 and vnegd_s64.
(https://developer.arm.com/products/architecture/cpu-architecture/a-profile/docs/ihi0073/latest/arm-neon-intrinsics-reference-architecture-specification)

Bootstrapped and regtested on aarch64-none-linux-gnu and there are no 
regressions.

OK for trunk?

+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vnegd_s64 (int64_t __a)
+{
+  return -__a;
+}

Does this give the correct behaviour for the minimum value of int64_t? That
would be undefined behaviour in C, but well-defined under ACLE.

Thanks,
James


Hi. Thanks for the review.

For the minimum value of int64_t it behaves as the ACLE specifies:
"The negative of the minimum (signed) value is itself."

What should happen in this testcase? The spoiler is below, but try to work out
what should happen and what goes wrong with your implementation.

    int foo (int64_t x)
    {
      if (x < (int64_t) 0)
        return vnegd_s64(x) < (int64_t) 0;
      else
        return 0;
    }
    int bar (void)
    {
      return foo (INT64_MIN);
    }
Thanks,
James


-----

<spoiler!>




INT64_MIN < 0 should be true, so we should return vnegd_s64(INT64_MIN) < 0.
vnegd_s64(INT64_MIN) is identity, so the return value should be
INT64_MIN < 0; i.e. True.

This isn't what the compiler thinks... The compiler makes use of the fact
that -INT64_MIN is undefined behaviour in C, and doesn't need to be considered
as a special case. The if statement gives you a range reduction to [-INF, -1],
negating that gives you a range [1, INF], and [1, INF] is never less than 0,
so the compiler folds the function to return false. We have a mismatch in
semantics.
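
Roughly, the fold looks like this on a standalone sketch (neg_plain below is
only an illustrative stand-in for the plain signed negation in your current
vnegd_s64 body):

    #include <stdint.h>

    /* Illustrative stand-in for the current intrinsic body: a plain
       signed negation, for which -INT64_MIN is undefined behaviour.  */
    static inline int64_t neg_plain (int64_t a) { return -a; }

    int foo (int64_t x)
    {
      if (x < (int64_t) 0)
        /* The guard narrows x to [INT64_MIN, -1].  Treating signed
           overflow as undefined, the optimiser may take -x to lie in
           [1, INT64_MAX] and fold this comparison to 0 (false).  */
        return neg_plain (x) < (int64_t) 0;
      else
        return 0;
    }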

I see your point now. I have updated the vnegd_s64 intrinsic to convert to
unsigned before negating. This means that if the predicted range of x is
[INT64_MIN, y], then the predicted range of vnegd_s64 (x) will be
~[INT64_MIN + 1, y] which seems to resolve the issue. I've also added testcases
which reflect the issue you've pointed out. Note that I've changed the vabsd_s64
intrinsic in order to avoid moves between integer and vector registers.
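As a rough sketch of the idea (the helper names below are illustrative only;
the actual definitions are in the patch):

    #include <stdint.h>

    /* Negate through uint64_t: unsigned negation wraps modulo 2^64, so
       (uint64_t) INT64_MIN negates to itself, and converting back to
       int64_t (well defined in GCC) yields INT64_MIN, as the ACLE
       requires, without relying on signed overflow.  */
    static inline int64_t neg_wrap (int64_t a)
    {
      return - (uint64_t) a;
    }

    static inline int64_t abs_wrap (int64_t a)
    {
      return a < 0 ? - (uint64_t) a : a;
    }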

I think from my reading of the standard that this is OK, but I may be rusty
and missing a corner case.

OK for trunk.

Thanks,
James

Committed with an obvious change to the testsuite/gcc.target/aarch64/vneg_s.c
testcase: I merged two scan-assembler directives that were searching for the
same pattern. See the patch below.
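
Concretely, the merge turns two directives counting the same pattern
(illustrative; the counts are those in the committed diff), e.g.

    /* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
    /* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */

into a single directive with the combined count:

    /* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 16 } } */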

Thanks,
Vlad
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 264018)
+++ ChangeLog	(working copy)
@@ -1,3 +1,8 @@
+2018-08-31  Vlad Lazar  <vlad.la...@arm.com>
+
+	* config/aarch64/arm_neon.h (vabsd_s64): New.
+	(vnegd_s64): Likewise.
+
 2018-08-31  Martin Jambor  <mjam...@suse.cz>
 
 	* ipa-cp.c (estimate_local_effects): Replace wrong MAX with MIN.
Index: config/aarch64/arm_neon.h
===================================================================
--- config/aarch64/arm_neon.h	(revision 264018)
+++ config/aarch64/arm_neon.h	(working copy)
@@ -11822,6 +11822,18 @@
   return __builtin_aarch64_absv2di (__a);
 }
 
+/* Try to avoid moving between integer and vector registers.
+   For why the cast to unsigned is needed check the vnegd_s64 intrinsic.
+   There is a testcase related to this issue:
+   gcc.target/aarch64/vabsd_s64.c.  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vabsd_s64 (int64_t __a)
+{
+  return __a < 0 ? - (uint64_t) __a : __a;
+}
+
 /* vadd */
 
 __extension__ extern __inline int64_t
@@ -22907,6 +22919,25 @@
   return -__a;
 }
 
+/* According to the ACLE, the negative of the minimum (signed)
+   value is itself.  This leads to a semantics mismatch, as this is
+   undefined behaviour in C.  The value range predictor is not
+   aware that the negation of a negative number can still be negative
+   and it may try to fold the expression.  See the test in
+   gcc.target/aarch64/vnegd_s64.c for an example.
+
+   The cast below tricks the value range predictor to include
+   INT64_MIN in the range it computes.  So for x in the range
+   [INT64_MIN, y] the range prediction after vnegd_s64 (x) will
+   be ~[INT64_MIN + 1, y].  */
+
+__extension__ extern __inline int64_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vnegd_s64 (int64_t __a)
+{
+  return - (uint64_t) __a;
+}
+
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vnegq_f32 (float32x4_t __a)
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 264018)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,12 @@
+2018-08-31  Vlad Lazar  <vlad.la...@arm.com>
+
+	* gcc.target/aarch64/scalar_intrinsics.c (test_vnegd_s64): New.
+	* gcc.target/aarch64/vneg_s.c (RUN_TEST_SCALAR): New.
+	(test_vnegd_s64): Likewise.
+	* gcc.target/aarch64/vnegd_s64.c: New.
+	* gcc.target/aarch64/vabsd_s64.c: New.
+	* gcc.target/aarch64/vabs_intrinsic_3.c: New.
+
 2018-08-31  Nathan Sidwell  <nat...@acm.org>
 
 	PR c++/87155
Index: testsuite/gcc.target/aarch64/scalar_intrinsics.c
===================================================================
--- testsuite/gcc.target/aarch64/scalar_intrinsics.c	(revision 264018)
+++ testsuite/gcc.target/aarch64/scalar_intrinsics.c	(working copy)
@@ -627,6 +627,14 @@
   return vqabss_s32 (a);
 }
 
+/* { dg-final { scan-assembler-times "\\tneg\\tx\[0-9\]+" 1 } } */
+
+int64_t
+test_vnegd_s64 (int64_t a)
+{
+  return vnegd_s64 (a);
+}
+
 /* { dg-final { scan-assembler-times "\\tsqneg\\tb\[0-9\]+" 1 } } */
 
 int8_t
Index: testsuite/gcc.target/aarch64/vabs_intrinsic_3.c
===================================================================
--- testsuite/gcc.target/aarch64/vabs_intrinsic_3.c	(revision 0)
+++ testsuite/gcc.target/aarch64/vabs_intrinsic_3.c	(working copy)
@@ -0,0 +1,39 @@
+/* Test the vabsd_s64 intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+#define force_simd(V1)   asm volatile ("mov %d0, %1.d[0]"       \
+           : "=w"(V1)                                           \
+           : "w"(V1)                                            \
+           : /* No clobbers */);
+
+#define RUN_TEST(test, answ)   \
+{                                      \
+  force_simd (test);                   \
+  force_simd (answ);                   \
+  int64_t res = vabsd_s64 (test);      \
+  force_simd (res);                    \
+  if (res != answ)                     \
+    abort ();                          \
+}
+
+int64_t input[] = {INT64_MAX, 10, 0, -10, INT64_MIN + 1, INT64_MIN};
+int64_t expected[] = {INT64_MAX, 10, 0, 10, INT64_MAX, INT64_MIN};
+
+int main (void)
+{
+  RUN_TEST (input[0], expected[0]);
+  RUN_TEST (input[1], expected[1]);
+  RUN_TEST (input[2], expected[2]);
+  RUN_TEST (input[3], expected[3]);
+  RUN_TEST (input[4], expected[4]);
+  RUN_TEST (input[5], expected[5]);
+
+  return 0;
+}
Index: testsuite/gcc.target/aarch64/vabsd_s64.c
===================================================================
--- testsuite/gcc.target/aarch64/vabsd_s64.c	(revision 0)
+++ testsuite/gcc.target/aarch64/vabsd_s64.c	(working copy)
@@ -0,0 +1,34 @@
+/* Check that the compiler does not optimise the vabsd_s64 call out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that the absolute value of the minimum
+   (signed) value is itself, and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -fno-inline -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+bar (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vabsd_s64 (x) < (int64_t) 0;
+  else
+    return -1;
+}
+
+int
+main (void)
+{
+  int ans = 1;
+  int res_abs = bar (INT64_MIN);
+
+  if (res_abs != ans)
+    abort ();
+
+  return 0;
+}
+
Index: testsuite/gcc.target/aarch64/vneg_s.c
===================================================================
--- testsuite/gcc.target/aarch64/vneg_s.c	(revision 264018)
+++ testsuite/gcc.target/aarch64/vneg_s.c	(working copy)
@@ -75,6 +75,18 @@
       }									\
   }
 
+#define RUN_TEST_SCALAR(test_val, answ_val, a, b)     \
+  {                                                   \
+    int64_t res;                                      \
+    INHIB_OPTIMIZATION;                               \
+    a = test_val;                                     \
+    b = answ_val;                                     \
+    force_simd (b);                                   \
+    force_simd (a);                                   \
+    res = vnegd_s64 (a);                              \
+    force_simd (res);                                 \
+  }
+
 int
 test_vneg_s8 ()
 {
@@ -177,8 +189,25 @@
   return 0;
 }
 
-/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
+int
+test_vnegd_s64 ()
+{
+  int64_t a, b;
 
+  RUN_TEST_SCALAR (TEST0, ANSW0, a, b);
+  RUN_TEST_SCALAR (TEST1, ANSW1, a, b);
+  RUN_TEST_SCALAR (TEST2, ANSW2, a, b);
+  RUN_TEST_SCALAR (TEST3, ANSW3, a, b);
+  RUN_TEST_SCALAR (TEST4, ANSW4, a, b);
+  RUN_TEST_SCALAR (TEST5, ANSW5, a, b);
+  RUN_TEST_SCALAR (LLONG_MAX, LLONG_MIN + 1, a, b);
+  RUN_TEST_SCALAR (LLONG_MIN, LLONG_MIN, a, b);
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 16 } } */
+
 int
 test_vnegq_s8 ()
 {
@@ -283,6 +312,9 @@
   if (test_vneg_s64 ())
     abort ();
 
+  if (test_vnegd_s64 ())
+    abort ();
+
   if (test_vnegq_s8 ())
     abort ();
 
Index: testsuite/gcc.target/aarch64/vnegd_s64.c
===================================================================
--- testsuite/gcc.target/aarch64/vnegd_s64.c	(revision 0)
+++ testsuite/gcc.target/aarch64/vnegd_s64.c	(working copy)
@@ -0,0 +1,36 @@
+/* Check that the compiler does not optimise the negation out.
+   We need to check for this because there is a mismatch in semantics
+   between the ACLE, which states that the negative of the minimum
+   (signed) value is itself, and C, where this is undefined behaviour.  */
+
+/* { dg-do run } */
+/* { dg-options "--save-temps -O2" } */
+
+#include <arm_neon.h>
+#include <limits.h>
+
+extern void abort (void);
+
+int
+foo (int64_t x)
+{
+  if (x < (int64_t) 0)
+    return vnegd_s64 (x) < (int64_t) 0;
+  else
+    return -1;
+}
+
+/* { dg-final { scan-assembler-times {neg\tx[0-9]+, x[0-9]+} 1 } } */
+
+int
+main (void)
+{
+  int ans = 1;
+  int res = foo (INT64_MIN);
+
+  if (res != ans)
+    abort ();
+
+  return 0;
+}
+
