[PATCH][AArch64] Vneg NEON intrinsics modified

2013-10-08 Thread Alex Velenko

Hi,

This patch implements the behavior of the following
NEON intrinsics using C:
vneg[q]_f[32,64]
vneg[q]_s[8,16,32,64]

Regression tests for the listed intrinsics are included.
I ran a full regression test for aarch64-none-elf
with no regressions.
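
For reference, a minimal usage sketch of what the C-based implementations rely on (the function below is illustrative and not part of the patch): GCC applies unary minus elementwise to vector types, so the compiler can select neg/fneg itself.

#include <arm_neon.h>

int32x2_t
negate_pair (int32x2_t x)
{
  /* With the new header vneg_s32 is simply "return -x;", so this is
     expected to compile to a single "neg v0.2s, v0.2s" at -O2.  */
  return vneg_s32 (x);
}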

Ok?

Thanks,
Alex

gcc/testsuite/

2013-10-08  Alex Velenko  

* gcc.target/aarch64/vneg_f.c: New testcase.
* gcc.target/aarch64/vneg_s.c: New testcase.

gcc/

2013-10-08  Alex Velenko  

* config/aarch64/arm_neon.h (vneg_f32): Asm replaced with C.
(vneg_f64): New intrinsic.
(vneg_s8): Asm replaced with C.
(vneg_s16): Likewise.
(vneg_s32): Likewise.
(vneg_s64): New intrinsic.
(vnegq_f32): Asm replaced with C.
(vnegq_f64): Likewise.
(vnegq_s8): Likewise.
(vnegq_s16): Likewise.
(vnegq_s32): Likewise.
(vnegq_s64): Likewise.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index cb5860206a1812f347a77d4a6e06519f8c3a696f..1bd098d2a9c3a204c0fb57ee3ef31cbb5f328d8e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -9785,115 +9785,6 @@ vmvnq_u32 (uint32x4_t a)
   return result;
 }
 
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vneg_f32 (float32x2_t a)
-{
-  float32x2_t result;
-  __asm__ ("fneg %0.2s,%1.2s"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vneg_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("neg %0.8b,%1.8b"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vneg_s16 (int16x4_t a)
-{
-  int16x4_t result;
-  __asm__ ("neg %0.4h,%1.4h"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vneg_s32 (int32x2_t a)
-{
-  int32x2_t result;
-  __asm__ ("neg %0.2s,%1.2s"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vnegq_f32 (float32x4_t a)
-{
-  float32x4_t result;
-  __asm__ ("fneg %0.4s,%1.4s"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vnegq_f64 (float64x2_t a)
-{
-  float64x2_t result;
-  __asm__ ("fneg %0.2d,%1.2d"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vnegq_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("neg %0.16b,%1.16b"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vnegq_s16 (int16x8_t a)
-{
-  int16x8_t result;
-  __asm__ ("neg %0.8h,%1.8h"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vnegq_s32 (int32x4_t a)
-{
-  int32x4_t result;
-  __asm__ ("neg %0.4s,%1.4s"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vnegq_s64 (int64x2_t a)
-{
-  int64x2_t result;
-  __asm__ ("neg %0.2d,%1.2d"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vpadal_s8 (int16x4_t a, int8x8_t b)
@@ -21241,6 +21132,80 @@ vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
   return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
 }
 
+/* vneg  */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vneg_f32 (float32x2_t __a)
+{
+  return -__a;
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vneg_f64 (float64x1_t __a)
+{
+  return -__a;
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vneg_s8 (int8x8_t __a)
+{
+  return -__a;
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vneg_s16 (int16x4_t __a)
+{
+  return -__a;
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+v

[PATCH][AARCH64] Vdiv NEON intrinsic

2013-10-08 Thread Alex Velenko

Hi,

This patch implements the behavior of the vdiv_f64 intrinsic
and adds regression tests for the vdiv[q]_f[32,64] NEON intrinsics.

Full aarch64-none-elf regression test ran with no regressions.
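
As a usage sketch (illustrative only, not part of the patch), the new intrinsic is plain division on a single D-register value:

#include <arm_neon.h>

float64x1_t
ratio (float64x1_t num, float64x1_t den)
{
  /* vdiv_f64 is now "num / den" in C; a single fdiv is expected.  */
  return vdiv_f64 (num, den);
}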

Is it OK?

Thanks,
Alex

gcc/testsuite/

2013-09-10  Alex Velenko  

* gcc.target/aarch64/vdiv_f.c: New testcase.

gcc/

2013-09-10  Alex Velenko  

* config/aarch64/arm_neon.h (vdiv_f64): Added.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index b8791b7b5dd7123b6d708aeb2321986673a0c0cd..db9bf28227e87072b48f5dca8835be8007c6b93d 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -1210,6 +1210,12 @@ vdiv_f32 (float32x2_t __a, float32x2_t __b)
   return __a / __b;
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vdiv_f64 (float64x1_t __a, float64x1_t __b)
+{
+  return __a / __b;
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vdivq_f32 (float32x4_t __a, float32x4_t __b)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/vdiv_f.c b/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
new file mode 100644
index ..cc3a9570c0fac0dcbf38f38314a416cca5e58c6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
@@ -0,0 +1,361 @@
+/* Test vdiv works correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+#define FLT_INFINITY (__builtin_inff ())
+#define DBL_INFINITY (__builtin_inf ())
+
+#define NAN (0.0 / 0.0)
+
+#define PI 3.141592653589793
+#define PI_4 0.7853981633974483
+#define SQRT2 1.4142135623730951
+#define SQRT1_2 0.7071067811865475
+
+#define TESTA0 PI
+#define TESTA1 -PI
+#define TESTA2 PI
+#define TESTA3 -PI
+#define TESTA4 1.0
+#define TESTA5 -1.0
+#define TESTA6 1.0
+#define TESTA7 -1.0
+/* 2^25+1, float has 24 significand bits
+   according to Single-precision floating-point format.  */
+#define TESTA8_FLT 33554433
+/* 2^54+1, double has 53 significand bits
+   according to Double-precision floating-point format.  */
+#define TESTA8_DBL 18014398509481985
+#define TESTA9 -TESTA8
+#define TESTA10 TESTA8
+#define TESTA11 -TESTA8
+#define TESTA12 NAN
+#define TESTA13 1.0
+#define TESTA14 INFINITY
+#define TESTA15 -INFINITY
+#define TESTA16 INFINITY
+#define TESTA17 9.0
+#define TESTA18 11.0
+#define TESTA19 13.0
+
+#define TESTB0 4.0
+#define TESTB1 4.0
+#define TESTB2 -4.0
+#define TESTB3 -4.0
+#define TESTB4 SQRT2
+#define TESTB5 SQRT2
+#define TESTB6 -SQRT2
+#define TESTB7 -SQRT2
+#define TESTB8 2.0
+#define TESTB9 2.0
+#define TESTB10 -2.0
+#define TESTB11 -2.0
+#define TESTB12 3.0
+#define TESTB13 NAN
+#define TESTB14 5.0
+#define TESTB15 7.0
+#define TESTB16 INFINITY
+#define TESTB17 INFINITY
+#define TESTB18 -INFINITY
+#define TESTB19 0
+
+#define ANSW0 PI_4
+#define ANSW1 -PI_4
+#define ANSW2 -PI_4
+#define ANSW3 PI_4
+#define ANSW4 SQRT1_2
+#define ANSW5 -SQRT1_2
+#define ANSW6 -SQRT1_2
+#define ANSW7 SQRT1_2
+#define ANSW8_FLT 16777216
+#define ANSW8_DBL 9007199254740992
+#define ANSW9 -ANSW8
+#define ANSW10 -ANSW8
+#define ANSW11 ANSW8
+#define ANSW12 NAN
+#define ANSW13 NAN
+#define ANSW14 INFINITY
+#define ANSW15 -INFINITY
+#define ANSW16 NAN
+#define ANSW17 0
+#define ANSW18 0
+#define ANSW19 INFINITY
+
+#define CONCAT(a, b) a##b
+#define CONCAT1(a, b) CONCAT (a, b)
+#define REG_INFEX64 _
+#define REG_INFEX128 q_
+#define REG_INFEX(reg_len) REG_INFEX##reg_len
+#define POSTFIX(reg_len, data_len) \
+  CONCAT1 (REG_INFEX (reg_len), f##data_len)
+
+#define DATA_TYPE_32 float
+#define DATA_TYPE_64 double
+#define DATA_TYPE(data_len) DATA_TYPE_##data_len
+
+#define EPSILON_32 __FLT_EPSILON__
+#define EPSILON_64 __DBL_EPSILON__
+#define EPSILON(data_len) EPSILON_##data_len
+
+#define INDEX64_32 [i]
+#define INDEX64_64
+#define INDEX128_32 [i]
+#define INDEX128_64 [i]
+#define INDEX(reg_len, data_len) \
+  CONCAT1 (INDEX, reg_len##_##data_len)
+
+#define LOAD_INST(reg_len, data_len) \
+  CONCAT1 (vld1, POSTFIX (reg_len, data_len))
+#define DIV_INST(reg_len, data_len) \
+  CONCAT1 (vdiv, POSTFIX (reg_len, data_len))
+
+#define ABS(a) __builtin_fabs (a)
+#define ISNAN(a) __builtin_isnan (a)
+#define FP_equals(a, b, epsilon)			\
+  (			\
+   ((a) == (b))		\
+|| (ISNAN (a) && ISNAN (b))\
+|| (ABS (a - b) < epsilon)\
+  )
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define RUN_TEST(a, b, c, testseta, testsetb, answset, count,		\
+		 reg_len, data_len, n)	\
+{	\
+  int i;\
+  INHIB_OPTIMIZATION;			\
+  (a) = LOAD_INST (reg_len, data_len) (testseta[count]);		\
+  (b) = LOAD_INST (reg_len, data_len) (testsetb[count]);		\
+  (c) = LOAD_INST (reg_len, data_len) (answset[count]);			\
+  INHIB_OPTIMIZATION;			\
+  (a) = DIV_INST (reg_len, data_len) (a, b);\
+  for (i = 0; i < n; i++)		\
+  {	\
+INHIB_OPTIMIZATION;			\
+if (!FP_equals ((a) INDEX (reg_l

[PATCH][AArch64] NEON vadd_f64 and vsub_f64 intrinsics modified

2013-10-08 Thread Alex Velenko


Hi,

This patch implements the behavior of the vadd_f64 and
vsub_f64 NEON intrinsics. Regression tests are added.
The aarch64-none-elf regression run completed with no
regressions.
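
A small usage sketch of the two new C implementations (the function is illustrative, not part of the patch):

#include <arm_neon.h>

float64x1_t
sum_minus (float64x1_t a, float64x1_t b, float64x1_t c)
{
  /* Both intrinsics now lower to plain C arithmetic on float64x1_t, so
     this should compile to one fadd followed by one fsub.  */
  return vsub_f64 (vadd_f64 (a, b), c);
}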

OK?

Thanks,
Alex

gcc/testsuite/

2013-10-08  Alex Velenko  

* gcc.target/aarch64/vadd_f64.c: New testcase.
* gcc.target/aarch64/vsub_f64.c: New testcase.

gcc/

2013-10-08  Alex Velenko  

* config/aarch64/arm_neon.h (vadd_f64): Implementation added.
(vsub_f64): Likewise.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 1bd098d2a9c3a204c0fb57ee3ef31cbb5f328d8e..b8791b7b5dd7123b6d708aeb2321986673a0c0cd 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -634,6 +634,12 @@ vadd_f32 (float32x2_t __a, float32x2_t __b)
   return __a + __b;
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vadd_f64 (float64x1_t __a, float64x1_t __b)
+{
+  return __a + __b;
+}
+
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
 {
@@ -1824,6 +1830,12 @@ vsub_f32 (float32x2_t __a, float32x2_t __b)
   return __a - __b;
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vsub_f64 (float64x1_t __a, float64x1_t __b)
+{
+  return __a - __b;
+}
+
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/vadd_f64.c b/gcc/testsuite/gcc.target/aarch64/vadd_f64.c
new file mode 100644
index ..c3bf7349597aa9b75e0bc34cfd4cde4dc16b95f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vadd_f64.c
@@ -0,0 +1,114 @@
+/* Test vadd works correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+#define FLT_EPSILON __FLT_EPSILON__
+#define DBL_EPSILON __DBL_EPSILON__
+
+#define TESTA0 0.3
+#define TESTA1 -1.
+#define TESTA2 0
+#define TESTA3 1.23456
+/* 2^54, double has 53 significand bits
+   according to Double-precision floating-point format.  */
+#define TESTA4 18014398509481984
+#define TESTA5 (1.0 / TESTA4)
+
+#define TESTB0 0.7
+#define TESTB1 2
+#define TESTB2 0
+#define TESTB3 -2
+#define TESTB4 1.0
+#define TESTB5 2.0
+
+#define ANSW0 1
+#define ANSW1 0.2223
+#define ANSW2 0
+#define ANSW3 -0.76544
+#define ANSW4 TESTA4
+#define ANSW5 2.0
+
+extern void abort (void);
+
+#define EPSILON __DBL_EPSILON__
+#define ABS(a) __builtin_fabs (a)
+#define ISNAN(a) __builtin_isnan (a)
+#define FP_equals(a, b, epsilon)			\
+  (			\
+   ((a) == (b))		\
+|| (ISNAN (a) && ISNAN (b))\
+|| (ABS (a - b) < epsilon)\
+   )
+
+int
+test_vadd_f64 ()
+{
+  float64x1_t a;
+  float64x1_t b;
+  float64x1_t c;
+
+  a = TESTA0;
+  b = TESTB0;
+  c = ANSW0;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+return 1;
+
+  a = TESTA1;
+  b = TESTB1;
+  c = ANSW1;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+return 1;
+
+  a = TESTA2;
+  b = TESTB2;
+  c = ANSW2;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+return 1;
+
+  a = TESTA3;
+  b = TESTB3;
+  c = ANSW3;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+return 1;
+
+  a = TESTA4;
+  b = TESTB4;
+  c = ANSW4;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+return 1;
+
+  a = TESTA5;
+  b = TESTB5;
+  c = ANSW5;
+
+  a = vadd_f64 (a, b);
+  if (!FP_equals (a, c, EPSILON))
+return 1;
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "fadd\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 6 } } */
+
+int
+main (int argc, char **argv)
+{
+  if (test_vadd_f64 ())
+abort ();
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vsub_f64.c b/gcc/testsuite/gcc.target/aarch64/vsub_f64.c
new file mode 100644
index ..abf4fc42d49dc695f435b1e0f331737c8e9367b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vsub_f64.c
@@ -0,0 +1,116 @@
+/* Test vsub works correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+#define FLT_EPSILON __FLT_EPSILON__
+#define DBL_EPSILON __DBL_EPSILON__
+
+#define TESTA0 1
+#define TESTA1 0.2223
+#define TESTA2 0
+#define TESTA3 -0.76544
+/* 2^54, double has 53 significand bits
+   according to Double-precision floating-point format.  */
+#define TESTA4 18014398509481984
+#define TESTA5 2.0
+
+#define TESTB0 0.7
+#define TESTB1 2
+#define TESTB2 0
+#define TESTB3 -2
+#define TESTB4 1.0
+#define TESTB5 (1.0 / TESTA4)
+
+#define ANSW0 0.3
+#define ANSW1 -1.
+#define ANSW2 0
+#define ANSW3 1.23456
+#define ANSW4 TESTA4
+#define ANSW5 2.0
+
+extern void abort (void);
+
+#define EPSILON __DBL_EPSILON__
+#define ISNAN(a) __builtin_isnan (a)
+/* FP_equals is implemented like this to execute subtraction

[PATCH][AArch64] NEON vclz intrinsic modified

2013-10-08 Thread Alex Velenko

Hi,

This patch implements the behavior of the NEON intrinsics
vclz[q]_[s,u][8,16,32] and adds a regression test.
No problems were found when running the aarch64-none-elf
regression tests.
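
For illustration (the function below is hypothetical), the interesting case is a zero input, where CLZ must return the element width rather than the scalar-mode width, which is what the CLZ_DEFINED_VALUE_AT_ZERO change expresses:

#include <arm_neon.h>

uint16x4_t
leading_zeros (uint16x4_t x)
{
  /* A zero element yields 16 (the element width); GET_MODE_UNIT_BITSIZE
     now reports exactly that to the middle end.  */
  return vclz_u16 (x);
}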

Is patch OK?

Thanks,
Alex

gcc/testsuite/

2013-10-08  Alex Velenko  

* gcc.target/aarch64/vclz.c: New testcase.

gcc/

2013-10-08  Alex Velenko  

* config/aarch64/arm_neon.h (vclz_s8): Asm replaced with C.
(vclz_s16): Likewise.
(vclz_s32): Likewise.
(vclzq_s8): Likewise.
(vclzq_s16): Likewise.
(vclzq_s32): Likewise.
(vclz_u8): Likewise.
(vclz_u16): Likewise.
(vclz_u32): Likewise.
(vclzq_u8): Likewise.
(vclzq_u16): Likewise.
(vclzq_u32): Likewise.

* config/aarch64/aarch64.h (CLZ_DEFINED_VALUE_AT_ZERO): Macro fixed
for clz.

* config/aarch64/aarch64-simd-builtins.def (VAR1 (UNOP, clz, 0, v4si)):
Replaced with iterator.
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 35897f3939556d7bb804d4b4ae692a300b103681..c18b150a1f5f2131deb54e3f66f93330c43bcefd 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -45,7 +45,7 @@
   BUILTIN_VDQF (UNOP, sqrt, 2)
   BUILTIN_VD_BHSI (BINOP, addp, 0)
   VAR1 (UNOP, addp, 0, di)
-  VAR1 (UNOP, clz, 2, v4si)
+  BUILTIN_VDQ_BHSI (UNOP, clz, 2)
 
   BUILTIN_VALL (GETLANE, get_lane, 0)
   VAR1 (GETLANE, get_lane, 0, di)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index da2b46d14cf02814f93aeda1535461c242174aae..7a80e96385f935e032bc0421d1aeea52de7bcd1d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -739,7 +739,7 @@ do {	 \
: reverse_condition (CODE))
 
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-  ((VALUE) = ((MODE) == SImode ? 32 : 64), 2)
+  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE))
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
   ((VALUE) = ((MODE) == SImode ? 32 : 64), 2)
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index db9bf28227e87072b48f5dca8835be8007c6b93d..482d7d03ed4995d46bef14a0c2c42903aafc6986 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -5158,138 +5158,6 @@ vclsq_s32 (int32x4_t a)
   return result;
 }
 
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vclz_s8 (int8x8_t a)
-{
-  int8x8_t result;
-  __asm__ ("clz %0.8b,%1.8b"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vclz_s16 (int16x4_t a)
-{
-  int16x4_t result;
-  __asm__ ("clz %0.4h,%1.4h"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vclz_s32 (int32x2_t a)
-{
-  int32x2_t result;
-  __asm__ ("clz %0.2s,%1.2s"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vclz_u8 (uint8x8_t a)
-{
-  uint8x8_t result;
-  __asm__ ("clz %0.8b,%1.8b"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vclz_u16 (uint16x4_t a)
-{
-  uint16x4_t result;
-  __asm__ ("clz %0.4h,%1.4h"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vclz_u32 (uint32x2_t a)
-{
-  uint32x2_t result;
-  __asm__ ("clz %0.2s,%1.2s"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vclzq_s8 (int8x16_t a)
-{
-  int8x16_t result;
-  __asm__ ("clz %0.16b,%1.16b"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vclzq_s16 (int16x8_t a)
-{
-  int16x8_t result;
-  __asm__ ("clz %0.8h,%1.8h"
-   : "=w"(result)
-   : "w"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vclzq_s32 (int32x4_t a)
-{
-  int32x4_t result;
-  __asm__ ("clz %0.4s,%1.4s"
-   : "=w"(result)
-   : "w&qu

[PATCH][AArch64] Vector shift by 64 fix

2014-01-06 Thread Alex Velenko

Hi,

This patch fixes vector shift-by-64 behavior to meet reference
manual expectations. A testcase is included to check that the expectations
are now met. No regressions were found.
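
A sketch of the case the fix targets (the function name is illustrative):

#include <arm_neon.h>

int64x1_t
sign_mask (int64x1_t x)
{
  /* A shift by 64 cannot go through the generic ashrdi3 pattern (the
     corresponding C shift would be undefined), so the new expander
     routes it to aarch64_sshr_simddi, emitting "sshr d0, d0, 64" and
     yielding 0 for non-negative inputs and -1 for negative ones.  */
  return vshr_n_s64 (x, 64);
}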

Is patch OK?

Thanks,
Alex

2014-01-06  Alex Velenko  

gcc/

* config/aarch64/aarch64-simd-builtins.def (ashr): DI mode removed.
(ashr_simd): New builtin handling DI mode.
* config/aarch64/aarch64-simd.md (aarch64_ashr_simddi): New pattern.
(aarch64_sshr_simddi): New match pattern.
* config/aarch64/arm_neon.h (vshr_n_s32): Builtin call modified.
(vshrd_n_s64): Likewise.
* config/aarch64/predicates.md (aarch64_shift_imm64_di): New predicate.

gcc/testsuite/

* gcc.target/aarch64/sshr64_1.c: New testcase.
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 1dc3c1fe33fdb8148d2ff9c7198e4d85d5dac5d7..1e88661fd2f0f756ce1427681c843fc0783ab6a2 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -189,7 +189,8 @@
   BUILTIN_VSDQ_I_DI (BINOP, srshl, 0)
   BUILTIN_VSDQ_I_DI (BINOP, urshl, 0)
 
-  BUILTIN_VSDQ_I_DI (SHIFTIMM, ashr, 3)
+  BUILTIN_VDQ_I (SHIFTIMM, ashr, 3)
+  VAR1 (SHIFTIMM, ashr_simd, 0, di)
   BUILTIN_VSDQ_I_DI (SHIFTIMM, lshr, 3)
   /* Implemented by aarch64_shr_n.  */
   BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 158b3dca6da12322de0af80d35f593039d716de6..839186a5e3e3363973186d68aeed6fbaf7f0dfea 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -668,6 +668,32 @@
   DONE;
 })
 
+;; DI vector shift
+(define_expand "aarch64_ashr_simddi"
+  [(match_operand:DI 0 "register_operand" "=w")
+   (match_operand:DI 1 "register_operand" "w")
+   (match_operand:QI 2 "aarch64_shift_imm64_di" "")]
+  "TARGET_SIMD"
+  {
+if (INTVAL (operands[2]) == 64)
+  emit_insn (gen_aarch64_sshr_simddi (operands[0], operands[1]));
+else
+  emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
+DONE;
+  }
+)
+
+;; SIMD shift by 64.  This pattern is a special case as standard pattern does
+;; not handle NEON shifts by 64.
+(define_insn "aarch64_sshr_simddi"
+  [(set (match_operand:DI 0 "register_operand" "=w")
+(unspec:DI
+  [(match_operand:DI 1 "register_operand" "w")] UNSPEC_SSHR64))]
+  "TARGET_SIMD"
+  "sshr\t%d0, %d1, 64"
+  [(set_attr "type" "neon_shift_imm")]
+)
+
 (define_expand "vlshr3"
  [(match_operand:VQ_S 0 "register_operand" "")
   (match_operand:VQ_S 1 "register_operand" "")
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 8b3dbd7550e8e9037de1a1384276bee28d21cb3d..130a11c0231c32440573276fd78e62b6f019d302 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -92,6 +92,7 @@
 UNSPEC_SISD_SSHL
 UNSPEC_SISD_USHL
 UNSPEC_SSHL_2S
+UNSPEC_SSHR64
 UNSPEC_ST2
 UNSPEC_ST3
 UNSPEC_ST4
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 03549bd7a27cccb14ed8cdce91cbd4e4278c273f..64012775b3fa7d174af1472f73aadf4174d0d291 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -23235,7 +23235,7 @@ vshr_n_s32 (int32x2_t __a, const int __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vshr_n_s64 (int64x1_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
+  return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -23313,7 +23313,7 @@ vshrq_n_u64 (uint64x2_t __a, const int __b)
 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
 vshrd_n_s64 (int64x1_t __a, const int __b)
 {
-  return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
+  return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index dbc90826665d19a6ac6131918efb2c8a32bd1f04..9538107a5c148408f5c6e8e37aeef92aa5be0856 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -86,6 +86,10 @@
   (and (match_code "const_int")
(match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 64")))
 
+(define_predicate "aarch64_shift_imm64_di"
+  (and (match_code "const_int")
+   (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) <= 64")))
+
 (define_predicate "aarch64_reg_or_shift_imm_si"
   (ior (match_operand 0 "register_operand")
(match_opera

Re: [Patch AArch64] Implement Vector Permute Support

2014-01-14 Thread Alex Velenko

Hi,

This patch turns off the vec_perm patterns for aarch64_be, which should
resolve the issue highlighted here:
http://gcc.gnu.org/ml/gcc-patches/2014-01/msg00321.html
With this patch applied, the test case provided in that link compiles
without an ICE.


However, the Big-Endian port is still in development. This patch exposes
another known but unrelated issue with Big-Endian Large-Int modes.

The patch has been tested on aarch64-none-elf and aarch64_be-none-elf,
resulting in five further regressions due to the broken implementation
of Big-Endian Large-Int modes.


Kind regards,
Alex Velenko

gcc/

2014-01-14  Alex Velenko  

* config/aarch64/aarch64-simd.md (vec_perm): Add BE check.
* config/aarch64/aarch64.c (aarch64_expand_vec_perm): Add comment.

gcc/testsuite/

2014-01-14  Alex Velenko  

* lib/target-supports.exp
(check_effective_target_vect_perm): Exclude aarch64_be.
(check_effective_target_vect_perm_byte): Likewise.
(check_effective_target_vect_perm_short): Likewise.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index bc47a291de4b9b24d829e4dbf060fff7a321558f..43a9c5b27d78a47cf965636a03232005a4c8e7c3 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3840,7 +3840,7 @@
(match_operand:VB 1 "register_operand")
(match_operand:VB 2 "register_operand")
(match_operand:VB 3 "register_operand")]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
 {
   aarch64_expand_vec_perm (operands[0], operands[1],
 			   operands[2], operands[3]);
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 95360089b89d5fef2997dc6dbe7f47a6864143ea..084668af5124aa1c4a7f25495cf44b52811d0e62 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3417,7 +3417,8 @@ proc check_effective_target_vect_perm { } {
 } else {
 set et_vect_perm_saved 0
 if { [is-effective-target arm_neon_ok]
-	 || [istarget aarch64*-*-*]
+	 || ([istarget aarch64*-*-*]
+		 && ![istarget aarch64_be*-*-*])
 	 || [istarget powerpc*-*-*]
  || [istarget spu-*-*]
 	 || [istarget i?86-*-*]
@@ -3445,7 +3446,8 @@ proc check_effective_target_vect_perm_byte { } {
 set et_vect_perm_byte_saved 0
 if { ([is-effective-target arm_neon_ok]
 	  && [is-effective-target arm_little_endian])
-	 || [istarget aarch64*-*-*]
+	 || ([istarget aarch64*-*-*]
+		 && ![istarget aarch64_be*-*-*])
 	 || [istarget powerpc*-*-*]
  || [istarget spu-*-*] } {
 set et_vect_perm_byte_saved 1
@@ -3469,7 +3471,8 @@ proc check_effective_target_vect_perm_short { } {
 set et_vect_perm_short_saved 0
 if { ([is-effective-target arm_neon_ok]
 	  && [is-effective-target arm_little_endian])
-	 || [istarget aarch64*-*-*]
+	 || ([istarget aarch64*-*-*]
+		 && ![istarget aarch64_be*-*-*])
 	 || [istarget powerpc*-*-*]
  || [istarget spu-*-*] } {
 set et_vect_perm_short_saved 1


Re: Re: Re: [PATCH] [PATCH][ARM] Fix split-live-ranges-for-shrink-wrap.c testcase.

2015-07-21 Thread Alex Velenko

On 25/06/15 14:35, Ramana Radhakrishnan wrote:

On Mon, Jun 22, 2015 at 5:56 PM, Alex Velenko  wrote:

On 20/05/15 21:14, Joseph Myers wrote:


Again, the condition you propose to add doesn't make sense.  arm_arch_X_ok
is only appropriate for tests using an explicit -march=X.  Testing with
-march=armv7* should automatically skip this test anyway because it would
cause arm_thumb1_ok to fail.



Hi,

I adjusted the patch to skip execution of split-live-ranges-for-shrink-wrap.c
with explicitly specified -march=armv4t and to provide the -march=armv5t flag
for arm_arch_v5t_ok targets.

Is patch ok?

Alex

gcc/testsuite

2015-06-22  Alex Velenko  

  * gcc.target/arm/split-live-ranges-for-shrink-wrap.c (dg-skip-if):
 Skip -march=armv4t.
 (dg-additional-options): Set armv5t flag.

diff --git a/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c b/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c
index e36000b..3cb93dc 100644
--- a/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c
+++ b/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c
@@ -1,6 +1,8 @@
   /* { dg-do assemble } */
   /* { dg-options "-mthumb -Os -fdump-rtl-ira " }  */
   /* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "do not test on armv4t" { *-*-* } { "-march=armv4t" } } =
*/
+/* { dg-additional-options "-march=armv5t" {target arm_arch_v5t_ok} } */

   int foo (char *, char *, int);
   int test (int d, char * out, char *in, int len)



OK - please watch out for any multilibs fallout and apply this.

Ramana


--
1.8.1.2




Committed to trunk r226036.
Is patch ok for fsf-5?
kind regards,
Alex



[PATCH][ARM] Fix thumb-bitfld1.c testcase.

2015-07-21 Thread Alex Velenko
Hi,

This patch fixes testcase thumb-bitfld1.c so that it compiles without
specifying a C standard.

Is patch ok for trunk and fsf-5?

gcc/testsuite

2015-07-21  Alex Velenko  

* gcc.target/arm/thumb-bitfld1.c (foo): Return type fixed.
---
 gcc/testsuite/gcc.target/arm/thumb-bitfld1.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/thumb-bitfld1.c b/gcc/testsuite/gcc.target/arm/thumb-bitfld1.c
index ee39887..37630f1 100644
--- a/gcc/testsuite/gcc.target/arm/thumb-bitfld1.c
+++ b/gcc/testsuite/gcc.target/arm/thumb-bitfld1.c
@@ -10,6 +10,8 @@ struct foo
   unsigned b28 : 1;
   unsigned rest : 28;
 };
+
+unsigned
 foo(a)
  struct foo a;
 {
-- 
1.8.1.2



Re: [PATCH][ARM] Fix thumb-bitfld1.c testcase.

2015-07-21 Thread Alex Velenko



On 21/07/15 15:05, Kyrill Tkachov wrote:


On 21/07/15 15:04, Kyrill Tkachov wrote:

On 21/07/15 14:38, Alex Velenko wrote:

Hi,

This patch fixes testcase thumb-bitfld1.c so that it compiles without
specifying a C standard.

Is patch ok for trunk and fsf-5?

gcc/testsuite

2015-07-21  Alex Velenko  

   * gcc.target/arm/thumb-bitfld1.c (foo): Return type fixed.

Better to say:

*gcc.target/arm/thumb-bitfld1.c (foo): Add explicit return type.


With a space between the '*' and the filename, of course.

Kyrill



Ok with that ChangeLog entry.
Thanks,
Kyrill

Committed on trunk r226043 and backported to fsf-5 r226044
Kind regards,
Alex





---
gcc/testsuite/gcc.target/arm/thumb-bitfld1.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/thumb-bitfld1.c b/gcc/testsuite/gcc.target/arm/thumb-bitfld1.c
index ee39887..37630f1 100644
--- a/gcc/testsuite/gcc.target/arm/thumb-bitfld1.c
+++ b/gcc/testsuite/gcc.target/arm/thumb-bitfld1.c
@@ -10,6 +10,8 @@ struct foo
  unsigned b28 : 1;
  unsigned rest : 28;
};
+
+unsigned
foo(a)
 struct foo a;
{






[PATCH] [PATCH][ARM] Fix pr63210.c testcase.

2015-07-23 Thread Alex Velenko
Hi,

This patch prevents testcase pr63210.c from running with -march=armv4t.
The object size check should be skipped with an explicit -march=armv4t,
because the expected size is only correct when a pop {pc} instruction is
used, which is unsafe for ARMv4T. For arm_arch_v5t_ok targets, an explicit
-march=armv5t flag is set.

Is patch ok for trunk and fsf-5?

gcc/testsuite

2015-07-23  Alex Velenko  

* gcc.target/arm/pr63210.c (dg-skip-if): Skip armv4t.
(dg-additional-options): Add -march=armv5t if arm_arch_v5t_ok.
---
 gcc/testsuite/gcc.target/arm/pr63210.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/pr63210.c b/gcc/testsuite/gcc.target/arm/pr63210.c
index c3ae928..9b63a67 100644
--- a/gcc/testsuite/gcc.target/arm/pr63210.c
+++ b/gcc/testsuite/gcc.target/arm/pr63210.c
@@ -1,6 +1,8 @@
 /* { dg-do assemble } */
 /* { dg-options "-mthumb -Os " }  */
 /* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "do not test on armv4t" { *-*-* } { "-march=armv4t" } } */
+/* { dg-additional-options "-march=armv5t" {target arm_arch_v5t_ok} } */
 
 int foo1 (int c);
 int foo2 (int c);
-- 
1.8.1.2



Re: Re: [PATCH] [PATCH][ARM] Fix sibcall testcases.

2015-07-28 Thread Alex Velenko

Hi,

Following the last patch, this patch prevents arm_thumb1 XPASSes in
sibcall-3.c and sibcall-4.c by skipping on arm_thumb1 and arm_thumb2
respectively.
This patch also documents the arm_thumb1 and arm_thumb2 effective-target
options.


Is patch ok for trunk and fsf-5?

gcc/testsuite

2015-07-28  Alex Velenko  

* gcc.dg/sibcall-3.c (dg-skip-if): Skip if arm_thumb1.
* gcc.dg/sibcall-4.c (dg-skip-if): Likewise.

gcc/

2015-07-28  Alex Velenko  

* doc/sourcebuild.texi (arm_thumb1): Documented.
(arm_thumb2): Likewise.
---
 gcc/doc/sourcebuild.texi | 8 
 gcc/testsuite/gcc.dg/sibcall-3.c | 1 +
 gcc/testsuite/gcc.dg/sibcall-4.c | 1 +
 3 files changed, 10 insertions(+)

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index c6ef40e..ca42a09 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1551,6 +1551,14 @@ options.  Some multilibs may be incompatible with these options.
 ARM Target supports @code{-mfpu=neon-fp16 -mfloat-abi=softfp} or compatible
 options.  Some multilibs may be incompatible with these options.
 
+@item arm_thumb1
+ARM target interworks with Thumb-1 - given @code{-mthumb-interwork} both ARM
+and Thumb code may be generated interleaved.
+
+@item arm_thumb2
+ARM target interworks with Thumb-2 - given @code{-mthumb-interwork} both ARM
+and Thumb code may be generated interleaved.
+
 @item arm_thumb1_ok
 ARM target generates Thumb-1 code for @code{-mthumb}.

diff --git a/gcc/testsuite/gcc.dg/sibcall-3.c b/gcc/testsuite/gcc.dg/sibcall-3.c
index eafe8dd..e44596e 100644
--- a/gcc/testsuite/gcc.dg/sibcall-3.c
+++ b/gcc/testsuite/gcc.dg/sibcall-3.c
@@ -8,6 +8,7 @@
 /* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* 
m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* 
v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */

 /* -mlongcall disables sibcall patterns.  */
 /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */
+/* { dg-skip-if "" { arm_thumb1 } } */
 /* { dg-options "-O2 -foptimize-sibling-calls" } */

 /* The option -foptimize-sibling-calls is the default, but serves as
diff --git a/gcc/testsuite/gcc.dg/sibcall-4.c b/gcc/testsuite/gcc.dg/sibcall-4.c
index 1e039c6..5c69490 100644
--- a/gcc/testsuite/gcc.dg/sibcall-4.c
+++ b/gcc/testsuite/gcc.dg/sibcall-4.c
@@ -8,6 +8,7 @@
 /* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* 
m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* 
v850*-*-* vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */

 /* -mlongcall disables sibcall patterns.  */
 /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */
+/* { dg-skip-if "" { arm_thumb1 } } */
 /* { dg-options "-O2 -foptimize-sibling-calls" } */

 /* The option -foptimize-sibling-calls is the default, but serves as
--
1.8.1.2



Re: Re: ira.c update_equiv_regs patch causes gcc/testsuite/gcc.target/arm/pr43920-2.c regression

2015-07-28 Thread Alex Velenko

On 21/04/15 06:27, Jeff Law wrote:

On 04/20/2015 01:09 AM, Shiva Chen wrote:

Hi, Jeff

Thanks for your advice.

can_replace_by.patch is the new patch to handle both cases.

pr43920-2.c.244r.jump2.ori is the original  jump2 rtl dump

pr43920-2.c.244r.jump2.patch_can_replace_by is the jump2 rtl dump
after patch  can_replace_by.patch

Could you help me to review the patch?

Thanks.  This looks pretty good.

I expanded the comment for the new function a bit and renamed the
function in an effort to clarify its purpose.  From reviewing
can_replace_by, it seems it should have been handling this case, but
clearly wasn't due to implementation details.

I then bootstrapped and regression tested the patch on x86_64-linux-gnu
where it passed.  I also instrumented that compiler to see how often
this code triggers.  During a bootstrap it triggers a couple hundred
times (which is obviously a proxy for cross jumping improvements).  So
it's triggering regularly on x86_64, which is good.

I also verified that this fixes BZ64916 for an arm-non-eabi toolchain
configured with --with-arch=armv7.

Installed on the trunk.  No new testcase as it's covered by existing tests.

Thanks,,
jeff



Hi,
I see this patch has been committed in r56 on trunk. Is it okay to port
this to fsf-5?

Kind regards,
Alex



Re: ira.c update_equiv_regs patch causes gcc/testsuite/gcc.target/arm/pr43920-2.c regression

2015-07-31 Thread Alex Velenko

On 29/07/15 23:14, Jeff Law wrote:

On 07/28/2015 12:18 PM, Alex Velenko wrote:

On 21/04/15 06:27, Jeff Law wrote:

On 04/20/2015 01:09 AM, Shiva Chen wrote:

Hi, Jeff

Thanks for your advice.

can_replace_by.patch is the new patch to handle both cases.

pr43920-2.c.244r.jump2.ori is the original  jump2 rtl dump

pr43920-2.c.244r.jump2.patch_can_replace_by is the jump2 rtl dump
after patch  can_replace_by.patch

Could you help me to review the patch?

Thanks.  This looks pretty good.

I expanded the comment for the new function a bit and renamed the
function in an effort to clarify its purpose.  From reviewing
can_replace_by, it seems it should have been handling this case, but
clearly wasn't due to implementation details.

I then bootstrapped and regression tested the patch on x86_64-linux-gnu
where it passed.  I also instrumented that compiler to see how often
this code triggers.  During a bootstrap it triggers a couple hundred
times (which is obviously a proxy for cross jumping improvements).  So
it's triggering regularly on x86_64, which is good.

I also verified that this fixes BZ64916 for an arm-non-eabi toolchain
configured with --with-arch=armv7.

Installed on the trunk.  No new testcase as it's covered by existing
tests.

Thanks,,
jeff



Hi,
I see this patch has been committed in r56 on trunk. Is it okay to port
this to fsf-5?

It's not a regression, so backporting it would be generally frowned
upon.  If you feel strongly about it, you should ask Jakub, Joseph or
Richi (the release managers) for an exception to the general policy.

jeff


Hi Jakub,
Can this commit be ported to fsf-5? It fixed gcc.target/arm/pr43920-2.c
at the time, so I think it is a good idea to port. Please, see
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64916
Kind regards,
Alex



[PATCH][AArch64] vneg testcase made big-endian safe

2013-11-19 Thread Alex Velenko

Hi,

This patch makes the testcase for vneg[q]_s[8,16,32,64] big-endian safe.

The vneg_s.c testcase ran with both big- and little-endian compilers with
no problems.
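
The gist of the change, as a sketch (the helper below is hypothetical): element access now goes through vget_lane_* instead of subscripting, because lane numbers are endian-neutral while the mapping of [] subscripts to lanes is not guaranteed to match on big-endian.

#include <arm_neon.h>

int8_t
first_lane (int8x8_t v)
{
  /* Architectural lane 0 on either endianness; v[0] would depend on how
     the subscript maps to lanes for the target byte order.  */
  return vget_lane_s8 (v, 0);
}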


Is patch OK?

Thanks,
Alex

gcc/testsuite/

2013-11-19  Alex Velenko  

* gcc.target/aarch64/vneg_s.c (test_vneg_s8): Fixed to not use
vector indexing.
(test_vneg_s16): Likewise.
(test_vneg_s32): Likewise.
(test_vneg_s64): Likewise.
(test_vnegq_s8): Likewise.
(test_vnegq_s16): Likewise.
(test_vnegq_s32): Likewise.
(test_vnegq_s64): Likewise.
diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_s.c b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
index accbf14074b9f9569f7e3662b6571075421f6a27..ac7dd6d87d66b8e192edf0d58ead3952ddb2de43 100644
--- a/gcc/testsuite/gcc.target/aarch64/vneg_s.c
+++ b/gcc/testsuite/gcc.target/aarch64/vneg_s.c
@@ -35,17 +35,11 @@ extern void abort (void);
 #define REG_INFEX(reg_len) REG_INFEX##reg_len
 #define POSTFIX(reg_len, data_len) \
   CONCAT1 (REG_INFEX (reg_len), s##data_len)
+#define LANE_POSTFIX(reg_len, data_len) \
+  CONCAT1 (REG_INFEX (reg_len),lane_s##data_len)
 #define DATA_TYPE_32 float
 #define DATA_TYPE_64 double
 #define DATA_TYPE(data_len) DATA_TYPE_##data_len
-#define INDEX64_8 [i]
-#define INDEX64_16 [i]
-#define INDEX64_32 [i]
-#define INDEX64_64
-#define INDEX128_8 [i]
-#define INDEX128_16 [i]
-#define INDEX128_32 [i]
-#define INDEX128_64 [i]
 
 #define FORCE_SIMD_INST64_8(data)
 #define FORCE_SIMD_INST64_16(data)
@@ -56,8 +50,8 @@ extern void abort (void);
 #define FORCE_SIMD_INST128_32(data)
 #define FORCE_SIMD_INST128_64(data)
 
-#define INDEX(reg_len, data_len) \
-  CONCAT1 (INDEX, reg_len##_##data_len)
+#define GET_ELEMENT(reg_len, data_len) \
+  CONCAT1 (vget, LANE_POSTFIX (reg_len, data_len))
 #define FORCE_SIMD_INST(reg_len, data_len, data) \
   CONCAT1 (FORCE_SIMD_INST, reg_len##_##data_len) (data)
 #define LOAD_INST(reg_len, data_len) \
@@ -65,29 +59,31 @@ extern void abort (void);
 #define NEG_INST(reg_len, data_len) \
   CONCAT1 (vneg, POSTFIX (reg_len, data_len))
 
-#define RUN_TEST(test_set, answ_set, reg_len, data_len, n, a, b)	\
-  {	\
-int i;\
-INHIB_OPTIMIZATION;			\
-(a) = LOAD_INST (reg_len, data_len) (test_set);			\
-(b) = LOAD_INST (reg_len, data_len) (answ_set);			\
-FORCE_SIMD_INST (reg_len, data_len, a)\
-a = NEG_INST (reg_len, data_len) (a);\
-FORCE_SIMD_INST (reg_len, data_len, a)\
-for (i = 0; i < n; i++)		\
-  {	\
-INHIB_OPTIMIZATION;		\
-	if (a INDEX (reg_len, data_len)	\
-	!= b INDEX (reg_len, data_len))\
-	  return 1;			\
-  }	\
-  }
+#define RUN_TEST(test_set, answ_set, reg_len,		\
+ data_len, n, a, b, _a, _b)		\
+{			\
+  int i;		\
+  INHIB_OPTIMIZATION;	\
+  (a) = LOAD_INST (reg_len, data_len) (test_set);	\
+  (b) = LOAD_INST (reg_len, data_len) (answ_set);	\
+  FORCE_SIMD_INST (reg_len, data_len, a)		\
+a = NEG_INST (reg_len, data_len) (a);		\
+  FORCE_SIMD_INST (reg_len, data_len, a)		\
+for (i = 0; i < n; i++)\
+  {			\
+INHIB_OPTIMIZATION;\
+	_a = GET_ELEMENT (reg_len, data_len) (a, i);	\
+	_b = GET_ELEMENT (reg_len, data_len) (b, i);	\
+	if (_a != _b)	\
+	  return 1;	\
+  }			\
+}
 
 int
 test_vneg_s8 ()
 {
-  int8x8_t a;
-  int8x8_t b;
+  int8x8_t a, b;
+  int8_t _a, _b;
 
   int8_t test_set0[8] = {
 TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, SCHAR_MAX, SCHAR_MIN
@@ -96,7 +92,7 @@ test_vneg_s8 ()
 ANSW0, ANSW1, ANSW2, ANSW3, ANSW4, ANSW5, SCHAR_MIN + 1, SCHAR_MIN
   };
 
-  RUN_TEST (test_set0, answ_set0, 64, 8, 8, a, b);
+  RUN_TEST (test_set0, answ_set0, 64, 8, 8, a, b, _a, _b);
 
   return 0;
 }
@@ -106,8 +102,8 @@ test_vneg_s8 ()
 int
 test_vneg_s16 ()
 {
-  int16x4_t a;
-  int16x4_t b;
+  int16x4_t a, b;
+  int16_t _a,_b;
 
   int16_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
   int16_t test_set1[4] = { TEST4, TEST5, SHRT_MAX, SHRT_MIN };
@@ -115,8 +111,8 @@ test_vneg_s16 ()
   int16_t answ_set0[4] = { ANSW0, ANSW1, ANSW2, ANSW3 };
   int16_t answ_set1[4] = { ANSW4, ANSW5, SHRT_MIN + 1, SHRT_MIN };
 
-  RUN_TEST (test_set0, answ_set0, 64, 16, 4, a, b);
-  RUN_TEST (test_set1, answ_set1, 64, 16, 4, a, b);
+  RUN_TEST (test_set0, answ_set0, 64, 16, 4, a, b, _a, _b);
+  RUN_TEST (test_set1, answ_set1, 64, 16, 4, a, b, _a, _b);
 
   return 0;
 }
@@ -126,8 +122,8 @@ test_vneg_s16 ()
 int
 test_vneg_s32 ()
 {
-  int32x2_t a;
-  int32x2_t b;
+  int32x2_t a, b;
+  int32_t _a, _b;
 
   int32_t test_set0[2] = { TEST0, TEST1 };
   int32_t test_set1[2] = { TEST2, TEST3 };
@@ -139,10 +135,10 @@ test_vneg_s32 ()
   int32_t answ_set2[2] = { ANSW4, ANSW5 };
   int32_t answ_set3[2] = { INT_MIN + 1, INT_MIN };
 
-  RUN_TEST (test_set0, answ_set0, 64, 32, 2, a, b);
-  RUN_TEST (test_set1, answ_set1, 64, 32, 2, a, b);
-  RUN_TEST (test_set2, answ_set2, 64, 32, 2, a, b);
-  RU

[PATCH][AArch64] FP vdiv testcase made big-endian safe

2013-11-19 Thread Alex Velenko

Hi,

This patch fixes the vdiv[q]_f[32,64] NEON intrinsics testcase to be
big-endian safe.
The testcase ran on both little- and big-endian targets with no problems.

OK?

Thanks,
Alex

gcc/testsuite/

2013-11-19  Alex Velenko  

* gcc.target/aarch64/vdiv_f.c (test_vdiv_f32): Vector indexing
replaced with builtins.
(test_vdiv_f64): Likewise.
(test_vdivq_f32): Likewise.
(test_vdivq_f64): Likewise.
diff --git a/gcc/testsuite/gcc.target/aarch64/vdiv_f.c b/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
index cc3a9570c0fac0dcbf38f38314a416cca5e58c6e..98aae58acb9df3da0148fc1835a51c63a7c47d5d 100644
--- a/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vdiv_f.c
@@ -90,6 +90,8 @@
 #define REG_INFEX(reg_len) REG_INFEX##reg_len
 #define POSTFIX(reg_len, data_len) \
   CONCAT1 (REG_INFEX (reg_len), f##data_len)
+#define LANE_POSTFIX(reg_len, data_len) \
+  CONCAT1 (REG_INFEX (reg_len),lane_f##data_len)
 
 #define DATA_TYPE_32 float
 #define DATA_TYPE_64 double
@@ -99,10 +101,9 @@
 #define EPSILON_64 __DBL_EPSILON__
 #define EPSILON(data_len) EPSILON_##data_len
 
-#define INDEX64_32 [i]
-#define INDEX64_64
-#define INDEX128_32 [i]
-#define INDEX128_64 [i]
+#define GET_ELEMENT(reg_len, data_len) \
+  CONCAT1 (vget, LANE_POSTFIX (reg_len, data_len))
+
 #define INDEX(reg_len, data_len) \
   CONCAT1 (INDEX, reg_len##_##data_len)
 
@@ -122,7 +123,7 @@
 
 #define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
 
-#define RUN_TEST(a, b, c, testseta, testsetb, answset, count,		\
+#define RUN_TEST(a, b, c, a1, c1, testseta, testsetb, answset, count,	\
 		 reg_len, data_len, n)	\
 {	\
   int i;\
@@ -135,8 +136,10 @@
   for (i = 0; i < n; i++)		\
   {	\
 INHIB_OPTIMIZATION;			\
-if (!FP_equals ((a) INDEX (reg_len, data_len),			\
-		(c) INDEX (reg_len, data_len),			\
+(a1) = GET_ELEMENT (reg_len, data_len) ((a), (i));			\
+(c1) = GET_ELEMENT (reg_len, data_len) ((c), (i));			\
+if (!FP_equals ((a1),		\
+		(c1),		\
 		EPSILON (data_len)))\
   return 1;\
   }	\
@@ -152,9 +155,8 @@ int
 test_vdiv_f32 ()
 {
   int count;
-  float32x2_t a;
-  float32x2_t b;
-  float32x2_t c;
+  float32x2_t a, b, c;
+  float32_t a1, c1;
 
   float32_t testseta[10][2] = {
 { TESTA0, TESTA1 }, { TESTA2, TESTA3 },
@@ -182,7 +184,8 @@ test_vdiv_f32 ()
 
   for (count = 0; count < 10; count++)
 {
-  RUN_TEST (a, b, c, testseta, testsetb, answset, count, 64, 32, 2);
+  RUN_TEST (a, b, c, a1, c1, testseta, testsetb, \
+		answset, count, 64, 32, 2);
 }
 
   return 0;
@@ -202,10 +205,8 @@ int
 test_vdiv_f64 ()
 {
   int count;
-  float64x1_t a;
-  float64x1_t b;
-  float64x1_t c;
-
+  float64x1_t a, b ,c;
+  float64_t a1, c1;
   float64_t testseta[20][1] = {
 { TESTA0 }, { TESTA1 }, { TESTA2 }, { TESTA3 },
 { TESTA4 }, { TESTA5 }, { TESTA6 }, { TESTA7 },
@@ -232,7 +233,8 @@ test_vdiv_f64 ()
 
   for (count = 0; count < 20; count++)
 {
-  RUN_TEST (a, b, c, testseta, testsetb, answset, count, 64, 64, 1);
+  RUN_TEST (a, b, c, a1, c1, testseta, testsetb, \
+		answset, count, 64, 64, 1);
 }
   return 0;
 }
@@ -253,9 +255,8 @@ int
 test_vdivq_f32 ()
 {
   int count;
-  float32x4_t a;
-  float32x4_t b;
-  float32x4_t c;
+  float32x4_t a, b, c;
+  float32_t a1, c1;
 
   float32_t testseta[5][4] = {
 { TESTA0, TESTA1, TESTA2, TESTA3 },
@@ -283,7 +284,8 @@ test_vdivq_f32 ()
 
   for (count = 0; count < 5; count++)
 {
-  RUN_TEST (a, b, c, testseta, testsetb, answset, count, 128, 32, 4);
+  RUN_TEST (a, b, c, a1, c1, testseta, testsetb, \
+		answset, count, 128, 32, 4);
 }
   return 0;
 }
@@ -302,9 +304,8 @@ int
 test_vdivq_f64 ()
 {
   int count;
-  float64x2_t a;
-  float64x2_t b;
-  float64x2_t c;
+  float64x2_t a, b, c;
+  float64_t a1, c1;
 
   float64_t testseta[10][2] = {
 { TESTA0, TESTA1 }, { TESTA2, TESTA3 },
@@ -332,7 +333,8 @@ test_vdivq_f64 ()
 
   for (count = 0; count < 10; count++)
 {
-  RUN_TEST (a, b, c, testseta, testsetb, answset, count, 128, 64, 2);
+  RUN_TEST (a, b, c, a1, c1, testseta, testsetb, \
+		answset, count, 128, 64, 2);
 }
 
   return 0;


[PATCH][AArch64] vmov_n changes

2013-11-21 Thread Alex Velenko

Hi,

This patch adds C implementations for the intrinsics matching:
vmov[q]_n_f[32,64]
vmov[q]_n_[u,s,p][8,16]
vmov[q]_n_[u,s][32,64]

Regression tests for those intrinsics are added.

A full regression test ran with no regressions. Tested with big-endian as well.
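
Usage sketch (the function is illustrative, not part of the patch):

#include <arm_neon.h>

uint16x4_t
splat4 (uint16_t v)
{
  /* Broadcasts v into all four lanes; a single "dup v0.4h, w0" is
     expected instead of the old hand-written asm.  */
  return vmov_n_u16 (v);
}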

Any objections?

Thanks,
Alex

2013-11-21  Alex Velenko  

* config/aarch64/arm_neon.h (vmov_n_f32): Implemented in C.
(vmov_n_f64): Likewise.
(vmov_n_p8): Likewise.
(vmov_n_p16): Likewise.
(vmov_n_s8): Likewise.
(vmov_n_s16): Likewise.
(vmov_n_s32): Likewise.
(vmov_n_s64): Likewise.
(vmov_n_u8): Likewise.
(vmov_n_u16): Likewise.
(vmov_n_u32): Likewise.
(vmov_n_u64): Likewise.
(vmovq_n_f32): Likewise.
(vmovq_n_f64): Likewise.
(vmovq_n_p8): Likewise.
(vmovq_n_p16): Likewise.
(vmovq_n_s8): Likewise.
(vmovq_n_s16): Likewise.
(vmovq_n_s32): Likewise.
(vmovq_n_s64): Likewise.
(vmovq_n_u8): Likewise.
(vmovq_n_u16): Likewise.
(vmovq_n_u32): Likewise.
(vmovq_n_u64): Likewise.

gcc/testsuite/

2013-11-21  Alex Velenko  

* gcc.target/aarch64/vmov_n_1.c: New testcase.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index f7c9db692a48575a3772214d08ae35f491ad8a73..90e7079b0f88e09c7a2c8be1c8dd7cd7bd9941ab 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -8314,127 +8314,6 @@ vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
   return result;
 }
 
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vmov_n_f32 (float32_t a)
-{
-  float32x2_t result;
-  __asm__ ("dup %0.2s, %w1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-vmov_n_p8 (uint32_t a)
-{
-  poly8x8_t result;
-  __asm__ ("dup %0.8b,%w1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-vmov_n_p16 (uint32_t a)
-{
-  poly16x4_t result;
-  __asm__ ("dup %0.4h,%w1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vmov_n_s8 (int32_t a)
-{
-  int8x8_t result;
-  __asm__ ("dup %0.8b,%w1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vmov_n_s16 (int32_t a)
-{
-  int16x4_t result;
-  __asm__ ("dup %0.4h,%w1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vmov_n_s32 (int32_t a)
-{
-  int32x2_t result;
-  __asm__ ("dup %0.2s,%w1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vmov_n_s64 (int64_t a)
-{
-  int64x1_t result;
-  __asm__ ("ins %0.d[0],%x1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vmov_n_u8 (uint32_t a)
-{
-  uint8x8_t result;
-  __asm__ ("dup %0.8b,%w1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-vmov_n_u16 (uint32_t a)
-{
-  uint16x4_t result;
-  __asm__ ("dup %0.4h,%w1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vmov_n_u32 (uint32_t a)
-{
-  uint32x2_t result;
-  __asm__ ("dup %0.2s,%w1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vmov_n_u64 (uint64_t a)
-{
-  uint64x1_t result;
-  __asm__ ("ins %0.d[0],%x1"
-   : "=w"(result)
-   : "r"(a)
-   : /* No clobbers */);
-  return result;
-}
-
 __extension__ static __inline int16x

Re: [PATCH] [AArch32] Additional bics patterns.

2015-05-15 Thread Alex Velenko

On 01/05/15 10:28, Kyrill Tkachov wrote:


Can you please confirm that bootstraps with both arm and thumb pass?
That is, configured with --with-mode=arm and --with-mode=thumb



Hi Kyrill,

Bootstrapped on arm-none-gnueabihf with arm and thumb mode.

The following patch requires the bics shift operand on Thumb2 to be a
const int, as bics with a register-shifted operand is not supported by
Thumb2.
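
The shape of code the new patterns target looks roughly like this (a sketch, not taken from the new testcases):

int
no_overlap_p (int a, int b)
{
  /* a & ~(b << 2) computed only for its comparison against zero: this
     is intended to be matched as a single flag-setting bics with a
     shifted operand (constant shift amounts only on Thumb2).  */
  return (a & ~(b << 2)) == 0;
}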

Is patch ok?

gcc

2015-05-15  Alex Velenko  

   * config/arm/arm.md (andsi_not_shiftsi_si_scc): New pattern.
   (andsi_not_shiftsi_si_scc_no_reuse): New pattern.

gcc/testsuite

2015-05-15  Alex Velenko 

   * gcc.target/arm/bics_1.c: New testcase.
   * gcc.target/arm/bics_2.c: New testcase.
   * gcc.target/arm/bics_3.c: New testcase.
   * gcc.target/arm/bics_4.c: New testcase.
---
  gcc/config/arm/arm.md                 | 49 ++
  gcc/testsuite/gcc.target/arm/bics_1.c | 54 +
  gcc/testsuite/gcc.target/arm/bics_2.c | 57 +++
  gcc/testsuite/gcc.target/arm/bics_3.c | 41 +
  gcc/testsuite/gcc.target/arm/bics_4.c | 49 ++
  5 files changed, 250 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/arm/bics_1.c
  create mode 100644 gcc/testsuite/gcc.target/arm/bics_2.c
  create mode 100644 gcc/testsuite/gcc.target/arm/bics_3.c
  create mode 100644 gcc/testsuite/gcc.target/arm/bics_4.c

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 164ac13..26d3ad2 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -2768,6 +2768,55 @@
  (const_string "logic_shift_reg")))]
  )

+;; Shifted bics pattern used to set up CC status register and not reusing
+;; bics output.  Pattern restricts Thumb2 shift operand as bics for Thumb2
+;; does not support shift by register.
+(define_insn "andsi_not_shiftsi_si_scc_no_reuse"
+  [(set (reg:CC_NOOV CC_REGNUM)
+   (compare:CC_NOOV
+   (and:SI (not:SI (match_operator:SI 0 "shift_operator"
+   [(match_operand:SI 1 "s_register_operand" "r")
+(match_operand:SI 2 "arm_rhs_operand" "rM")]))
+   (match_operand:SI 3 "s_register_operand" "r"))
+   (const_int 0)))
+   (clobber (match_scratch:SI 4 "=r"))]
+  "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))"
+  "bic%.%?\\t%4, %3, %1%S0"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "conds" "set")
+   (set_attr "shift" "1")
+   (set (attr "type") (if_then_else (match_operand 2
"const_int_operand" "")
+ (const_string "logic_shift_imm")
+ (const_string "logic_shift_reg")))]
+)
+
+;; Same as andsi_not_shiftsi_si_scc_no_reuse, but the bics result is also
+;; getting reused later.
+(define_insn "andsi_not_shiftsi_si_scc"
+  [(parallel [(set (reg:CC_NOOV CC_REGNUM)
+   (compare:CC_NOOV
+   (and:SI (not:SI (match_operator:SI 0 "shift_operator"
+   [(match_operand:SI 1 "s_register_operand" "r")
+(match_operand:SI 2 "arm_rhs_operand" "rM")]))
+   (match_operand:SI 3 "s_register_operand" "r"))
+   (const_int 0)))
+   (set (match_operand:SI 4 "s_register_operand" "=r")
+(and:SI (not:SI (match_op_dup 0
+[(match_dup 1)
+ (match_dup 2)]))
+(match_dup 3)))])]
+  "TARGET_ARM || (TARGET_THUMB2 && CONST_INT_P (operands[2]))"
+  "bic%.%?\\t%4, %3, %1%S0"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "conds" "set")
+   (set_attr "shift" "1")
+   (set (attr "type") (if_then_else (match_operand 2
"const_int_operand" "")
+ (const_string "logic_shift_imm")
+ (const_string "logic_shift_reg")))]
+)
+
  (define_insn "*andsi_notsi_si_compare0"
[(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV
diff --git a/gcc/testsuite/gcc.target/arm/bics_1.c b/gcc/testsuite/gcc.target/arm/bics_1.c
new file mode 100644
index 000..173eb89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/bics_1.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps -fno-inline" } */
+/* { dg-require-effective-target arm32 } */
+
+extern void abort (void);
+
+int
+bics_si_test1 (int a, int b, int c)
+{
+  int d = a & ~b;
+
+  /* { dg-final

[PATCH] [PATCH][ARM] Fix thumb1-far-jump-2.c testcase.

2015-05-15 Thread Alex Velenko
Hi,

This patch fixes testcase thumb1-far-jump-2.c to conform to newer compilation
defaults.

Is patch ok?

gcc/testsuite

2015-05-15  Alex Velenko  

* gcc.target/arm/thumb1-far-jump-2.c (r4): Added int in definition.
---
 gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
index c6878f8..78fcafa 100644
--- a/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
+++ b/gcc/testsuite/gcc.target/arm/thumb1-far-jump-2.c
@@ -5,7 +5,7 @@
 /* { dg-options "-Os" } */
 /* { dg-skip-if "" { ! { arm_thumb1 } } } */
 
-volatile register r4 asm("r4");
+volatile register int r4 asm ("r4");
 void f3(int i)
 {
 #define GO(n) \
-- 
1.8.1.2



Re: [PATCH] [AArch32] Additional bics patterns.

2015-05-18 Thread Alex Velenko

Committed r223295.

Alex.



[PATCH] [PATCH][ARM] Fix sibcall testcases.

2015-05-20 Thread Alex Velenko
Hi,

This patch prevents arm_thumb1_ok XPASSes in the sibcall-3.c and sibcall-4.c
testcases. Sibcalls are not OK for Thumb-1, so the testcases need to be fixed.
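
For context, a sibling call is a tail call emitted as a direct branch instead of a call-plus-return, e.g. (illustrative only):

extern int callee (int);

int
caller (int x)
{
  /* With -foptimize-sibling-calls this can become "b callee" rather than
     "bl callee" followed by a return - the transformation that, per the
     note above, is not OK for Thumb-1.  */
  return callee (x + 1);
}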

Is patch ok?

gcc/testsuite

2015-05-20  Alex Velenko  

* gcc.dg/sibcall-3.c (dg-skip-if): Skip if arm_thumb1_ok.
* gcc.dg/sibcall-4.c (dg-skip-if): Likewise.
---
 gcc/testsuite/gcc.dg/sibcall-3.c | 1 +
 gcc/testsuite/gcc.dg/sibcall-4.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/sibcall-3.c b/gcc/testsuite/gcc.dg/sibcall-3.c
index eafe8dd..37f44a1 100644
--- a/gcc/testsuite/gcc.dg/sibcall-3.c
+++ b/gcc/testsuite/gcc.dg/sibcall-3.c
@@ -8,6 +8,7 @@
 /* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* 
m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* v850*-*-* 
vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */
 /* -mlongcall disables sibcall patterns.  */
 /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */
+/* { dg-skip-if "" { arm*-*-* && arm_thumb1_ok } } */
 /* { dg-options "-O2 -foptimize-sibling-calls" } */
 
 /* The option -foptimize-sibling-calls is the default, but serves as
diff --git a/gcc/testsuite/gcc.dg/sibcall-4.c b/gcc/testsuite/gcc.dg/sibcall-4.c
index 1e039c6..9554a95 100644
--- a/gcc/testsuite/gcc.dg/sibcall-4.c
+++ b/gcc/testsuite/gcc.dg/sibcall-4.c
@@ -8,6 +8,7 @@
 /* { dg-do run { xfail { { cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* 
m32r-*-* mcore-*-* mn10300-*-* msp430*-*-* nds32*-*-* xstormy16-*-* v850*-*-* 
vax-*-* xtensa*-*-* } || { arm*-*-* && { ! arm32 } } } } } */
 /* -mlongcall disables sibcall patterns.  */
 /* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */
+/* { dg-skip-if "" { arm*-*-* && arm_thumb1_ok } } */
 /* { dg-options "-O2 -foptimize-sibling-calls" } */
 
 /* The option -foptimize-sibling-calls is the default, but serves as
-- 
1.8.1.2



[PATCH] [PATCH][ARM] Fix split-live-ranges-for-shrink-wrap.c testcase.

2015-05-20 Thread Alex Velenko
Hi,

This patch limits runs of the split-live-ranges-for-shrink-wrap.c testcase to
supported architecture versions.
The object size check fails with -march=armv4t because pop pc is not
interworking safe on armv4t.
The test is also not supported with -march=armv7 and later, as it is a Thumb1
test.

Is patch ok?

gcc/testsuite

2015-05-20  Alex Velenko  

* gcc.target/arm/split-live-ranges-for-shrink-wrap.c (dg-skip-if):
Skip armv4t, armv7-a and later.
---
 gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c 
b/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c
index e36000b..c649bc1 100644
--- a/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c
+++ b/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c
@@ -1,6 +1,8 @@
 /* { dg-do assemble } */
 /* { dg-options "-mthumb -Os -fdump-rtl-ira " }  */
 /* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "" { arm_arch_v4t_ok } } */
+/* { dg-skip-if "" { arm_arch_v7a_ok } } */
 
 int foo (char *, char *, int);
 int test (int d, char * out, char *in, int len)
-- 
1.8.1.2



[PATCH] [PATCH][ARM] Fix thumb-ltu.c testcase.

2015-06-01 Thread Alex Velenko
Hi,

This patch fixes thumb-ltu.c so that it passes the excess-errors test.
Now that -std=gnu90 is no longer the default, this testcase started failing
because some functions were called before being declared.
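
For reference, a reduced sketch of what goes wrong (my illustration, not the
testcase): with the newer default of -std=gnu11, calling a function that has
no prior declaration draws -Wimplicit-function-declaration, and the
excess-errors check counts that warning as a failure.

int foo (void);   /* a prior declaration, as the patch adds, avoids the
                     implicit-declaration warning on the call below */

int
use_foo (void)
{
  return foo ();
}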

Is patch ok?

gcc/testsuite

2015-06-01  Alex Velenko  

* gcc.target/arm/thumb-ltu.c (foo): Predefined.
(bar): Predefined.
---
 gcc/testsuite/gcc.target/arm/thumb-ltu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/thumb-ltu.c 
b/gcc/testsuite/gcc.target/arm/thumb-ltu.c
index d057ea3..124b025 100644
--- a/gcc/testsuite/gcc.target/arm/thumb-ltu.c
+++ b/gcc/testsuite/gcc.target/arm/thumb-ltu.c
@@ -2,6 +2,9 @@
 /* { dg-require-effective-target arm_thumb1_ok } */
 /* { dg-options "-mcpu=arm1136jf-s -mthumb -O2" } */
 
+int foo();
+int bar();
+
 void f(unsigned a, unsigned b, unsigned c, unsigned d)
 {
   if (a <= b || c > d)
-- 
1.8.1.2



Re: Re: [PATCH] [PATCH][ARM] Fix thumb-ltu.c testcase.

2015-06-04 Thread Alex Velenko

On 01/06/15 10:50, Ramana Radhakrishnan wrote:



On 01/06/15 10:48, Alex Velenko wrote:

Hi,

This patch fixes thumb-ltu.c so that it passes the excess-errors test.
Now that -std=gnu90 is no longer the default, this testcase started failing
because some functions were called before being declared.

Is patch ok?

gcc/testsuite

2015-06-01  Alex Velenko  

 * gcc.target/arm/thumb-ltu.c (foo): Predefined.
 (bar): Predefined.
---
  gcc/testsuite/gcc.target/arm/thumb-ltu.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/thumb-ltu.c
b/gcc/testsuite/gcc.target/arm/thumb-ltu.c
index d057ea3..124b025 100644
--- a/gcc/testsuite/gcc.target/arm/thumb-ltu.c
+++ b/gcc/testsuite/gcc.target/arm/thumb-ltu.c
@@ -2,6 +2,9 @@
  /* { dg-require-effective-target arm_thumb1_ok } */
  /* { dg-options "-mcpu=arm1136jf-s -mthumb -O2" } */

+int foo();
+int bar();
+


Surely this is,


extern int foo (void);
extern int bar (void);


  void f(unsigned a, unsigned b, unsigned c, unsigned d)
  {
if (a <= b || c > d)




OK with that change.

Ramana


Committed with said change r223982.
Is patch ok for fsf-5 backport?
Alex



[PATCH] [AArch32] Additional bics patterns.

2015-04-22 Thread Alex Velenko
Hi,

This patch adds arm rtl patterns to generate bics instructions with shift.
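
To illustrate the kind of source the new patterns target (a reduced sketch of
mine; the actual dg testcases are in the diff below): when only the flags of
the bit-clear are needed, a flag-setting bics with a shifted operand can
replace a separate and-not plus compare.

int
flags_only (int a, int b)
{
  /* a & ~(b << 3): the result is only compared against zero, which is
     the case the new andsi_not_shiftsi_si_scc_no_reuse pattern is meant
     to let combine match as a single bics with an lsl #3 operand.  */
  if ((a & ~(b << 3)) == 0)
    return 1;
  return 0;
}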

Done full regression run on arm-none-eabi.

Is patch ok?

gcc/config

2015-04-22  Alex Velenko  

  * arm/arm.md (andsi_not_shiftsi_si_scc): New pattern.
  * (andsi_not_shiftsi_si_scc_no_reuse): New pattern.

gcc/testsuite

2015-04-22  Alex Velenko 

  * gcc.target/arm/bics_1.c : New testcase.
  * gcc.target/arm/bics_2.c : New testcase.
  * gcc.target/arm/bics_3.c : New testcase.
  * gcc.target/arm/bics_4.c : New testcase.
---
 gcc/config/arm/arm.md | 42 ++
 gcc/testsuite/gcc.target/arm/bics_1.c | 54 +
 gcc/testsuite/gcc.target/arm/bics_2.c | 57 +++
 gcc/testsuite/gcc.target/arm/bics_3.c | 41 +
 gcc/testsuite/gcc.target/arm/bics_4.c | 49 ++
 5 files changed, 243 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_2.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_4.c

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 164ac13..51a149e 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -2768,6 +2768,48 @@
  (const_string "logic_shift_reg")))]
 )
 
+(define_insn "andsi_not_shiftsi_si_scc_no_reuse"
+  [(set (reg:CC_NOOV CC_REGNUM)
+   (compare:CC_NOOV
+   (and:SI (not:SI (match_operator:SI 0 "shift_operator"
+   [(match_operand:SI 1 "s_register_operand" "r")
+(match_operand:SI 2 "arm_rhs_operand" "rM")]))
+   (match_operand:SI 3 "s_register_operand" "r"))
+   (const_int 0)))
+   (clobber (match_scratch:SI 4 "=r"))]
+  "TARGET_32BIT"
+  "bic%.%?\\t%4, %3, %1%S0"
+  [(set_attr "predicable" "yes")
+   (set_attr "conds" "set")
+   (set_attr "shift" "1")
+   (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "logic_shift_imm")
+ (const_string "logic_shift_reg")))]
+)
+
+(define_insn "andsi_not_shiftsi_si_scc"
+  [(parallel [(set (reg:CC_NOOV CC_REGNUM)
+   (compare:CC_NOOV
+   (and:SI (not:SI (match_operator:SI 0 "shift_operator"
+   [(match_operand:SI 1 "s_register_operand" "r")
+(match_operand:SI 2 "arm_rhs_operand" "rM")]))
+   (match_operand:SI 3 "s_register_operand" "r"))
+   (const_int 0)))
+   (set (match_operand:SI 4 "s_register_operand" "=r")
+(and:SI (not:SI (match_op_dup 0
+[(match_dup 1)
+ (match_dup 2)]))
+(match_dup 3)))])]
+  "TARGET_32BIT"
+  "bic%.%?\\t%4, %3, %1%S0"
+  [(set_attr "predicable" "yes")
+   (set_attr "conds" "set")
+   (set_attr "shift" "1")
+   (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
+ (const_string "logic_shift_imm")
+ (const_string "logic_shift_reg")))]
+)
+
 (define_insn "*andsi_notsi_si_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV
diff --git a/gcc/testsuite/gcc.target/arm/bics_1.c 
b/gcc/testsuite/gcc.target/arm/bics_1.c
new file mode 100644
index 000..173eb89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/bics_1.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps -fno-inline" } */
+/* { dg-require-effective-target arm32 } */
+
+extern void abort (void);
+
+int
+bics_si_test1 (int a, int b, int c)
+{
+  int d = a & ~b;
+
+  /* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+" 
2 } } */
+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+int
+bics_si_test2 (int a, int b, int c)
+{
+  int d = a & ~(b << 3);
+
+  /* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, r\[0-9\]+, 
.sl \#3" 1 } } */
+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+int
+main ()
+{
+  int x;
+
+  x = bics_si_test1 (29, ~4, 5);
+  if (x != ((29 & 4) + ~4 + 5))
+abort ();
+
+  x = bics_si_test1 (5, ~2, 20);
+  if (x != 25)
+abort ();
+
+x = bics_si_test2 (35, ~4, 5);
+  if (x != ((35 & ~(~4 << 3)) + ~4 + 5))
+abort ();
+
+  x = bics_si_test2 (96, ~2, 20);
+  if (x != 116)
+  abort ();
+
+  return 0;
+}
+/* { dg-final {

Re: [PATCH] [RTL] Relax CSE check to set REG_EQUAL notes.

2015-04-24 Thread Alex Velenko



On 24/04/15 02:16, Jeff Law wrote:

On 04/10/2015 03:14 AM, Alex Velenko wrote:

On 09/03/15 17:40, Jeff Law wrote:

On 03/09/15 03:53, Steven Bosscher wrote:

On Wed, Mar 4, 2015 at 12:09 PM, Alex Velenko wrote:

For example, in arm testcase pr43920-2.c, CSE previously decided not
to put
an "obvious" note on insn 9, as set value was the same as note value.
At the same time, other insns set up as -1 were set up through a
register
and did get a note:


...which is the point of the REG_EQUAL notes. In insn 8 there is a
REG_EQUAL note to show that the value of r111 is known. In insn 9 the
known value is, well, known from SET_SRC so there is no need for a
REG_EQUAL note. Adding REG_EQUAL notes in such cases is just wasteful.

Right.  I'd rather look into why later passes aren't discovering
whatever equivalences are important rather than adding the redundant
notes.

Regardless, I think this is a gcc-6 issue, so I'm not likely to look at
it in the immediate future.

jeff



Hi Jeff,
I reworked the patch to satisfy your preference.

This patch enables cfgcleanup.c to use the const int rtx of a single set as a
REG_EQUAL note.  For example, this helps the jump2 pass find extra
optimisation opportunities.
This patch fixes gcc.target/arm/pr43920-2.c for arm-none-eabi.

Bootstrapped on x86 and ran a full regression run on arm-none-eabi and
aarch64-none-elf.

Is this patch ok?

gcc/

2015-03-17  Alex Velenko  

  * cfgcleanup.c (can_replace_by): Use const int rtx of single set as
  REG_EQUAL note.

Now I finally see this in my queue.  I recalled the discussion around
whether or not to add the redundant notes, but hadn't had a chance to
look at the updated patch.

AFAICT, this is redundant with Shiva's patch, right?

jeff



Hi Jeff,
Yes, you are correct, this patch is now redundant.
Kind regards,
Alex



Re: [PATCH] [AArch32] Additional bics patterns.

2015-04-24 Thread Alex Velenko

Hi,

This patch adds rtl patterns to generate bics instructions with shift.

Added attribute predicable_short_it since last respin.

Done full regression run on arm-none-eabi and arm-none-gnueabihf.
Bootstrapped on arm-none-gnueabihf.

Is this patch ok?

gcc/config

2015-04-24  Alex Velenko  

  * arm/arm.md (andsi_not_shiftsi_si_scc): New pattern.
  * (andsi_not_shiftsi_si_scc_no_reuse): New pattern.

gcc/testsuite

2015-04-24  Alex Velenko 

  * gcc.target/arm/bics_1.c : New testcase.
  * gcc.target/arm/bics_2.c : New testcase.
  * gcc.target/arm/bics_3.c : New testcase.
  * gcc.target/arm/bics_4.c : New testcase.
---
 gcc/config/arm/arm.md | 44 +++
 gcc/testsuite/gcc.target/arm/bics_1.c | 54 
+
 gcc/testsuite/gcc.target/arm/bics_2.c | 57 
+++

 gcc/testsuite/gcc.target/arm/bics_3.c | 41 +
 gcc/testsuite/gcc.target/arm/bics_4.c | 49 ++
 5 files changed, 245 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_2.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_4.c

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 164ac13..9e774c1 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -2768,6 +2768,50 @@
  (const_string "logic_shift_reg")))]
 )

+(define_insn "andsi_not_shiftsi_si_scc_no_reuse"
+  [(set (reg:CC_NOOV CC_REGNUM)
+   (compare:CC_NOOV
+   (and:SI (not:SI (match_operator:SI 0 "shift_operator"
+   [(match_operand:SI 1 "s_register_operand" "r")
+(match_operand:SI 2 "arm_rhs_operand" "rM")]))
+   (match_operand:SI 3 "s_register_operand" "r"))
+   (const_int 0)))
+   (clobber (match_scratch:SI 4 "=r"))]
+  "TARGET_32BIT"
+  "bic%.%?\\t%4, %3, %1%S0"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "conds" "set")
+   (set_attr "shift" "1")
+   (set (attr "type") (if_then_else (match_operand 2 
"const_int_operand" "")

+ (const_string "logic_shift_imm")
+ (const_string "logic_shift_reg")))]
+)
+
+(define_insn "andsi_not_shiftsi_si_scc"
+  [(parallel [(set (reg:CC_NOOV CC_REGNUM)
+   (compare:CC_NOOV
+   (and:SI (not:SI (match_operator:SI 0 "shift_operator"
+   [(match_operand:SI 1 "s_register_operand" "r")
+(match_operand:SI 2 "arm_rhs_operand" "rM")]))
+   (match_operand:SI 3 "s_register_operand" "r"))
+   (const_int 0)))
+   (set (match_operand:SI 4 "s_register_operand" "=r")
+(and:SI (not:SI (match_op_dup 0
+[(match_dup 1)
+ (match_dup 2)]))
+(match_dup 3)))])]
+  "TARGET_32BIT"
+  "bic%.%?\\t%4, %3, %1%S0"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "conds" "set")
+   (set_attr "shift" "1")
+   (set (attr "type") (if_then_else (match_operand 2 
"const_int_operand" "")

+ (const_string "logic_shift_imm")
+ (const_string "logic_shift_reg")))]
+)
+
 (define_insn "*andsi_notsi_si_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV
diff --git a/gcc/testsuite/gcc.target/arm/bics_1.c 
b/gcc/testsuite/gcc.target/arm/bics_1.c

new file mode 100644
index 000..173eb89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/bics_1.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps -fno-inline" } */
+/* { dg-require-effective-target arm32 } */
+
+extern void abort (void);
+
+int
+bics_si_test1 (int a, int b, int c)
+{
+  int d = a & ~b;
+
+  /* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, 
r\[0-9\]+" 2 } } */

+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+int
+bics_si_test2 (int a, int b, int c)
+{
+  int d = a & ~(b << 3);
+
+  /* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, 
r\[0-9\]+, .sl \#3" 1 } } */

+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+int
+main ()
+{
+  int x;
+
+  x = bics_si_test1 (29, ~4, 5);
+  if (x != ((29 & 4) + ~4 + 5))
+abort ();
+
+  x = bics_si_test1 (5, 

Re: [PATCH] [AArch32] Additional bics patterns.

2015-04-27 Thread Alex Velenko

On 24/04/15 16:41, Alex Velenko wrote:

Hi,

This patch adds rtl patterns to generate bics instructions with shift.

Added attribute predicable_short_it since last respin.

Done full regression run on arm-none-eabi and arm-none-gnueabihf.
Bootstrapped on arm-none-gnueabihf.

Is this patch ok?

gcc/config

2015-04-24  Alex Velenko  

  * arm/arm.md (andsi_not_shiftsi_si_scc): New pattern.
  * (andsi_not_shiftsi_si_scc_no_reuse): New pattern.

gcc/testsuite

2015-04-24  Alex Velenko 

  * gcc.target/arm/bics_1.c : New testcase.
  * gcc.target/arm/bics_2.c : New testcase.
  * gcc.target/arm/bics_3.c : New testcase.
  * gcc.target/arm/bics_4.c : New testcase.
---
 gcc/config/arm/arm.md | 44 +++
 gcc/testsuite/gcc.target/arm/bics_1.c | 54 
+
 gcc/testsuite/gcc.target/arm/bics_2.c | 57 
+++

 gcc/testsuite/gcc.target/arm/bics_3.c | 41 +
 gcc/testsuite/gcc.target/arm/bics_4.c | 49 
++

 5 files changed, 245 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_2.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/bics_4.c

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 164ac13..9e774c1 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -2768,6 +2768,50 @@
   (const_string "logic_shift_reg")))]
 )

+(define_insn "andsi_not_shiftsi_si_scc_no_reuse"
+  [(set (reg:CC_NOOV CC_REGNUM)
+(compare:CC_NOOV
+(and:SI (not:SI (match_operator:SI 0 "shift_operator"
+[(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rM")]))
+(match_operand:SI 3 "s_register_operand" "r"))
+(const_int 0)))
+   (clobber (match_scratch:SI 4 "=r"))]
+  "TARGET_32BIT"
+  "bic%.%?\\t%4, %3, %1%S0"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "conds" "set")
+   (set_attr "shift" "1")
+   (set (attr "type") (if_then_else (match_operand 2 
"const_int_operand" "")

+  (const_string "logic_shift_imm")
+  (const_string "logic_shift_reg")))]
+)
+
+(define_insn "andsi_not_shiftsi_si_scc"
+  [(parallel [(set (reg:CC_NOOV CC_REGNUM)
+(compare:CC_NOOV
+(and:SI (not:SI (match_operator:SI 0 "shift_operator"
+[(match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "arm_rhs_operand" "rM")]))
+(match_operand:SI 3 "s_register_operand" "r"))
+(const_int 0)))
+(set (match_operand:SI 4 "s_register_operand" "=r")
+ (and:SI (not:SI (match_op_dup 0
+ [(match_dup 1)
+  (match_dup 2)]))
+ (match_dup 3)))])]
+  "TARGET_32BIT"
+  "bic%.%?\\t%4, %3, %1%S0"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "conds" "set")
+   (set_attr "shift" "1")
+   (set (attr "type") (if_then_else (match_operand 2 
"const_int_operand" "")

+  (const_string "logic_shift_imm")
+  (const_string "logic_shift_reg")))]
+)
+
 (define_insn "*andsi_notsi_si_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 (compare:CC_NOOV
diff --git a/gcc/testsuite/gcc.target/arm/bics_1.c 
b/gcc/testsuite/gcc.target/arm/bics_1.c

new file mode 100644
index 000..173eb89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/bics_1.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps -fno-inline" } */
+/* { dg-require-effective-target arm32 } */
+
+extern void abort (void);
+
+int
+bics_si_test1 (int a, int b, int c)
+{
+  int d = a & ~b;
+
+  /* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, 
r\[0-9\]+" 2 } } */

+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+int
+bics_si_test2 (int a, int b, int c)
+{
+  int d = a & ~(b << 3);
+
+  /* { dg-final { scan-assembler-times "bics\tr\[0-9\]+, r\[0-9\]+, 
r\[0-9\]+, .sl \#3" 1 } } */

+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+int
+main ()
+{
+  int x;
+
+  x = bics_si_test1 (29, ~4, 5);
+  if (x != ((29 & 4) + ~4 + 5))
+abort ();
+
+  x = bics_si_test1 (5, ~2, 20);
+  if (x != 25)
+abort ();
+
+x = bics_si_test2 (35, ~4, 5);
+  if (x != ((35 & ~(~4 << 3)) + 

Add to maintainers list.

2014-11-20 Thread Alex Velenko

2014-11-20  Alex Velenko  

* MAINTAINERS (write-after-approval): Add myself.

diff --git a/MAINTAINERS b/MAINTAINERS
index 11a28ef..eada4e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -566,6 +566,7 @@ David Ung   

 Neil Vachharajani  
 Kris Van Hees  
 Joost VandeVondele 
+Alex Velenko   
 Ilya Verbin
 Kugan Vivekanandarajah 
 Tom de Vries   

Re: Add to maintainers list.

2014-11-21 Thread Alex Velenko

Hi,



2014-11-20  Alex Velenko  

*MAINTAINERS (write-after-approval): Add myself.

diff --git a/MAINTAINERS b/MAINTAINERS
index 11a28ef..eada4e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -566,6 +566,7 @@ David Ung   

  Neil Vachharajani 
  Kris Van Hees 
  Joost VandeVondele
+Alex Velenko   
  Ilya Verbin   
  Kugan Vivekanandarajah
  Tom de Vries  



Can someone please approve?
Kind regards,
Alex



Re: [PATCH] [AArch64, RTL] Bics instruction generation for aarch64

2014-11-24 Thread Alex Velenko

On 11/11/14 10:38, Alex Velenko wrote:

 From 98bb6d7323ce79e28be8ef892b919391ed857e1f Mon Sep 17 00:00:00 2001
From: Alex Velenko 
Date: Fri, 31 Oct 2014 18:43:32 +
Subject: [PATCH] [AArch64, RTL] Bics instruction generation for aarch64

Hi,

This patch adds RTL patterns for aarch64 to generate bics instructions in
cases where the computed value gets discarded and only the status register
change of the instruction gets reused.

Previously, bics would only be generated if the value computed by bics was
later reused, which is not necessarily the case when computing this value
for "if" statements.
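
A reduced sketch of the flags-only case (my illustration; the full dg testcase
bics_3.c is in the diff below):

int
flags_only (int a, int b)
{
  /* The value of a & ~b is discarded; only the comparison against zero
     is needed, which is the case the new *_no_reuse patterns cover by
     emitting a flag-setting bics against the zero register.  */
  if (a & ~b)
    return 1;
  return 0;
}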

Is this patch ok?

Thanks,
Alex

gcc/

2014-11-10  Alex Velenko  

 * gcc/config/aarch64/aarch64.md
(and_one_cmpl3_compare0_no_reuse):
   New define_insn.
 * (and_one_cmpl_3_compare0_no_reuse):
   Likewise.

gcc/testsuite/

2014-11-10  Alex Velenko  

 * gcc.target/aarch64/bics1.c : New testcase.
---
  gcc/config/aarch64/aarch64.md | 26 
  gcc/testsuite/gcc.target/aarch64/bics_3.c | 69
+++
  2 files changed, 95 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/aarch64/bics_3.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 341c26f..6158d82 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2845,6 +2845,18 @@
[(set_attr "type" "logics_reg")]
  )

+(define_insn "*and_one_cmpl3_compare0_no_reuse"
+  [(set (reg:CC_NZ CC_REGNUM)
+(compare:CC_NZ
+ (and:GPI (not:GPI
+   (match_operand:GPI 0 "register_operand" "r"))
+  (match_operand:GPI 1 "register_operand" "r"))
+ (const_int 0)))]
+  ""
+  "bics\\tzr, %1, %0"
+  [(set_attr "type" "logics_reg")]
+)
+
  (define_insn "*_one_cmpl_3"
[(set (match_operand:GPI 0 "register_operand" "=r")
  (LOGICAL:GPI (not:GPI
@@ -2894,6 +2906,20 @@
[(set_attr "type" "logics_shift_imm")]
  )

+(define_insn "*and_one_cmpl_3_compare0_no_reuse"
+  [(set (reg:CC_NZ CC_REGNUM)
+(compare:CC_NZ
+ (and:GPI (not:GPI
+   (SHIFT:GPI
+(match_operand:GPI 0 "register_operand" "r")
+(match_operand:QI 1 "aarch64_shift_imm_" "n")))
+  (match_operand:GPI 2 "register_operand" "r"))
+ (const_int 0)))]
+  ""
+  "bics\\tzr, %2, %0,  %1"
+  [(set_attr "type" "logics_shift_imm")]
+)
+
  (define_insn "clz2"
[(set (match_operand:GPI 0 "register_operand" "=r")
  (clz:GPI (match_operand:GPI 1 "register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/bics_3.c
b/gcc/testsuite/gcc.target/aarch64/bics_3.c
new file mode 100644
index 000..ecb53e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bics_3.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+bics_si_test (int a, int b)
+{
+  if (a & ~b)
+return 1;
+  else
+return 0;
+}
+
+int __attribute__ ((noinline))
+bics_si_test2 (int a, int b)
+{
+  if (a & ~ (b << 2))
+return 1;
+  else
+return 0;
+}
+
+typedef long long s64;
+
+int __attribute__ ((noinline))
+bics_di_test (s64 a, s64 b)
+{
+  if (a & ~b)
+return 1;
+  else
+return 0;
+}
+
+int __attribute__ ((noinline))
+bics_di_test2 (s64 a, s64 b)
+{
+  if (a & ~(b << 2))
+return 1;
+  else
+return 0;
+}
+
+int
+main (void)
+{
+  int a = 5;
+  int b = 5;
+  int c = 20;
+  s64 d = 5;
+  s64 e = 5;
+  s64 f = 20;
+  if (bics_si_test (a, b))
+abort ();
+  if (bics_si_test2 (c, b))
+abort ();
+  if (bics_di_test (d, e))
+abort ();
+  if (bics_di_test2 (f, e))
+abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "bics\twzr, w\[0-9\]+, w\[0-9\]+"
2 } } */
+/* { dg-final { scan-assembler-times "bics\twzr, w\[0-9\]+, w\[0-9\]+,
lsl 2" 1 } } */
+/* { dg-final { scan-assembler-times "bics\txzr, x\[0-9\]+, x\[0-9\]+"
2 } } */
+/* { dg-final { scan-assembler-times "bics\txzr, x\[0-9\]+, x\[0-9\]+,
lsl 2" 1 } } */
+
+/* { dg-final { cleanup-saved-temps } } */


Hi,
Could someone please approve?
Kind regards,
Alex



Re: [patch]: [Bug tree-optimization/61917] [4.9/5 Regression] ICE on valid code at -O3 on x86_64-linux-gnu in vectorizable_reduction, at tree-vect-loop.c:4913

2015-02-26 Thread Alex Velenko

On 25/02/15 16:51, H.J. Lu wrote:

On Wed, Feb 25, 2015 at 5:10 AM, Kai Tietz  wrote:

Hello,

So, I did full regression-test for following patch:

ChangeLog

2015-02-25  Richard Biener  
 Kai Tietz  

 PR tree-optimization/61917
 * tree-vect-loop.c (vectorizable_reduction): Allow
 vect_internal_def without reduction to exit graceful.



I think it caused:

FAIL: gcc.dg/pr56350.c (internal compiler error)
FAIL: gcc.dg/pr56350.c (test for excess errors)

[hjl@gnu-6 gcc]$ ./xgcc -B./ -O -ftree-vectorize
/export/gnu/import/git/sources/gcc/gcc/testsuite/gcc.dg/pr56350.c
/export/gnu/import/git/sources/gcc/gcc/testsuite/gcc.dg/pr56350.c: In
function ‘f’:
/export/gnu/import/git/sources/gcc/gcc/testsuite/gcc.dg/pr56350.c:8:1:
internal compiler error: Segmentation fault
  f (void)
  ^
0xd1f836 crash_signal
/export/gnu/import/git/sources/gcc/gcc/toplev.c:383
0xfaf59a gimple_code
/export/gnu/import/git/sources/gcc/gcc/gimple.h:1553
0xfbd855 vectorizable_reduction(gimple_statement_base*,
gimple_stmt_iterator*, gimple_statement_base**, _slp_tree*)
/export/gnu/import/git/sources/gcc/gcc/tree-vect-loop.c:4987
0xfabc86 vect_analyze_stmt(gimple_statement_base*, bool*, _slp_tree*)
/export/gnu/import/git/sources/gcc/gcc/tree-vect-stmts.c:7170
0xfb50c9 vect_analyze_loop_operations
/export/gnu/import/git/sources/gcc/gcc/tree-vect-loop.c:1539
0xfb58cc vect_analyze_loop_2
/export/gnu/import/git/sources/gcc/gcc/tree-vect-loop.c:1800
0xfb5c70 vect_analyze_loop(loop*)
/export/gnu/import/git/sources/gcc/gcc/tree-vect-loop.c:1898
0xfd558f vectorize_loops()
/export/gnu/import/git/sources/gcc/gcc/tree-vectorizer.c:451
0xed3699 execute
/export/gnu/import/git/sources/gcc/gcc/tree-ssa-loop.c:295
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See  for instructions.
[hjl@gnu-6 gcc]$



Hi,
This patch also breaks gcc.dg/pr56350.c for aarch64-none-elf, 
arm-none-eabi and other arm targets. Failure looks very similar:


src/gcc/gcc/testsuite/gcc.dg/pr56350.c: In function 'f':

src/gcc/gcc/testsuite/gcc.dg/pr56350.c:8:1: internal compiler error: 
Segmentation fault


Please submit a full bug report,

with preprocessed source if appropriate.

See  for instructions.

compiler exited with status 1
output is:
src/gcc/gcc/testsuite/gcc.dg/pr56350.c: In function 'f':

src/gcc/gcc/testsuite/gcc.dg/pr56350.c:8:1: internal compiler error: 
Segmentation fault


Please submit a full bug report,

with preprocessed source if appropriate.

See  for instructions.

Kind regards,
Alex



Re: [patch]: [Bug tree-optimization/61917] [4.9/5 Regression] ICE on valid code at -O3 on x86_64-linux-gnu in vectorizable_reduction, at tree-vect-loop.c:4913

2015-02-26 Thread Alex Velenko

Hi Jakub,
Just tested with latest svn update. The issue is indeed gone.
Kind regards,
Alex

On 26/02/15 12:12, Jakub Jelinek wrote:

On Thu, Feb 26, 2015 at 12:03:46PM +, Alex Velenko wrote:

This patch also breaks gcc.dg/pr56350.c for aarch64-none-elf, arm-none-eabi
and other arm targets. Failure looks very similar:


Just svn update?  r220987 should fix this.

Jakub





Re: ipa-icf::merge TLC

2015-03-02 Thread Alex Velenko



On 01/03/15 16:47, Christophe Lyon wrote:

On 27 February 2015 at 21:49, Jan Hubicka  wrote:


../sysdeps/gnu/siglist.c:72:1: internal compiler error: in address_matters_p, 
at symtab.c:1908
  versioned_symbol (libc, __new_sys_sigabbrev, sys_sigabbrev, GLIBC_2_3_3);
  ^
0x66a080 symtab_node::address_matters_p()
 /scratch/sellcey/repos/bootstrap/src/gcc/gcc/symtab.c:1908
0xe7cbe5 ipa_icf::sem_variable::merge(ipa_icf::sem_item*)
 /scratch/sellcey/repos/bootstrap/src/gcc/gcc/ipa-icf.c:1443


Indeed, the ipa-icf should not try to analyze aliases - just prove equivalence of
definitions they are attached to.  It already does that for functions (bit by 
accident;
it gives up when there is no gimple body), but it does not do that for 
variables because
it gets into ctor_for_folding. For that reason it sometimes decides to try to 
make two
variable aliases alias of each other that is not a good idea, because of 
possible creation
of loops.

I am just discussing with Martin the fix.

Honza


For the record, I have noticed similar errors on ARM and AArch64
targets, when building glibc.

Christophe.



I confirm that ARM and AArch64 fail to build with this patch:

/work/build-aarch64-none-linux-gnu/install//bin/aarch64-none-linux-gnu-gcc 
../sysdeps/posix/cuserid.c -c -std=gnu99 -fgnu89-inline  -O2 -Wall 
-Winline -Wundef -Wwrite-strings -fmerge-all-constants -frounding-math 
-g -Wstrict-prototypes   -fPIC -fexceptions   -I../include 
-I/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common 
-I/work/build-aarch64-none-linux-gnu/obj/glibc 
-I../sysdeps/unix/sysv/linux/aarch64  -I../sysdeps/aarch64/nptl 
-I../sysdeps/unix/sysv/linux/generic 
-I../sysdeps/unix/sysv/linux/wordsize-64 
-I../sysdeps/unix/sysv/linux/include -I../sysdeps/unix/sysv/linux 
-I../sysdeps/nptl  -I../sysdeps/pthread  -I../sysdeps/gnu 
-I../sysdeps/unix/inet  -I../sysdeps/unix/sysv  -I../sysdeps/unix 
-I../sysdeps/posix  -I../sysdeps/aarch64/fpu  -I../sysdeps/aarch64 
-I../sysdeps/wordsize-64  -I../sysdeps/ieee754/ldbl-128 
-I../sysdeps/ieee754/dbl-64/wordsize-64  -I../sysdeps/ieee754/dbl-64 
-I../sysdeps/ieee754/flt-32  -I../sysdeps/aarch64/soft-fp 
-I../sysdeps/ieee754  -I../sysdeps/generic  -I.. -I../libio -I. 
-nostdinc -isystem 
/work/build-aarch64-none-linux-gnu/install/bin/../lib/gcc/aarch64-none-linux-gnu/5.0.0/include 
-isystem 
/work/build-aarch64-none-linux-gnu/install/bin/../lib/gcc/aarch64-none-linux-gnu/5.0.0/include-fixed 
-isystem 
/work/build-aarch64-none-linux-gnu/install//aarch64-none-linux-gnu/libc/usr/include 
 -D_LIBC_REENTRANT -include 
/work/build-aarch64-none-linux-gnu/obj/glibc/libc-modules.h 
-DMODULE_NAME=libc -include ../include/libc-symbols.h  -DPIC -DSHARED 
  -D_IO_MTSAFE_IO -o 
/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/cuserid.os -MD 
-MP -MF 
/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/cuserid.os.dt 
-MT /work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/cuserid.os
../sysdeps/gnu/siglist.c:77:1: internal compiler error: in 
address_matters_p, at symtab.c:1908

 versioned_symbol (libc, __new_sys_sigabbrev, sys_sigabbrev, GLIBC_2_1);
 ^
*** errlist.c count 134 inflated to GLIBC_2.12 count 135 (old errno.h?)
chmod a-w 
/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/errlist-compat.cT

0x6b9100 symtab_node::address_matters_p()
/work/src/gcc/gcc/symtab.c:1908
0xedb4e5 ipa_icf::sem_variable::merge(ipa_icf::sem_item*)
/work/src/gcc/gcc/ipa-icf.c:1723
0xee03f9 ipa_icf::sem_item_optimizer::merge_classes(unsigned int)
/work/src/gcc/gcc/ipa-icf.c:2955
0xee6d31 ipa_icf::sem_item_optimizer::execute()
/work/src/gcc/gcc/ipa-icf.c:2217
0xee8df1 ipa_icf_driver
/work/src/gcc/gcc/ipa-icf.c:3034
0xee8df1 ipa_icf::pass_ipa_icf::execute(function*)
/work/src/gcc/gcc/ipa-icf.c:3081
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See  for instructions.
make[2]: *** 
[/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/siglist.o] 
Error 1

make[2]: *** Waiting for unfinished jobs
mv -f 
/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/errlist-compat.cT 
/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/errlist-compat.c


Regards,
Alex



Re: ipa-icf::merge TLC

2015-03-03 Thread Alex Velenko

On 02/03/15 22:04, Christophe Lyon wrote:

On 2 March 2015 at 21:21, Jan Hubicka  wrote:



On 01/03/15 16:47, Christophe Lyon wrote:

On 27 February 2015 at 21:49, Jan Hubicka  wrote:


../sysdeps/gnu/siglist.c:72:1: internal compiler error: in address_matters_p, 
at symtab.c:1908
  versioned_symbol (libc, __new_sys_sigabbrev, sys_sigabbrev, GLIBC_2_3_3);
  ^
0x66a080 symtab_node::address_matters_p()
 /scratch/sellcey/repos/bootstrap/src/gcc/gcc/symtab.c:1908
0xe7cbe5 ipa_icf::sem_variable::merge(ipa_icf::sem_item*)
 /scratch/sellcey/repos/bootstrap/src/gcc/gcc/ipa-icf.c:1443


Indeed, the ipa-icf should not try to analyze aliases - just prove equivalence of
definitions they are attached to.  It already does that for functions (bit by 
accident;
it gives up when there is no gimple body), but it does not do that for 
variables because
it gets into ctor_for_folding. For that reason it sometimes decides to try to 
make two
variable aliases alias of each other that is not a good idea, because of 
possible creation
of loops.

I am just discussing with Martin the fix.

Honza


For the record, I have noticed similar errors on ARM and AArch64
targets, when building glibc.

Christophe.



I confirm ARM and AArch64 failing to build with this patch:
chmod a-w 
/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/errlist-compat.cT
0x6b9100 symtab_node::address_matters_p()
   /work/src/gcc/gcc/symtab.c:1908
0xedb4e5 ipa_icf::sem_variable::merge(ipa_icf::sem_item*)
   /work/src/gcc/gcc/ipa-icf.c:1723
0xee03f9 ipa_icf::sem_item_optimizer::merge_classes(unsigned int)
   /work/src/gcc/gcc/ipa-icf.c:2955
0xee6d31 ipa_icf::sem_item_optimizer::execute()
   /work/src/gcc/gcc/ipa-icf.c:2217
0xee8df1 ipa_icf_driver
   /work/src/gcc/gcc/ipa-icf.c:3034
0xee8df1 ipa_icf::pass_ipa_icf::execute(function*)
   /work/src/gcc/gcc/ipa-icf.c:3081


I commited patch for the alias merging yesterda night, so it should be fixed
now.  If it still fails, please fill in a PR with preprocessed testcase so I
can reproduce it in a cross.



On my side, I saw builds complete again starting with r221090, I guess
that's the commit you are referring to?

Hi,

I built with r221117. I see errors while building the following targets:
aarch64_be-none-linux-gnu, aarch64_be-none-linux-gnu,
arm-none-linux-gnueabihf, arm-none-linux-gnueabi.

For aarch64_be-none-linux-gnu I reproduce the error like this:

/work/build-aarch64-none-linux-gnu/install//bin/aarch64-none-linux-gnu-gcc 
/work/src/glibc/sysdeps/gnu/siglist.c -c -std=gnu99 -fgnu89-inline  -O2 
-Wall -Winline -Wundef -Wwrite-strings -fmerge-all-constants 
-frounding-math -g -Wstrict-prototypes   -fno-toplevel-reorder 
-fno-section-anchors   -I/work/src/glibc/include 
-I/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common 
-I/work/build-aarch64-none-linux-gnu/obj/glibc 
-I/work/src/glibc/sysdeps/unix/sysv/linux/aarch64 
-I/work/src/glibc/sysdeps/aarch64/nptl 
-I/work/src/glibc/sysdeps/unix/sysv/linux/generic 
-I/work/src/glibc/sysdeps/unix/sysv/linux/wordsize-64 
-I/work/src/glibc/sysdeps/unix/sysv/linux/include 
-I/work/src/glibc/sysdeps/unix/sysv/linux 
-I/work/src/glibc/sysdeps/nptl  -I/work/src/glibc/sysdeps/pthread 
-I/work/src/glibc/sysdeps/gnu  -I/work/src/glibc/sysdeps/unix/inet 
-I/work/src/glibc/sysdeps/unix/sysv  -I/work/src/glibc/sysdeps/unix 
-I/work/src/glibc/sysdeps/posix  -I/work/src/glibc/sysdeps/aarch64/fpu 
-I/work/src/glibc/sysdeps/aarch64  -I/work/src/glibc/sysdeps/wordsize-64 
 -I/work/src/glibc/sysdeps/ieee754/ldbl-128 
-I/work/src/glibc/sysdeps/ieee754/dbl-64/wordsize-64 
-I/work/src/glibc/sysdeps/ieee754/dbl-64 
-I/work/src/glibc/sysdeps/ieee754/flt-32 
-I/work/src/glibc/sysdeps/aarch64/soft-fp 
-I/work/src/glibc/sysdeps/ieee754  -I/work/src/glibc/sysdeps/generic 
-I/work/src/glibc -I/work/src/glibc/libio -I. -nostdinc -isystem 
/work/build-aarch64-none-linux-gnu/install/bin/../lib/gcc/aarch64-none-linux-gnu/5.0.0/include 
-isystem 
/work/build-aarch64-none-linux-gnu/install/bin/../lib/gcc/aarch64-none-linux-gnu/5.0.0/include-fixed 
-isystem 
/work/build-aarch64-none-linux-gnu/install//aarch64-none-linux-gnu/libc/usr/include 
 -D_LIBC_REENTRANT -include 
/work/build-aarch64-none-linux-gnu/obj/glibc/libc-modules.h 
-DMODULE_NAME=libc -include /work/src/glibc/include/libc-symbols.h 
 -D_IO_MTSAFE_IO -o 
/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/siglist.o -MD 
-MP -MF 
/work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/siglist.o.dt 
-MT /work/build-aarch64-none-linux-gnu/obj/glibc/stdio-common/siglist.o
/work/src/glibc/sysdeps/gnu/siglist.c:77:1: internal compiler error: in 
address_matters_p, at symtab.c:1908

 versioned_symbol (libc, __new_sys_sigabbrev, sys_sigabbrev, GLIBC_2_1);
 ^
0x6b9140 symtab_node::address_matters_p()
/work/src/gcc/gcc/symtab.c:1908
0xedb685 ipa_icf::sem_variable::merge(ipa_icf::sem_item*)
/work/src/gcc/gcc/ipa-icf.c:1740
0xee05b1 ipa_icf::sem_item_optimizer::merge_class

Re: [PATCH] PR rtl-optimization/32219: optimizer causees wrong code in pic/hidden/weak symbol checking

2015-03-03 Thread Alex Velenko

On 19/02/15 17:26, Richard Henderson wrote:

On 02/19/2015 09:08 AM, Alex Velenko wrote:

Your suggestion seems to fix gcc.target/arm/long-calls-1.c, but has to be
thoroughly tested.


Before you do complete testing, please also delete the TREE_STATIC test.
That bit should never be relevant to functions, as it indicates not that
it is in the compilation unit, but that it has static (as opposed to
automatic) storage duration.  Thus it is only relevant to variables.


r~



diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 7bf5b4d..777230e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -6392,14 +6392,8 @@ arm_set_default_type_attributes (tree type)
  static bool
  arm_function_in_section_p (tree decl, section *section)
  {
-  /* We can only be certain about functions defined in the same
- compilation unit.  */
-  if (!TREE_STATIC (decl))
-return false;
-
-  /* Make sure that SYMBOL always binds to the definition in this
- compilation unit.  */
-  if (!targetm.binds_local_p (decl))
+  /* We can only be certain about the prevailing symbol definition.  */
+  if (!decl_binds_to_current_def_p (decl))
  return false;

/* If DECL_SECTION_NAME is set, assume it is trustworthy.  */




Hi,

Did a bootstrap and a full regression run on arm-none-linux-gnueabihf,
No new regressions found. Some previously failing tests in libstdc++ 
started to fail differently, for example:


< ERROR: 22_locale/num_get/get/wchar_t/2.cc: can't read 
"additional_sources": no such variable for " dg-do 22 run { xfail 
lax_strtof\

p } "
< UNRESOLVED: 22_locale/num_get/get/wchar_t/2.cc: can't read 
"additional_sources": no such variable for " dg-do 22 run { xfail lax_s\

trtofp } "
---
> ERROR: 22_locale/num_get/get/wchar_t/2.cc: can't read 
"et_cache(uclibc,value)": no such element in array for " dg-do 22 run { 
xfai\

l lax_strtofp } "
> UNRESOLVED: 22_locale/num_get/get/wchar_t/2.cc: can't read 
"et_cache(uclibc,value)": no such element in array for " dg-do 22 run {\

 xfail lax_strtofp } "


But I think it is okay.

Kind regards,
Alex



[PATCH] [RTL] Relax CSE check to set REG_EQUAL notes.

2015-03-04 Thread Alex Velenko
Hi,

This patch permits CSE to add a REG_EQUAL note when a single SET assigns a
constant to a register, even if the REG_EQUAL constant rtx is the same as the
set source rtx.
This enables optimizations in later passes that look for REG_EQUAL notes,
such as the jump2 pass.

For example, in the arm testcase pr43920-2.c, CSE previously decided not to
put an "obvious" note on insn 9, as the set value was the same as the note
value.  At the same time, other insns setting -1 did so through a register
and did get a note:
(insn 9 53 34 8 (set (reg:SI 110 [ D.4934 ])
(const_int -1 [0x])) 
/work/src/gcc/gcc/testsuite/gcc.target/arm/pr43920-2.c:21 613 
{*thumb2_movsi_vfp}
 (nil))

(insn 8 45 50 6 (set (reg:SI 110 [ D.4934 ])
(reg/v:SI 111 [ startD.4917 ])) 
/work/src/gcc/gcc/testsuite/gcc.target/arm/pr43920-2.c:21 613 
{*thumb2_movsi_vfp}
 (expr_list:REG_EQUAL (const_int -1 [0x])
(nil)))

(insn 6 49 54 7 (set (reg:SI 110 [ D.4934 ])
(reg/v:SI 112 [ endD.4918 ])) 
/work/src/gcc/gcc/testsuite/gcc.target/arm/pr43920-2.c:21 613 
{*thumb2_movsi_vfp}
 (expr_list:REG_EQUAL (const_int -1 [0x])
(nil)))

The jump2 pass, optimizing common code, was looking at the notes to reason
about register values and failed to recognize those insns as equal.

Making CSE set up REG_EQUAL notes even in "obvious" cases
fixes pr43920-2.c for arm-none-eabi and other targets.

I prefer adding the notes in CSE instead of adding additional checks in jump2
and, possibly, other passes, as I think it is a more uniform solution and
allows a single-point fix.  The downside is having more notes.
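
To make the jump2 interaction concrete, here is a deliberately artificial
sketch of mine (not the actual pr43920-2.c) of the shape of the problem:
several paths return -1, but only the register-sourced sets used to carry a
REG_EQUAL note, so jump2 could not see that all the return values are equal.

extern int seek_op (int);   /* hypothetical helper standing in for lseek */

int
f (int fd)
{
  int start = seek_op (fd);
  int end = seek_op (fd);
  if (start == -1)
    return start;   /* value comes from a register; the comparison tells CSE
                       it is -1, so the set gets a REG_EQUAL (-1) note */
  if (end == -1)
    return end;     /* likewise */
  if (seek_op (fd) == -1)
    return -1;      /* value set directly from the constant; this is the
                       "obvious" case that previously got no note */
  return 0;
}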

Done full regression run on arm-none-eabi and bootstrapped on x86.

Is patch ok?

gcc/

2015-03-04  Alex Velenko  

* cse.c (cse_insn): Check to set REG_EQUAL note relaxed.
---
 gcc/cse.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/gcc/cse.c b/gcc/cse.c
index 2a33827..abaf867 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -5383,10 +5383,9 @@ cse_insn (rtx_insn *insn)
}
 
   /* If this is a single SET, we are setting a register, and we have an
-equivalent constant, we want to add a REG_EQUAL note if the constant
-is different from the source.  We don't want to do it for a constant
-pseudo since verifying that this pseudo hasn't been eliminated is a
-pain; moreover such a note won't help anything.
+equivalent constant, we want to add a REG_EQUAL note.  We don't want
+to do it for a constant pseudo since verifying that this pseudo hasn't
+been eliminated is a pain; moreover such a note won't help anything.
 
 Avoid a REG_EQUAL note for (CONST (MINUS (LABEL_REF) (LABEL_REF)))
 which can be created for a reference to a compile time computable
@@ -5400,8 +5399,7 @@ cse_insn (rtx_insn *insn)
  && !(GET_CODE (src_const) == CONST
   && GET_CODE (XEXP (src_const, 0)) == MINUS
   && GET_CODE (XEXP (XEXP (src_const, 0), 0)) == LABEL_REF
-  && GET_CODE (XEXP (XEXP (src_const, 0), 1)) == LABEL_REF)
- && !rtx_equal_p (src, src_const))
+  && GET_CODE (XEXP (XEXP (src_const, 0), 1)) == LABEL_REF))
{
  /* Make sure that the rtx is not shared.  */
  src_const = copy_rtx (src_const);
-- 
1.8.1.2




[PATCH] [ARM] PR45701 testcase fix.

2015-03-04 Thread Alex Velenko
Hi,

This patch fixes the arm pr45701 scan-assembler tests. Those test that register
r3 is used to maintain double-word stack alignment. Recent optimizations reduced
the number of local variables needed in those tests, removing the necessity to
push r3.  The testcases are fixed by adding an additional local variable.

Is patch OK?

2015-03-04  Alex Velenko  

gcc/testsuite

* gcc.target/arm/pr45701-1.c (history_expand_line_internal): Add an
extra variable to force stack alignment.
* gcc.target/arm/pr45701-2.c (history_expand_line_internal): Add an
extra variable to force stack alignment.
---
 gcc/testsuite/gcc.target/arm/pr45701-1.c | 5 +++--
 gcc/testsuite/gcc.target/arm/pr45701-2.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr45701-1.c 
b/gcc/testsuite/gcc.target/arm/pr45701-1.c
index 2c690d5..454a087 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-1.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-1.c
@@ -5,6 +5,7 @@
 /* { dg-final { scan-assembler-not "r8" } } */
 
 extern int hist_verify;
+extern int a1;
 extern char *pre_process_line (char*);
 extern char* str_cpy (char*, char*);
 extern int str_len (char*);
@@ -16,10 +17,10 @@ history_expand_line_internal (char* line)
 {
   char *new_line;
   int old_verify;
-
+  int a = a1;
   old_verify = hist_verify;
   hist_verify = 0;
   new_line = pre_process_line (line);
-  hist_verify = old_verify;
+  hist_verify = old_verify + a;
   return (new_line == line) ? savestring (line) : new_line;
 }
diff --git a/gcc/testsuite/gcc.target/arm/pr45701-2.c 
b/gcc/testsuite/gcc.target/arm/pr45701-2.c
index ee1ee7d..afe0840 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-2.c
@@ -5,6 +5,7 @@
 /* { dg-final { scan-assembler-not "r8" } } */
 
 extern int hist_verify;
+extern int a1;
 extern char *pre_process_line (char*);
 extern char* savestring1 (char*, char*);
 extern char* str_cpy (char*, char*);
@@ -17,11 +18,11 @@ history_expand_line_internal (char* line)
 {
   char *new_line;
   int old_verify;
-
+  int a = a1;
   old_verify = hist_verify;
   hist_verify = 0;
   new_line = pre_process_line (line);
-  hist_verify = old_verify;
+  hist_verify = old_verify + a;
   /* Two tail calls here, but r3 is not used to pass values.  */
   return (new_line == line) ? savestring (line) : savestring1 (new_line, line);
 }
-- 
1.8.1.2




Re: [PATCH] PR rtl-optimization/32219: optimizer causees wrong code in pic/hidden/weak symbol checking

2015-03-05 Thread Alex Velenko


On 03/03/15 15:58, Alex Velenko wrote:

On 19/02/15 17:26, Richard Henderson wrote:

On 02/19/2015 09:08 AM, Alex Velenko wrote:
Your suggestion seem to fix gcc.target/arm/long-calls-1.c, but has 
to be

thoroughly tested.


Before you do complete testing, please also delete the TREE_STATIC test.
That bit should never be relevant to functions, as it indicates not that
it is in the compilation unit, but that it has static (as opposed to
automatic) storage duration.  Thus it is only relevant to variables.


r~



diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 7bf5b4d..777230e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -6392,14 +6392,8 @@ arm_set_default_type_attributes (tree type)
  static bool
  arm_function_in_section_p (tree decl, section *section)
  {
-  /* We can only be certain about functions defined in the same
- compilation unit.  */
-  if (!TREE_STATIC (decl))
-return false;
-
-  /* Make sure that SYMBOL always binds to the definition in this
- compilation unit.  */
-  if (!targetm.binds_local_p (decl))
+  /* We can only be certain about the prevailing symbol definition.  */
+  if (!decl_binds_to_current_def_p (decl))
  return false;

/* If DECL_SECTION_NAME is set, assume it is trustworthy. */




Hi,

Did a bootstrap and a full regression run on arm-none-linux-gnueabihf,
No new regressions found. Some previously failing tests in libstdc++ 
started to fail differently, for example:


< ERROR: 22_locale/num_get/get/wchar_t/2.cc: can't read 
"additional_sources": no such variable for " dg-do 22 run { xfail 
lax_strtof\

p } "
< UNRESOLVED: 22_locale/num_get/get/wchar_t/2.cc: can't read 
"additional_sources": no such variable for " dg-do 22 run { xfail lax_s\

trtofp } "
---
> ERROR: 22_locale/num_get/get/wchar_t/2.cc: can't read 
"et_cache(uclibc,value)": no such element in array for " dg-do 22 run 
{ xfai\

l lax_strtofp } "
> UNRESOLVED: 22_locale/num_get/get/wchar_t/2.cc: can't read 
"et_cache(uclibc,value)": no such element in array for " dg-do 22 run {\

 xfail lax_strtofp } "


But I think it is okay.

Kind regards,
Alex




Hi,
Ping. Could someone please approve Richard's patch?
This issue needs fixing.
Kind regards,
Alex



Re: [PATCH] PR rtl-optimization/32219: optimizer causees wrong code in pic/hidden/weak symbol checking

2015-03-06 Thread Alex Velenko


On 05/03/15 15:28, Ramana Radhakrishnan wrote:

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 7bf5b4d..777230e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -6392,14 +6392,8 @@ arm_set_default_type_attributes (tree type)
   static bool
   arm_function_in_section_p (tree decl, section *section)
   {
-  /* We can only be certain about functions defined in the same
- compilation unit.  */
-  if (!TREE_STATIC (decl))
-return false;
-
-  /* Make sure that SYMBOL always binds to the definition in this
- compilation unit.  */
-  if (!targetm.binds_local_p (decl))
+  /* We can only be certain about the prevailing symbol definition.  */
+  if (!decl_binds_to_current_def_p (decl))
   return false;

 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */




Sorry to have missed this - I've also been traveling recently which has
made it harder with patch traffic - this is OK if no regressions.

Please apply with an appropriate Changelog.

Ramana



Hi,
Committed as r221220 and fixed ChangeLog entry in r221234.
Sorry for claiming the patch for myself.
Kind regards,
Alex



Re: [patch] PR lto/65276 remove odr_violated assert

2015-03-17 Thread Alex Velenko

On 02/03/15 18:43, Aldy Hernandez wrote:

TYPE_BINFO is null when no optimization is used, but odr_violated is unset.

Fixed and approved in the PR by Honza.

Tested on x86-64 Linux.

Committed to mainline.



Hi,
I believe your testcase does not work for arm-none-eabi:

Executing on host: /arm-none-eabi/obj/gcc2/gcc/testsuite/g++8/../../xg++ 
-B/arm-none-eabi/obj/gcc2/gcc/testsuite/g++8/../../ cp_lto_pr65276_0.o 
cp_lto_pr65276_1.o g++_tg.o  -fno-diagnostics-show-caret 
-fdiagnostics-color=never  -nostdinc++ 
-I/arm-none-eabi/obj/gcc2/arm-none-eabi/libstdc++-v3/include/arm-none-eabi 
-I/arm-none-eabi/obj/gcc2/arm-none-eabi/libstdc++-v3/include 
-I/src/gcc/libstdc++-v3/libsupc++ 
-I/src/gcc/libstdc++-v3/include/backward 
-I/src/gcc/libstdc++-v3/testsuite/util -fmessage-length=0 -flto -O0 
-std=c++11   -specs=rdimon.specs -Wa,-mno-warn-deprecated 
-L/arm-none-eabi/obj/gcc2/arm-none-eabi/./libstdc++-v3/src/.libs 
-B/arm-none-eabi/obj/gcc2/arm-none-eabi/./libstdc++-v3/src/.libs 
-Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main -Wl,-wrap,abort  -o 
g++-dg-lto-pr65276-01.exe(timeout = 300)
spawn /arm-none-eabi/obj/gcc2/gcc/testsuite/g++8/../../xg++ 
-B/arm-none-eabi/obj/gcc2/gcc/testsuite/g++8/../../ cp_lto_pr65276_0.o 
cp_lto_pr65276_1.o g++_tg.o -fno-diagnostics-show-caret 
-fdiagnostics-color=never -nostdinc++ 
-I/arm-none-eabi/obj/gcc2/arm-none-eabi/libstdc++-v3/include/arm-none-eabi 
-I/arm-none-eabi/obj/gcc2/arm-none-eabi/libstdc++-v3/include 
-I/src/gcc/libstdc++-v3/libsupc++ 
-I/src/gcc/libstdc++-v3/include/backward 
-I/src/gcc/libstdc++-v3/testsuite/util -fmessage-length=0 -flto -O0 
-std=c++11 -specs=rdimon.specs -Wa,-mno-warn-deprecated 
-L/arm-none-eabi/obj/gcc2/arm-none-eabi/./libstdc++-v3/src/.libs 
-B/arm-none-eabi/obj/gcc2/arm-none-eabi/./libstdc++-v3/src/.libs 
-Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main -Wl,-wrap,abort -o 
g++-dg-lto-pr65276-01.exe
/tmp/61243907.0/ccwrV09Z.ltrans0.ltrans.o:(.rodata+0x0): multiple 
definition of `typeinfo for std::exception'
/arm-none-eabi/obj/gcc2/arm-none-eabi/./libstdc++-v3/src/.libs/libstdc++.a(eh_exception.o):(.rodata._ZTISt9exception+0x0): 
first defined here
/tmp/61243907.0/ccwrV09Z.ltrans0.ltrans.o:(.rodata+0x8): multiple 
definition of `typeinfo name for std::exception'
/arm-none-eabi/obj/gcc2/arm-none-eabi/./libstdc++-v3/src/.libs/libstdc++.a(eh_exception.o):/src/gcc/libstdc++-v3/libsupc++/eh_exception.cc:35: 
first defined here

collect2: error: ld returned 1 exit status
compiler exited with status 1
output is:
/tmp/61243907.0/ccwrV09Z.ltrans0.ltrans.o:(.rodata+0x0): multiple 
definition of `typeinfo for std::exception'
/arm-none-eabi/obj/gcc2/arm-none-eabi/./libstdc++-v3/src/.libs/libstdc++.a(eh_exception.o):(.rodata._ZTISt9exception+0x0): 
first defined here
/tmp/61243907.0/ccwrV09Z.ltrans0.ltrans.o:(.rodata+0x8): multiple 
definition of `typeinfo name for std::exception'
/arm-none-eabi/obj/gcc2/arm-none-eabi/./libstdc++-v3/src/.libs/libstdc++.a(eh_exception.o):/src/gcc/libstdc++-v3/libsupc++/eh_exception.cc:35: 
first defined here

collect2: error: ld returned 1 exit status

FAIL: g++.dg/lto/pr65276 cp_lto_pr65276_0.o-cp_lto_pr65276_1.o link, 
-flto -O0 -std=c++11
UNRESOLVED: g++.dg/lto/pr65276 cp_lto_pr65276_0.o-cp_lto_pr65276_1.o 
execute -flto -O0 -std=c++11


Kind regards,
Alex



Re: [PATCH] [ARM] PR45701 testcase fix.

2015-03-26 Thread Alex Velenko

On 04/03/15 11:13, Alex Velenko wrote:

Hi,

This patch fixes the arm pr45701 scan-assembler tests. Those test that register
r3 is used to maintain double-word stack alignment. Recent optimizations reduced
the number of local variables needed in those tests, removing the necessity to
push r3.  The testcases are fixed by adding an additional local variable.

Is patch OK?

2015-03-04  Alex Velenko  

gcc/testsuite

* gcc.target/arm/pr45701-1.c (history_expand_line_internal): Add an
extra variable to force stack alignment.
* gcc.target/arm/pr45701-2.c (history_expand_line_internal): Add an
extra variable to force stack alignment.
---
  gcc/testsuite/gcc.target/arm/pr45701-1.c | 5 +++--
  gcc/testsuite/gcc.target/arm/pr45701-2.c | 5 +++--
  2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr45701-1.c 
b/gcc/testsuite/gcc.target/arm/pr45701-1.c
index 2c690d5..454a087 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-1.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-1.c
@@ -5,6 +5,7 @@
  /* { dg-final { scan-assembler-not "r8" } } */

  extern int hist_verify;
+extern int a1;
  extern char *pre_process_line (char*);
  extern char* str_cpy (char*, char*);
  extern int str_len (char*);
@@ -16,10 +17,10 @@ history_expand_line_internal (char* line)
  {
char *new_line;
int old_verify;
-
+  int a = a1;
old_verify = hist_verify;
hist_verify = 0;
new_line = pre_process_line (line);
-  hist_verify = old_verify;
+  hist_verify = old_verify + a;
return (new_line == line) ? savestring (line) : new_line;
  }
diff --git a/gcc/testsuite/gcc.target/arm/pr45701-2.c 
b/gcc/testsuite/gcc.target/arm/pr45701-2.c
index ee1ee7d..afe0840 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-2.c
@@ -5,6 +5,7 @@
  /* { dg-final { scan-assembler-not "r8" } } */

  extern int hist_verify;
+extern int a1;
  extern char *pre_process_line (char*);
  extern char* savestring1 (char*, char*);
  extern char* str_cpy (char*, char*);
@@ -17,11 +18,11 @@ history_expand_line_internal (char* line)
  {
char *new_line;
int old_verify;
-
+  int a = a1;
old_verify = hist_verify;
hist_verify = 0;
new_line = pre_process_line (line);
-  hist_verify = old_verify;
+  hist_verify = old_verify + a;
/* Two tail calls here, but r3 is not used to pass values.  */
return (new_line == line) ? savestring (line) : savestring1 (new_line, 
line);
  }


Hi,

Ping.

Alex



Re: [PATCH] [ARM] PR45701 testcase fix.

2015-03-31 Thread Alex Velenko



On 31/03/15 15:30, Richard Earnshaw wrote:

On 04/03/15 11:13, Alex Velenko wrote:

Hi,

This patch fixes the arm pr45701 scan-assembler tests. Those test that register
r3 is used to maintain double-word stack alignment. Recent optimizations reduced
the number of local variables needed in those tests, removing the necessity to
push r3.  The testcases are fixed by adding an additional local variable.

Is patch OK?



This patch is OK.

Let me put it on record that I really dislike these scan-assembler tests
that rely on specific behaviours throughout the entire compilation flow.
  They're just too fragile to be useful.

R.



Committed.


2015-03-04  Alex Velenko  

gcc/testsuite

* gcc.target/arm/pr45701-1.c (history_expand_line_internal): Add an
extra variable to force stack alignment.
* gcc.target/arm/pr45701-2.c (history_expand_line_internal): Add an
extra variable to force stack alignment.
---
  gcc/testsuite/gcc.target/arm/pr45701-1.c | 5 +++--
  gcc/testsuite/gcc.target/arm/pr45701-2.c | 5 +++--
  2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/arm/pr45701-1.c 
b/gcc/testsuite/gcc.target/arm/pr45701-1.c
index 2c690d5..454a087 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-1.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-1.c
@@ -5,6 +5,7 @@
  /* { dg-final { scan-assembler-not "r8" } } */

  extern int hist_verify;
+extern int a1;
  extern char *pre_process_line (char*);
  extern char* str_cpy (char*, char*);
  extern int str_len (char*);
@@ -16,10 +17,10 @@ history_expand_line_internal (char* line)
  {
char *new_line;
int old_verify;
-
+  int a = a1;
old_verify = hist_verify;
hist_verify = 0;
new_line = pre_process_line (line);
-  hist_verify = old_verify;
+  hist_verify = old_verify + a;
return (new_line == line) ? savestring (line) : new_line;
  }
diff --git a/gcc/testsuite/gcc.target/arm/pr45701-2.c 
b/gcc/testsuite/gcc.target/arm/pr45701-2.c
index ee1ee7d..afe0840 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-2.c
@@ -5,6 +5,7 @@
  /* { dg-final { scan-assembler-not "r8" } } */

  extern int hist_verify;
+extern int a1;
  extern char *pre_process_line (char*);
  extern char* savestring1 (char*, char*);
  extern char* str_cpy (char*, char*);
@@ -17,11 +18,11 @@ history_expand_line_internal (char* line)
  {
char *new_line;
int old_verify;
-
+  int a = a1;
old_verify = hist_verify;
hist_verify = 0;
new_line = pre_process_line (line);
-  hist_verify = old_verify;
+  hist_verify = old_verify + a;
/* Two tail calls here, but r3 is not used to pass values.  */
return (new_line == line) ? savestring (line) : savestring1 (new_line, 
line);
  }







Re: Re: [PATCH] [PATCH][ARM] Fix split-live-ranges-for-shrink-wrap.c testcase.

2015-06-22 Thread Alex Velenko

On 20/05/15 21:14, Joseph Myers wrote:

Again, the condition you propose to add doesn't make sense.  arm_arch_X_ok
is only appropriate for tests using an explicit -march=X.  Testing with
-march=armv7* should automatically skip this test anyway because it would
cause arm_thumb1_ok to fail.



Hi,

I adjusted the patch to skip execution of split-live-ranges-for-shrink-wrap.c
with explicitly specified -march=armv4t and to provide the -march=armv5t flag
for arm_arch_v5t_ok targets.

Is patch ok?

Alex

gcc/testsuite

2015-06-22  Alex Velenko  

 * gcc.target/arm/split-live-ranges-for-shrink-wrap.c (dg-skip-if):
Skip -march=armv4t.
(dg-additional-options): Set armv5t flag.

diff --git
a/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c
b/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c
index e36000b..3cb93dc 100644
--- a/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c
+++ b/gcc/testsuite/gcc.target/arm/split-live-ranges-for-shrink-wrap.c
@@ -1,6 +1,8 @@
  /* { dg-do assemble } */
  /* { dg-options "-mthumb -Os -fdump-rtl-ira " }  */
  /* { dg-require-effective-target arm_thumb1_ok } */
+/* { dg-skip-if "do not test on armv4t" { *-*-* } { "-march=armv4t" } } =
*/
+/* { dg-additional-options "-march=armv5t" {target arm_arch_v5t_ok} } */

  int foo (char *, char *, int);
  int test (int d, char * out, char *in, int len)

-- 
1.8.1.2



[PATCH][AArch64] Testcase fix for __ATOMIC_CONSUME

2015-01-21 Thread Alex Velenko
Hi,
Is the following patch ok?
regards,
Alex

This patch fixes aarch64/atomic-op-consume.c test to expect safe assembly to be
generated when __ATOMIC_CONSUME semantics is requested.
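
For context, a minimal example of the kind of operation the shared
atomic-op-consume.x source exercises (my sketch, not the actual .x file):

int v;

int
fetch_add_consume (void)
{
  /* With the Bugzilla 59448 workaround, __ATOMIC_CONSUME is promoted to
     acquire semantics, so on AArch64 the load half of the LL/SC loop is
     emitted as ldaxr rather than ldxr.  */
  return __atomic_fetch_add (&v, 1, __ATOMIC_CONSUME);
}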

2015-01-21 Alex Velenko alex.vele...@arm.com

gcc/testsuite/

* gcc.target/aarch64/atomic-op-consume.c (scan-assembler-times): Modified.

diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c 
b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
index 38d6c2c..cf33be2 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
@@ -3,5 +3,9 @@
 
 #include "atomic-op-consume.x"
 
-/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } 
} */
+/* To workaround Bugzilla 59448 issue, a request for memory behaviour
+   __ATOMIC_CONSUME is promoted to MEMMODEL_ACQUIRE behaviour, not
+   MEMMODEL_CONSUME behaviour.  This causes "ldaxr" to be generated
+   instead of "ldxr".  */
+/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } 
} */
 /* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, 
\\\[x\[0-9\]+\\\]" 6 } } */
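
For reference, a reduced sketch (an assumption about the shape of the code in
atomic-op-consume.x, not a copy of it) of the kind of operation the test
exercises:

/* Hypothetical reduction, not taken from atomic-op-consume.x: an atomic
   read-modify-write requested with __ATOMIC_CONSUME.  Because of the
   PR59448 workaround the memory order is promoted to acquire, so the
   generated load-exclusive is "ldaxr" rather than "ldxr" on AArch64.  */
int
consume_fetch_add (int *p)
{
  return __atomic_fetch_add (p, 1, __ATOMIC_CONSUME);
}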

[PATCH][AArch32] Testcase fix for __ATOMIC_CONSUME

2015-01-27 Thread Alex Velenko

Hi,

This patch fixes the arm/atomic-op-consume.c test to expect the safe "LDAEX"
instruction to be generated when __ATOMIC_CONSUME semantics are requested.

This patch was tested by running the modified test on arm-none-eabi and
arm-none-linux-gnueabi compilers.

Is this patch ok?

Alex

2015-01-27  Alex Velenko  

gcc/testsuite/

  * gcc.target/arm/atomic-op-consume.c (scan-assembler-times): Adjust
  scan-assembler-times pattern.

diff --git a/gcc/testsuite/gcc.target/arm/atomic-op-consume.c 
b/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
index 0354717..cc6c028 100644
--- a/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
+++ b/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
@@ -5,6 +5,9 @@
 
 #include "../aarch64/atomic-op-consume.x"
 
-/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } 
} */
+/* To workaround Bugzilla 59448 issue, a request for __ATOMIC_CONSUME is always
+   promoted to __ATOMIC_ACQUIRE, implemented as MEMMODEL_ACQUIRE.  This causes
+   "LDAEX" to be generated instead of "LDREX".  */
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } 
} */
 /* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, 
\\\[r\[0-9\]+\\\]" 6 } } */
 /* { dg-final { scan-assembler-not "dmb" } } */

[PATCH][AArch64] Testcase fix for __ATOMIC_CONSUME

2015-01-27 Thread Alex Velenko

Hi,

This patch fixes the aarch64/atomic-op-consume.c test to expect the safe "LDAXR"
instruction to be generated when __ATOMIC_CONSUME semantics are requested.

This patch was tested by running the modified test on aarch64-none-elf
compiler.

Is this patch ok?

Alex

2015-01-27  Alex Velenko  

gcc/testsuite/

  * gcc.target/aarch64/atomic-op-consume.c (scan-assembler-times): Adjust
  scan-assembler-times pattern.

diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c 
b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
index 38d6c2c..7ece5b1 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
@@ -3,5 +3,8 @@
 
 #include "atomic-op-consume.x"
 
-/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } 
} */
+/* To workaround Bugzilla 59448 issue, a request for __ATOMIC_CONSUME is always
+   promoted to __ATOMIC_ACQUIRE, implemented as MEMMODEL_ACQUIRE.  This causes
+   "LDAXR" to be generated instead of "LDXR".  */
+/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } 
} */
 /* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, 
\\\[x\[0-9\]+\\\]" 6 } } */

Re: [PATCH][AArch32] Testcase fix for __ATOMIC_CONSUME

2015-01-28 Thread Alex Velenko

On 27/01/15 16:13, Ramana Radhakrishnan wrote:

On Tue, Jan 27, 2015 at 4:06 PM, Alex Velenko  wrote:


Hi,

This patch fixes arm/atomic-op-consume.c test to expect safe "LDAEX"
instruction to be generated when __ATOMIC_CONSUME semantics is requested.

This patch was tested by running the modified test on arm-none-eabi and
arm-none-linux-gnueabi compilers.

Is this patch ok?


Ok. Please remember James's comments in the future about cover notes.



Committed.

Alex


Ramana



Alex

2015-01-27  Alex Velenko  

gcc/testsuite/

   * gcc.target/arm/atomic-op-consume.c (scan-assembler-times): Adjust
   scan-assembler-times pattern.

diff --git a/gcc/testsuite/gcc.target/arm/atomic-op-consume.c 
b/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
index 0354717..cc6c028 100644
--- a/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
+++ b/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
@@ -5,6 +5,9 @@

  #include "../aarch64/atomic-op-consume.x"

-/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } 
} */
+/* To workaround Bugzilla 59448 issue, a request for __ATOMIC_CONSUME is always
+   promoted to __ATOMIC_ACQUIRE, implemented as MEMMODEL_ACQUIRE.  This causes
+   "LDAEX" to be generated instead of "LDREX".  */
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } 
} */
  /* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, 
\\\[r\[0-9\]+\\\]" 6 } } */
  /* { dg-final { scan-assembler-not "dmb" } } */






Re: Re: [PATCH IRA] update_equiv_regs fails to set EQUIV reg-note for pseudo with more than one definition

2015-02-02 Thread Alex Velenko

On 11/10/14 13:44, Felix Yang wrote:

Hello Jeff,

 I see that you have improved the RTL typesafety issue for ira.c,
so I rebased this patch
 on the latest trunk and changed it to use the new list walking interface.
 Bootstrapped on x86_64-SUSE-Linux and make check regression tested.
 OK for trunk?

Hi Felix,
I believe your patch causes a regression for arm-none-eabi.
FAIL: gcc.target/arm/pr43920-2.c object-size text <= 54
FAIL: gcc.target/arm/pr43920-2.c scan-assembler-times pop 2

This happens because your patch stops reuse of code for the
"return -1;" statements in pr43920-2.c.

As far as I investigated, your patch prevents the "(expr_list (-1) (nil))"
note from being added in the ira pass, which prevents the jump2
optimization from happening.


So before, in ira pass I could see:
"(insn 9 53 34 8 (set (reg:SI 110 [ D.4934 ])
(const_int -1 [0x])) 
/work/fsf-trunk-ref-2/src/gcc/gcc/testsuite/gcc.target/arm/pr43920-2.c:20 613 
{*thumb2_movsi_vfp}

 (expr_list:REG_EQUAL (const_int -1 [0x])
(nil)))"
But with your patch I get
"(insn 9 53 34 8 (set (reg:SI 110 [ D.5322 ])
(const_int -1 [0x])) 
/work/fsf-trunk-2/src/gcc/gcc/testsuite/gcc.target/arm/pr43920-2.c:20 
615 {*thumb2_movsi_vfp}

 (nil))"

This causes a code generation regression and needs to be fixed.
Kind regards,
Alex



Index: gcc/ChangeLog
===
--- gcc/ChangeLog(revision 216116)
+++ gcc/ChangeLog(working copy)
@@ -1,3 +1,14 @@
+2014-10-11  Felix Yang  
+Jeff Law  
+
+* ira.c (struct equivalence): Change member "is_arg_equivalence"
+and "replace" into boolean bitfields; turn member "loop_depth" into
+a short integer; add new members "no_equiv" and "reserved".
+(no_equiv): Set no_equiv of struct equivalence if register is marked
+as having no known equivalence.
+(update_equiv_regs): Check all definitions for a multiple-set
+register to make sure that the RHS have the same value.
+
  2014-10-11  Martin Liska  

  PR/63376
Index: gcc/ira.c
===
--- gcc/ira.c(revision 216116)
+++ gcc/ira.c(working copy)
@@ -2902,12 +2902,14 @@ struct equivalence

/* Loop depth is used to recognize equivalences which appear
   to be present within the same loop (or in an inner loop).  */
-  int loop_depth;
+  short loop_depth;
/* Nonzero if this had a preexisting REG_EQUIV note.  */
-  int is_arg_equivalence;
+  unsigned char is_arg_equivalence : 1;
/* Set when an attempt should be made to replace a register
   with the associated src_p entry.  */
-  char replace;
+  unsigned char replace : 1;
+  /* Set if this register has no known equivalence.  */
+  unsigned char no_equiv : 1;
  };

  /* reg_equiv[N] (where N is a pseudo reg number) is the equivalence
@@ -3255,6 +3257,7 @@ no_equiv (rtx reg, const_rtx store ATTRIBUTE_UNUSE
if (!REG_P (reg))
  return;
regno = REGNO (reg);
+  reg_equiv[regno].no_equiv = 1;
list = reg_equiv[regno].init_insns;
if (list && list->insn () == NULL)
  return;
@@ -3381,7 +3384,7 @@ update_equiv_regs (void)

/* If this insn contains more (or less) than a single SET,
   only mark all destinations as having no known equivalence.  */
-  if (set == 0)
+  if (set == NULL_RTX)
  {
note_stores (PATTERN (insn), no_equiv, NULL);
continue;
@@ -3476,16 +3479,49 @@ update_equiv_regs (void)
if (note && GET_CODE (XEXP (note, 0)) == EXPR_LIST)
  note = NULL_RTX;

-  if (DF_REG_DEF_COUNT (regno) != 1
-  && (! note
+  if (DF_REG_DEF_COUNT (regno) != 1)
+{
+  bool equal_p = true;
+  rtx_insn_list *list;
+
+  /* If we have already processed this pseudo and determined it
+ can not have an equivalence, then honor that decision.  */
+  if (reg_equiv[regno].no_equiv)
+continue;
+
+  if (! note
|| rtx_varies_p (XEXP (note, 0), 0)
|| (reg_equiv[regno].replacement
&& ! rtx_equal_p (XEXP (note, 0),
-reg_equiv[regno].replacement
-{
-  no_equiv (dest, set, NULL);
-  continue;
+reg_equiv[regno].replacement)))
+{
+  no_equiv (dest, set, NULL);
+  continue;
+}
+
+  list = reg_equiv[regno].init_insns;
+  for (; list; list = list->next ())
+{
+  rtx note_tmp;
+  rtx_insn *insn_tmp;
+
+  insn_tmp = list->insn ();
+  note_tmp = find_reg_note (insn_tmp, REG_EQUAL, NULL_RTX);
+  gcc_assert (note_tmp);
+  if (! rtx_equal_p (XEXP (note, 0), XEXP (note_tmp, 0)))
+{
+  equal_p = false;
+  break;
+}
+}
+
+  if (! equal_p)
+{
+  no_equiv (dest, set, NULL);
+  c

Re: [PATCH IRA] update_equiv_regs fails to set EQUIV reg-note for pseudo with more than one definition

2015-02-03 Thread Alex Velenko

On 03/02/15 08:29, Bin.Cheng wrote:

On Tue, Feb 3, 2015 at 3:24 PM, Jeff Law  wrote:

On 02/02/15 08:59, Alex Velenko wrote:


On 11/10/14 13:44, Felix Yang wrote:


Hello Jeff,

  I see that you have improved the RTL typesafety issue for ira.c,
so I rebased this patch
  on the latest trunk and change to use the new list walking
interface.
  Bootstrapped on x86_64-SUSE-Linux and make check regression tested.
  OK for trunk?


Hi Felix,
I believe your patch causes a regression for arm-none-eabi.
FAIL: gcc.target/arm/pr43920-2.c object-size text <= 54
FAIL: gcc.target/arm/pr43920-2.c scan-assembler-times pop 2

This happens because your patch stops reuse of code for
" return -1;" statements in pr43920-2.c.

As far as I investigated, your patch prevents adding "(expr_list (-1)
(nil)" in ira pass, which prevents jump2 optimization from happening.

So before, in ira pass I could see:
"(insn 9 53 34 8 (set (reg:SI 110 [ D.4934 ])
  (const_int -1 [0x]))
/work/fsf-trunk-ref-2/src/gcc/gcc/testsuite/gcc.target/arm/pr43920-2.c:20
613
{*thumb2_movsi_vfp}
   (expr_list:REG_EQUAL (const_int -1 [0x])
  (nil)))"
But with your patch I get
"(insn 9 53 34 8 (set (reg:SI 110 [ D.5322 ])
  (const_int -1 [0x]))
/work/fsf-trunk-2/src/gcc/gcc/testsuite/gcc.target/arm/pr43920-2.c:20
615 {*thumb2_movsi_vfp}
   (nil))"

This causes a code generation regression and needs to be fixed.
Kind regards,


We'd need to see the full dumps.  In particular is reg110 set anywhere else?
If so then the change is doing precisely what it should be doing and the
test needs to be updated to handle the different code we generate.


Hmm, if I understand correctly, it's a code size regression, so I
don't think it's appropriate to adapt the test case.  Either the patch
or something else in GCC is doing wrong, right?

Hi Alex, could you please file a PR with full dump information for tracking?

Thanks,
bin

Hi Bin,
Created bugzilla ticket, as requested:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64916
This test already existed in the testsuite, it is not new.
Kind regards,
Alex



Jeff






Re: Re: [PATCH][AArch64] Testcase fix for __ATOMIC_CONSUME

2015-02-09 Thread Alex Velenko

On 28/01/15 18:50, Mike Stump wrote:

On Jan 28, 2015, at 9:51 AM, Marcus Shawcroft  
wrote:

Going forward we can [ … ] xfail the test case pending a proper solution to
59448 ?



Mike do you prefer one of the other two approaches ?


I’d xfail the test case and mark with the fix consume PR.  If we don’t have an 
unambiguous, fix consume PR, I’d file that.  It should be listed as failing on 
aarch, and the fix for that PR should then make the aarch test case pass.  This 
way no one can run off with the PR and do anything else with it.



Hi, Mike!

Sorry for the delay.

The following patch makes atomic-op-consume.c XFAIL for both the
gcc.target/arm and gcc.target/aarch64 tests. XFAIL was chosen as the
preferred approach for handling the __ATOMIC_CONSUME to __ATOMIC_ACQUIRE
promotion workaround.


This patch was tested by running the modified tests on aarch64-none-elf
and arm-none-eabi compilers.

Is this patch ok?

Alex

2015-02-09  Alex Velenko  

gcc/testsuite/

* gcc.target/aarch64/atomic-op-consume.c (scan-assembler-times):
Directive adjusted to XFAIL.
* gcc.target/arm/atomic-op-consume.c (scan-assembler-times): Directive
adjusted to XFAIL.

diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c 
b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c

index 38d6c2c..8150af6 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
@@ -3,5 +3,9 @@

 #include "atomic-op-consume.x"

-/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, 
\\\[x\[0-9\]+\\\]" 6 } } */
+/* To workaround Bugzilla 59448 issue, a request for __ATOMIC_CONSUME 
is always
+   promoted to __ATOMIC_ACQUIRE, implemented as MEMMODEL_ACQUIRE.  This 
causes

+   "LDAXR" to be generated instead of "LDXR".  Therefore, "LDXR" test is
+   expected to fail.  */
+/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, 
\\\[x\[0-9\]+\\\]" 6 { xfail *-*-* } } } */
 /* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, 
\\\[x\[0-9\]+\\\]" 6 } } */
diff --git a/gcc/testsuite/gcc.target/arm/atomic-op-consume.c 
b/gcc/testsuite/gcc.target/arm/atomic-op-consume.c

index cc6c028..060655c 100644
--- a/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
+++ b/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
@@ -7,7 +7,8 @@

 /* To workaround Bugzilla 59448 issue, a request for __ATOMIC_CONSUME 
is always
promoted to __ATOMIC_ACQUIRE, implemented as MEMMODEL_ACQUIRE. 
This causes

-   "LDAEX" to be generated instead of "LDREX".  */
-/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, 
\\\[r\[0-9\]+\\\]" 6 } } */

+   "LDAEX" to be generated instead of "LDREX".  Therefore, "LDREX" test is
+   expected to fail.  */
+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, 
\\\[r\[0-9\]+\\\]" 6 { xfail *-*-* } } } */
 /* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, 
\\\[r\[0-9\]+\\\]" 6 } } */

 /* { dg-final { scan-assembler-not "dmb" } } */

--
1.8.1.2





[PATCH][TREE] Match.pd "compare-and-not" patterns.

2015-02-09 Thread Alex Velenko

Hi,

This patch adds match.pd "compare-and-not" simplification patterns.
This is a generic transformation reducing variable accesses.
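
A quick worked check of the identities behind the patterns (not part of
the patch): x & y keeps exactly the bits of x that are also set in y, so
(x & y) == x exactly when x has no bit outside y, i.e. (x & ~y) == 0, and
symmetrically for the second pattern:

/* Standalone sanity check of the two identities, using the same values
   as the new testcase below; not part of the patch itself.  */
#include <assert.h>

int
main (void)
{
  int x = 21, y = 85;
  assert (((x & y) == x) == ((x & ~y) == 0));
  x = 85; y = 84;
  assert (((x & y) == y) == ((~x & y) == 0));
  return 0;
}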

Done full regression run on arm-none-eabi, aarch64-none-elf,
aarch64_be-none-elf and bootstrapped on x86.

Is patch ok?

2015-02-09  Alex Velenko  

gcc/

* match.pd ((X & Y) == X): New pattern.
((X & Y) == Y): New pattern.

gcc/testsuite

* gcc.dg/match-and-not.c: New testcase.

diff --git a/gcc/match.pd b/gcc/match.pd
index 81c4ee6..b4fa6e9 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -377,6 +377,24 @@ along with GCC; see the file COPYING3.  If not see
&& TYPE_PRECISION (TREE_TYPE (@1)) == 1)
(le @0 @1)))
 
+/* If arg1 and arg2 are integers
+   (X & Y) == X  ->  (X & ~Y) == 0.
+   (X & Y) == Y  ->  (~X & Y) == 0.  */
+(simplify
+  (eq (bit_and @0 @1) @0)
+  (if ((INTEGRAL_TYPE_P (TREE_TYPE (@0)))
+   && !(CONSTANT_CLASS_P (@0))
+   && (INTEGRAL_TYPE_P (TREE_TYPE (@1)))
+   && !(CONSTANT_CLASS_P (@1)))
+  (eq (bit_and @0 (bit_not @1)) { build_zero_cst (TREE_TYPE (@0)); })))
+(simplify
+  (eq (bit_and @0 @1) @1)
+  (if ((INTEGRAL_TYPE_P (TREE_TYPE (@0)))
+   && !(CONSTANT_CLASS_P (@0))
+   && (INTEGRAL_TYPE_P (TREE_TYPE (@1)))
+   && !(CONSTANT_CLASS_P (@1)))
+  (eq (bit_and (bit_not @0) @1) { build_zero_cst (TREE_TYPE (@1)); })))
+
 /* ~~x -> x */
 (simplify
   (bit_not (bit_not @0))
diff --git a/gcc/testsuite/gcc.dg/match-and-not.c 
b/gcc/testsuite/gcc.dg/match-and-not.c
new file mode 100644
index 000..62d993e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/match-and-not.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -fdump-tree-original" }  */
+
+extern void abort (void);
+
+/* (x & y) == x -> (x & ~y) == 0.  */
+int __attribute__ ((noinline))
+f1 (int x, int y)
+{
+  return (x & y) == x;
+}
+
+int __attribute__ ((noinline))
+f2 (int x, int y)
+{
+  return (x & y) == y;
+}
+
+int
+main ()
+{
+  if (!f1 (21, 85))
+abort ();
+
+  if (!f2 (85, 84))
+abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\(~y & x\\) == 0" 1 "original" } } */
+/* { dg-final { scan-tree-dump-times "\\(~x & y\\) == 0" 1 "original" } } */
+/* { dg-final { cleanup-tree-dump "original" } } */

Re: [PATCH IRA] update_equiv_regs fails to set EQUIV reg-note for pseudo with more than one definition

2015-02-12 Thread Alex Velenko

On 09/02/15 23:32, Jeff Law wrote:

On 02/03/15 20:03, Bin.Cheng wrote:

I looked into the test and can confirm the previous compilation is correct.
The cover letter of this patch said IRA mis-handled REQ_EQUIV before,
but in this case it is REG_EQUAL that is lost.  The full dump (without
this patch) after IRA is like:

Right, but a REG_EQUIV is generated based on the incoming REG_EQUAL
notes in the insn stream.  Basically update_equiv_regs will scan insn
stream and some REG_EQUAL notes will be promoted to REG_EQUIV notes.


Hi Jeff,
Do I understand you correctly that REG_EQUAL notes should not be
generated in the IRA pass, because some of them may get promoted to
REG_EQUIV? Or is there any other reason register r110 should not get a
REG_EQUAL note? Previously, register r110 was getting a REG_EQUAL note,
not a REG_EQUIV note.
Kind regards,
Alex



The REG_EQUIV is a function-wide equivalence, meaning that one could
substitute the REG_EQUIV note for in any uses of the destination
register and still have a valid representation of the program.

REG_EQUAL's validity is limited to the point after the insn in which it
appears and before the next insn.
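
As a minimal sketch of that distinction (hypothetical code, not taken from
pr43920-2.c): a pseudo written with different constants on different paths
can carry a per-insn REG_EQUAL note on each set, but no function-wide
REG_EQUIV:

/* Hypothetical illustration of REG_EQUAL vs REG_EQUIV: "flag" is set to
   different constants on different paths, so each set may carry a
   REG_EQUAL note valid only right after that insn, while the pseudo as a
   whole has no single value a REG_EQUIV note could record.  */
int
g (int cond)
{
  int flag;
  if (cond)
    flag = -1;   /* this set may get REG_EQUAL (const_int -1)  */
  else
    flag = 0;    /* this set may get REG_EQUAL (const_int 0)   */
  return flag;   /* no REG_EQUIV is possible for flag's pseudo */
}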



Before r216169 (with REG_EQUAL in insn9), jumps from basic block 6/7/8
-> 9 can be merged because r110 equals to -1 afterwards.  But with the
patch, the equal information of r110==-1 in basic block 8 is lost.  As
a result, jump from 8->9 can't be merged and two additional
instructions are generated.

  >

I suppose the REG_EQUAL note is correct in insn9?  According to
GCCint, it only means r110 set by insn9 will be equal to the value at
run time at the end of this insn but not necessarily elsewhere in the
function.

If you previously got a REG_EQUIV note on any of those insns it was
wrong and this is the precise kind of situation that the change was
trying to fix.

R110 can have the value -1 (BB6, BB7, BB8) or 0 (BB5).  Thus there is no
single value across the entire function that one can validly use for r110.

I think you could mark this as a missed optimization, but not a
regresssion since the desired output was relying on a bug in the compiler.

If I were to start looking at this, my first thought would be to look at
why we have multiple sets of r110, particularly if there are lifetimes
that are disjoint.




I also found another problem (or mis-leading?) with the document:
"Thus, compiler passes prior to register allocation need only check
for REG_EQUAL notes and passes subsequent to register allocation need
only check for REG_EQUIV notes".  This seems not true now as in this
example, passes after register allocation do take advantage of
REG_EQUAL in optimization and we can't achieve that by using
REG_EQUIV.

I think that's a long standing (and incorrect) generalization.  IIRC we
can get a REG_EQUIV note earlier for certain argument setup situations.
   And I think it's been the case for a long time that a pass after
reload could try to exploit REG_EQUAL notes.

jeff





Re: [PATCH] PR rtl-optimization/32219: optimizer causees wrong code in pic/hidden/weak symbol checking

2015-02-18 Thread Alex Velenko

On 13/02/15 05:11, Richard Henderson wrote:

On 02/12/2015 08:14 PM, H.J. Lu wrote:

I tried the second patch.  Results look good on Linux/x86-64.


Thanks.  My results concurr.  I went ahead and installed the patch as posted.


r~


2015-02-12  H.J. Lu  
 Richard Henderson  

 PR rtl/32219
 * cgraphunit.c (cgraph_node::finalize_function): Set definition
 before notice_global_symbol.
 (varpool_node::finalize_decl): Likewise.
 * varasm.c (default_binds_local_p_2): Rename from
 default_binds_local_p_1, add weak_dominate argument.  Use direct
 returns instead of assigning to local variable.  Unify varpool and
 cgraph paths via symtab_node.  Reject undef weak variables before
 testing visibility.  Reorder tests for simplicity.
 (default_binds_local_p): Use default_binds_local_p_2.
 (default_binds_local_p_1): Likewise.
 (decl_binds_to_current_def_p): Unify varpool and cgraph paths
 via symtab_node.
 (default_elf_asm_output_external): Emit visibility when specified.

2015-02-12  H.J. Lu  

 PR rtl/32219
 * gcc.dg/visibility-22.c: New test.
 * gcc.dg/visibility-23.c: New test.
 * gcc.target/i386/pr32219-1.c: New test.
 * gcc.target/i386/pr32219-2.c: New test.
 * gcc.target/i386/pr32219-3.c: New test.
 * gcc.target/i386/pr32219-4.c: New test.
 * gcc.target/i386/pr32219-5.c: New test.
 * gcc.target/i386/pr32219-6.c: New test.
 * gcc.target/i386/pr32219-7.c: New test.
 * gcc.target/i386/pr32219-8.c: New test.
 * gcc.target/i386/pr64317.c: Expect GOTOFF, not GOT.



Hi all,
By changing the behaviour of varasm.c:default_binds_local_p, this patch
changes the behaviour of gcc/config/arm/arm.c:arm_function_in_section_p and
through it breaks gcc/config/arm/arm.c:arm_is_long_call_p for weak symbols.


As a result, I get regression for gcc.target/arm/long-calls-1.c on
arm-none-eabi:
FAIL: gcc.target/arm/long-calls-1.c scan-assembler-not \tbl?\tweak_l1\n
FAIL: gcc.target/arm/long-calls-1.c scan-assembler-not \tbl?\tweak_l3\n

In https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html there
is a description for -mlong-calls.
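
As a hypothetical reduction of what the failing scans check (the real test
defines several weak functions; the name here is illustrative only):

/* Hypothetical sketch, not the actual long-calls-1.c source: with
   -mlong-calls the call below must be an indirect (long) call, not a
   direct "bl weak_fn", since the weak definition can be overridden by a
   prevailing definition that may be out of branch range.  */
void __attribute__ ((weak)) weak_fn (void) {}

void
caller (void)
{
  weak_fn ();
}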

This has to be fixed.

Kind regards,
Alex



Re: [PATCH][AArch64] Testcase fix for __ATOMIC_CONSUME

2015-02-18 Thread Alex Velenko

On 12/02/15 18:38, Mike Stump wrote:

On Feb 11, 2015, at 12:16 PM, Torvald Riegel  wrote:

On Mon, 2015-02-09 at 09:10 -0800, Mike Stump wrote:

On Feb 9, 2015, at 7:11 AM, Alex Velenko  wrote:

The following patch makes atomic-op-consume.c XFAIL

Is this patch ok?


Ok.

I’d shorten the comment above the xfail to be exceedingly short:

  /* PR59448 consume not implemented yet */

The reason is the brain can process this about 8x faster.  Also, one can cut 
and paste the PR part into a web browser directly, or, if you have an electric 
bugzilla mode for emacs, it will pop right up. */


Given the discussions we had in ISO C++ SG1, it seems the only way to
fix memory_order_consume is to deprecate it (or let it rot forever), and
add something else to the standard.  See
http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4321.pdf


Nice paper, thanks.


IOW, I believe the promotion is here to stay.  I'm not aware of any
other implementation doing something else.

Thus, XFAIL doesn't seem right to me.


Since Jakub in PR64930 updated to the now expected output instead of xfail, and 
given the paper above, easy to agree with this.  The changes to remove the 
xfail and expect the now expected codegen are pre-approved.




Hi Mike,
As a pre-approved trivial change, on Monday I committed the following patch:

gcc/testsuite/

2015-02-16  Alex Velenko  

* gcc.target/aarch64/atomic-op-consume.c (scan-assembler-times):
Directive adjusted to scan for ldaxr.
* gcc.target/arm/atomic-op-consume.c (scan-assembler-times): Directive
adjusted to scan for ldaex.


diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c 
b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c

index 0e6dbbe..26ebbdf 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
@@ -3,6 +3,6 @@

 #include "atomic-op-consume.x"

-/* PR59448 consume not implemented yet.  */
-/* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, 
\\\[x\[0-9\]+\\\]" 6 { xfail *-*-* } } } */

+/* Scan for ldaxr is a PR59448 consume workaround.  */
+/* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, 
\\\[x\[0-9\]+\\\]" 6 } } */
 /* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, 
\\\[x\[0-9\]+\\\]" 6 } } */
diff --git a/gcc/testsuite/gcc.target/arm/atomic-op-consume.c 
b/gcc/testsuite/gcc.target/arm/atomic-op-consume.c

index fafe4d6..6c5f989 100644
--- a/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
+++ b/gcc/testsuite/gcc.target/arm/atomic-op-consume.c
@@ -5,7 +5,7 @@

 #include "../aarch64/atomic-op-consume.x"

-/* PR59448 consume not implemented yet.  */
-/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, 
\\\[r\[0-9\]+\\\]" 6 { xfail *-*-* } } } */

+/* Scan for ldaex is a PR59448 consume workaround.  */
+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, 
\\\[r\[0-9\]+\\\]" 6 } } */
 /* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, 
\\\[r\[0-9\]+\\\]" 6 } } */

 /* { dg-final { scan-assembler-not "dmb" } } */



Re: [PATCH] PR64959: SFINAE in UDLs

2015-02-18 Thread Alex Velenko



On 13/02/15 22:21, Andrea Azzarone wrote:

We can use the same trick used in the other tests. Patch attached.
Sorry about that!

2015-02-13 20:45 GMT+01:00 Jakub Jelinek :

On Wed, Feb 11, 2015 at 12:26:33AM +0100, Andrea Azzarone wrote:

 * gcc/testsuite/g++.dg/cpp1y/udlit-char-template-vs-std-literal-operator.C:


This fails on i686-linux:

FAIL: g++.dg/cpp1y/udlit-char-template-vs-std-literal-operator.C  -std=c++14 
(test for excess errors)
Excess errors: 
/home/jakub/src/gcc/gcc/testsuite/g++.dg/cpp1y/udlit-char-template-vs-std-literal-operator.C:10:51:
 error: 'int operator""_script(const char*, long unsigned int)' has invalid 
argument list

Perhaps you meant to #include  too and use
size_t instead of unsigned long?  Or just __SIZE_TYPE__ instead
of unsigned long?

 Jakub







Hi,
this patch also fixes issues for arm-none-eabi.
Could someone add this patch?
Kind regards,
Alex.



Re: [PATCH] PR64959: SFINAE in UDLs

2015-02-19 Thread Alex Velenko

On 18/02/15 18:30, Jakub Jelinek wrote:

On Wed, Feb 18, 2015 at 06:29:34PM +, Alex Velenko wrote:

this patch also fixes issues for arm-none-eabi.
Could someone add this patch?


ENOPATCH

Jakub


Hi Jakub,
I meant Andrea Azzarone's patch in the previous e-mail. It has not been 
added yet.

Kind regards,
Alex



Re: [PATCH] PR rtl-optimization/32219: optimizer causees wrong code in pic/hidden/weak symbol checking

2015-02-19 Thread Alex Velenko

On 19/02/15 14:16, Richard Henderson wrote:

On 02/18/2015 06:17 AM, Alex Velenko wrote:

By changing behaviour of varasm.c:default_binds_local_p, this patch changes
behaviour of gcc/config/arm/arm.c:arm_function_in_section_p and through it
breaks gcc/config/arm/arm.c:arm_is_long_call_p for weak symbols.

As a result, I get regression for gcc.target/arm/long-calls-1.c on
arm-none-eabi:
FAIL: gcc.target/arm/long-calls-1.c scan-assembler-not \tbl?\tweak_l1\n
FAIL: gcc.target/arm/long-calls-1.c scan-assembler-not \tbl?\tweak_l3\n

In https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html there
is a description for -mlong-calls.

This has to be fixed.


Please file a bug, for tracking purposes.

That said, it looks as if arm_function_in_section_p should be using
decl_binds_to_current_def_p instead of targetm.binds_local_p.

That will properly reject weak symbols within a given module until we receive
extra information from LTO indicating when a weak definition turns out to be
the prevailing definition.


r~



Hi Richard,
Thank you for your reply.
Here is the bug report.
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65121

Your suggestion seems to fix gcc.target/arm/long-calls-1.c, but it has to be
thoroughly tested.


Kind regards,
Alex



[RTL, Patch] Int div by constant compilation enhancement

2014-11-03 Thread Alex Velenko

Hi,
This patch adds a mid-end check to catch the division-by-constant
case and optimize it to generate one shift instead of two.

A testcase to check the correct code generation for aarch64
is added. This check is not made generic, because the optimisation
implemented is not used by all targets.
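
For reference, a standalone sanity check (not part of the patch) of the
shift identity being exploited, with a 64-bit wider mode and a 32-bit
narrow mode so the first shift amount is 32; it assumes GCC's arithmetic
behaviour for right shifts of negative signed values:

/* Hypothetical check of the identity: truncating x >> 32 (logical) to 32
   bits and arithmetic-shifting by c2 equals one 64-bit arithmetic shift
   by 32 + c2 followed by truncation, for 0 <= c2 <= 31.  */
#include <assert.h>
#include <stdint.h>

static int32_t
two_shifts (int64_t x, int c2)
{
  int32_t hi = (int32_t) ((uint64_t) x >> 32);  /* lshiftrt:DI + subreg:SI  */
  return hi >> c2;                              /* ashiftrt:SI              */
}

static int32_t
one_shift (int64_t x, int c2)
{
  return (int32_t) (x >> (32 + c2));            /* ashiftrt:DI + subreg:SI  */
}

int
main (void)
{
  assert (two_shifts (-1234567890123456789LL, 5)
          == one_shift (-1234567890123456789LL, 5));
  assert (two_shifts (0x123456789abcdef0LL, 7)
          == one_shift (0x123456789abcdef0LL, 7));
  return 0;
}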

Is it ok?

Thanks,
Alex

gcc/

2014-11-03  Alex Velenko  

* simplify-rtx.c (simplify_binary_operation_1): Div check added.
* rtl.h (SUBREG_P): New macro added.

gcc/testsuite/

2014-11-03  Alex Velenko  

* gcc.dg/asr_div1.c: New testcase.
>From 471fbb2057b4d338d01bb403f0973adbed33a31d Mon Sep 17 00:00:00 2001
From: Alex Velenko 
Date: Mon, 6 Oct 2014 15:33:36 +0100
Subject: [PATCH] [RTL, Patch] Int div by constant compilation enhancement

This patch adds a mid-end check to catch the division-by-constant
case and optimize it to generate one shift instead of two.

A testcase to check the correct code generation for aarch64
is added. This check is not made generic, because the optimisation
implemented is not used by all targets.
Thanks,
Alex

gcc/

2014-11-03  Alex Velenko  

	* simplify-rtx.c (simplify_binary_operation_1): Div check added.
	* rtl.h (SUBREG_P): New macro added.

gcc/testsuite/

2014-11-03  Alex Velenko  

	* gcc.dg/asr_div1.c: New testcase.
---
 gcc/rtl.h   |  3 +++
 gcc/simplify-rtx.c  | 38 
 gcc/testsuite/gcc.dg/asr_div1.c | 56 +
 3 files changed, 97 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/asr_div1.c

diff --git a/gcc/rtl.h b/gcc/rtl.h
index ddd39c9..6222817 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -789,6 +789,9 @@ struct GTY(()) rtvec_def {
 /* Predicate yielding nonzero iff X is a data for a jump table.  */
 #define JUMP_TABLE_DATA_P(INSN) (GET_CODE (INSN) == JUMP_TABLE_DATA)
 
+/* Predicate yielding nonzero iff RTX is a subreg.  */
+#define SUBREG_P(RTX) (GET_CODE (RTX) == SUBREG)
+
 template <>
 template <>
 inline bool
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index d783c22..e01fba7 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -3102,6 +3102,44 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
 	  && UINTVAL (trueop0) == GET_MODE_MASK (mode)
 	  && ! side_effects_p (op1))
 	return op0;
+  /* Given:
+     scalar modes M1, M2
+     scalar constants c1, c2
+     size (M2) > size (M1)
+     c1 == size (M2) - size (M1)
+     optimize:
+     (ashiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2)
+                                          (const_int <c1>))
+                             <low_part>)
+                  (const_int <c2>))
+     to:
+     (subreg:M1 (ashiftrt:M2 (reg:M2)
+                             (const_int <c1 + c2>))
+                <low_part>).  */
+  if (!VECTOR_MODE_P (mode)
+  && SUBREG_P (op0)
+  && CONST_INT_P (op1)
+  && (GET_CODE (SUBREG_REG (op0)) == LSHIFTRT)
+  && !VECTOR_MODE_P (GET_MODE (SUBREG_REG (op0)))
+  && CONST_INT_P (XEXP (SUBREG_REG (op0), 1))
+  && (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)))
+  > GET_MODE_BITSIZE (mode))
+  && (INTVAL (XEXP (SUBREG_REG (op0), 1))
+  == (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)))
+  - GET_MODE_BITSIZE (mode)))
+  && subreg_lowpart_p (op0))
+{
+  rtx tmp = GEN_INT (INTVAL (XEXP (SUBREG_REG (op0), 1))
+ + INTVAL (op1));
+  machine_mode inner_mode = GET_MODE (SUBREG_REG (op0));
+  tmp = simplify_gen_binary (ASHIFTRT,
+ GET_MODE (SUBREG_REG (op0)),
+ XEXP (SUBREG_REG (op0), 0),
+ tmp);
+  return simplify_gen_subreg (mode, tmp, inner_mode,
+  subreg_lowpart_offset (mode,
+ inner_mode));
+}
 canonicalize_shift:
   if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1))
 	{
diff --git a/gcc/testsuite/gcc.dg/asr_div1.c b/gcc/testsuite/gcc.dg/asr_div1.c
new file mode 100644
index 000..61430ca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asr_div1.c
@@ -0,0 +1,56 @@
+/* Test division by const int generates only one shift.  */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-rtl-combine-all" } */
+
+extern void abort (void);
+
+#define NOINLINE __attribute__((noinline))
+
+static NOINLINE int
+f1 (int n)
+{
+  return n / 33;
+}
+
+static NOINLINE int
+f2 (int n)
+{
+  return n / 77;
+}
+
+int
+main ()
+{
+  int a = 0x;
+  int b = 0x;
+  int c;
+  c = f1 (a);
+  if (c != 0xfd6a052c)
+abort ();
+  c = f1 (b);
+  if (c != 0x295FAD4)
+abort ();
+  c = f2 (a);
+  if (c != 0xfee44b5c)
+abort ();
+  c = f2 (b);
+  if (c != 0x11bb4a4)
+ 

[PATCH] [AArch64, RTL] Bics instruction generation for aarch64

2014-11-11 Thread Alex Velenko

From 98bb6d7323ce79e28be8ef892b919391ed857e1f Mon Sep 17 00:00:00 2001
From: Alex Velenko 
Date: Fri, 31 Oct 2014 18:43:32 +
Subject: [PATCH] [AArch64, RTL] Bics instruction generation for aarch64

Hi,

This patch adds rtl patterns for aarch64 to generate bics instructions in
cases when the computed value gets discarded and only the status register
change of the instruction gets reused.

Previously, bics would only be generated if the value computed by bics
would later be reused, which is not necessarily the case when computing
this value for "if" statements.
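
To illustrate the difference, a hypothetical contrast (not taken from the
testcase below): in the first function the AND-NOT result is live, so the
existing patterns with a real destination register already matched; in the
second only the comparison against zero survives, which is the case the new
*_no_reuse patterns with a zero-register destination cover.

/* Hypothetical contrast; the instruction choices in the comments are
   approximate expectations, not guaranteed output.  */
int
f_value_reused (int a, int b, int *out)
{
  int t = a & ~b;      /* AND-NOT with a real destination: value is kept  */
  *out = t;
  return t != 0;
}

int
f_flags_only (int a, int b)
{
  return (a & ~b) != 0;   /* only the condition flags are needed  */
}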

Is this patch ok?

Thanks,
Alex

gcc/

2014-11-10  Alex Velenko  

* config/aarch64/aarch64.md (and_one_cmpl3_compare0_no_reuse):
New define_insn.
(and_one_cmpl_3_compare0_no_reuse): Likewise.

gcc/testsuite/

2014-11-10  Alex Velenko  

* gcc.target/aarch64/bics_3.c: New testcase.
---
 gcc/config/aarch64/aarch64.md | 26 
 gcc/testsuite/gcc.target/aarch64/bics_3.c | 69 
+++

 2 files changed, 95 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/bics_3.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 341c26f..6158d82 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2845,6 +2845,18 @@
   [(set_attr "type" "logics_reg")]
 )

+(define_insn "*and_one_cmpl3_compare0_no_reuse"
+  [(set (reg:CC_NZ CC_REGNUM)
+(compare:CC_NZ
+ (and:GPI (not:GPI
+   (match_operand:GPI 0 "register_operand" "r"))
+  (match_operand:GPI 1 "register_operand" "r"))
+ (const_int 0)))]
+  ""
+  "bics\\tzr, %1, %0"
+  [(set_attr "type" "logics_reg")]
+)
+
 (define_insn "*_one_cmpl_3"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 (LOGICAL:GPI (not:GPI
@@ -2894,6 +2906,20 @@
   [(set_attr "type" "logics_shift_imm")]
 )

+(define_insn "*and_one_cmpl_3_compare0_no_reuse"
+  [(set (reg:CC_NZ CC_REGNUM)
+(compare:CC_NZ
+ (and:GPI (not:GPI
+   (SHIFT:GPI
+(match_operand:GPI 0 "register_operand" "r")
+(match_operand:QI 1 "aarch64_shift_imm_" "n")))
+  (match_operand:GPI 2 "register_operand" "r"))
+ (const_int 0)))]
+  ""
+  "bics\\tzr, %2, %0,  %1"
+  [(set_attr "type" "logics_shift_imm")]
+)
+
 (define_insn "clz2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 (clz:GPI (match_operand:GPI 1 "register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/bics_3.c 
b/gcc/testsuite/gcc.target/aarch64/bics_3.c

new file mode 100644
index 000..ecb53e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bics_3.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+bics_si_test (int a, int b)
+{
+  if (a & ~b)
+return 1;
+  else
+return 0;
+}
+
+int __attribute__ ((noinline))
+bics_si_test2 (int a, int b)
+{
+  if (a & ~ (b << 2))
+return 1;
+  else
+return 0;
+}
+
+typedef long long s64;
+
+int __attribute__ ((noinline))
+bics_di_test (s64 a, s64 b)
+{
+  if (a & ~b)
+return 1;
+  else
+return 0;
+}
+
+int __attribute__ ((noinline))
+bics_di_test2 (s64 a, s64 b)
+{
+  if (a & ~(b << 2))
+return 1;
+  else
+return 0;
+}
+
+int
+main (void)
+{
+  int a = 5;
+  int b = 5;
+  int c = 20;
+  s64 d = 5;
+  s64 e = 5;
+  s64 f = 20;
+  if (bics_si_test (a, b))
+abort ();
+  if (bics_si_test2 (c, b))
+abort ();
+  if (bics_di_test (d, e))
+abort ();
+  if (bics_di_test2 (f, e))
+abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "bics\twzr, w\[0-9\]+, w\[0-9\]+" 
2 } } */
+/* { dg-final { scan-assembler-times "bics\twzr, w\[0-9\]+, w\[0-9\]+, 
lsl 2" 1 } } */
+/* { dg-final { scan-assembler-times "bics\txzr, x\[0-9\]+, x\[0-9\]+" 
2 } } */
+/* { dg-final { scan-assembler-times "bics\txzr, x\[0-9\]+, x\[0-9\]+, 
lsl 2" 1 } } */

+
+/* { dg-final { cleanup-saved-temps } } */
--
1.8.1.2





Re: ira.c update_equiv_regs patch causes gcc/testsuite/gcc.target/arm/pr43920-2.c regression

2015-08-18 Thread Alex Velenko



On 31/07/15 12:04, Alex Velenko wrote:

On 29/07/15 23:14, Jeff Law wrote:

On 07/28/2015 12:18 PM, Alex Velenko wrote:

On 21/04/15 06:27, Jeff Law wrote:

On 04/20/2015 01:09 AM, Shiva Chen wrote:

Hi, Jeff

Thanks for your advice.

can_replace_by.patch is the new patch to handle both cases.

pr43920-2.c.244r.jump2.ori is the original  jump2 rtl dump

pr43920-2.c.244r.jump2.patch_can_replace_by is the jump2 rtl dump
after patch  can_replace_by.patch

Could you help me to review the patch?

Thanks.  This looks pretty good.

I expanded the comment for the new function a bit and renamed the
function in an effort to clarify its purpose.  From reviewing
can_replace_by, it seems it should have been handling this case, but
clearly wasn't due to implementation details.

I then bootstrapped and regression tested the patch on 
x86_64-linux-gnu

where it passed.  I also instrumented that compiler to see how often
this code triggers.  During a bootstrap it triggers a couple hundred
times (which is obviously a proxy for cross jumping improvements).  So
it's triggering regularly on x86_64, which is good.

I also verified that this fixes BZ64916 for an arm-non-eabi toolchain
configured with --with-arch=armv7.

Installed on the trunk.  No new testcase as it's covered by existing
tests.

Thanks,,
jeff



Hi,
I see this patch has been committed in r56 on trunk. Is it okay to port
this to fsf-5?

It's not a regression, so backporting it would be generally frowned
upon.  If you feel strongly about it, you should ask Jakub, Joseph or
Richi (the release managers) for an exception to the general policy.

jeff


Hi Jakub,
Can this commit be ported to fsf-5? It fixed gcc.target/arm/pr43920-2.c
at the time, so I think it is a good idea to port. Please, see
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64916
Kind regards,
Alex


Ping!

Currently this test passes on fsf-trunk but not on fsf-5, so I think it
is a regression on fsf-5:


arm-none-eabi fsf-5:
PASS: gcc.target/arm/pr43920-2.c (test for excess errors)
FAIL: gcc.target/arm/pr43920-2.c scan-assembler-times pop 2
PASS: gcc.target/arm/pr43920-2.c scan-assembler-times beq 3
Executing on host: arm-none-eabi-size pr43920-2.o   (timeout = 300)
spawn arm-none-eabi-size pr43920-2.o
   text    data     bss     dec     hex filename
     58       0       0      58      3a pr43920-2.o
text size is 58
FAIL: gcc.target/arm/pr43920-2.c object-size text <= 54

arm-none-eabi fsf-trunk:

PASS: gcc.target/arm/pr43920-2.c (test for excess errors)
PASS: gcc.target/arm/pr43920-2.c scan-assembler-times pop 2
PASS: gcc.target/arm/pr43920-2.c scan-assembler-times beq 3
size is arm-none-eabi-size
Executing on host: arm-none-eabi-size pr43920-2.o   (timeout = 300)
spawn arm-none-eabi-size pr43920-2.o
   text    data     bss     dec     hex filename
     54       0       0      54      36 pr43920-2.o
text size is 54
PASS: gcc.target/arm/pr43920-2.c object-size text <= 54

Can this, please, be ported?
Kind regards,
Alex



Re: ira.c update_equiv_regs patch causes gcc/testsuite/gcc.target/arm/pr43920-2.c regression

2015-08-18 Thread Alex Velenko



On 18/08/15 10:45, Marcus Shawcroft wrote:

On 18 August 2015 at 10:25, Alex Velenko  wrote:



On 31/07/15 12:04, Alex Velenko wrote:


On 29/07/15 23:14, Jeff Law wrote:


On 07/28/2015 12:18 PM, Alex Velenko wrote:


On 21/04/15 06:27, Jeff Law wrote:


On 04/20/2015 01:09 AM, Shiva Chen wrote:


Hi, Jeff

Thanks for your advice.

can_replace_by.patch is the new patch to handle both cases.

pr43920-2.c.244r.jump2.ori is the original  jump2 rtl dump

pr43920-2.c.244r.jump2.patch_can_replace_by is the jump2 rtl dump
after patch  can_replace_by.patch

Could you help me to review the patch?


Thanks.  This looks pretty good.

I expanded the comment for the new function a bit and renamed the
function in an effort to clarify its purpose.  From reviewing
can_replace_by, it seems it should have been handling this case, but
clearly wasn't due to implementation details.

I then bootstrapped and regression tested the patch on x86_64-linux-gnu
where it passed.  I also instrumented that compiler to see how often
this code triggers.  During a bootstrap it triggers a couple hundred
times (which is obviously a proxy for cross jumping improvements).  So
it's triggering regularly on x86_64, which is good.

I also verified that this fixes BZ64916 for an arm-non-eabi toolchain
configured with --with-arch=armv7.

Installed on the trunk.  No new testcase as it's covered by existing
tests.

Thanks,,
jeff



Hi,
I see this patch been committed in r56 on trunk. Is it okay to port
this to fsf-5?


It's not a regression, so backporting it would be generally frowned
upon.  If you feel strongly about it, you should ask Jakub, Joseph or
Richi (the release managers) for an exception to the general policy.

jeff


Hi Jakub,
Can this commit be ported to fsf-5? It fixed gcc.target/arm/pr43920-2.c
at the time, so I think it is a good idea to port. Please, see
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64916
Kind regards,
Alex



Ping!

Currently this test is passed on fsf-trunk, but not passed on fsf-5, so I
think it is a regression on fsf-5:


That does not make it a regression, it is only a regression if a
version prior to 5 passes, how does this test behave on 4.9?

Cheers
/Marcus


Hi Marcus,

On fsf-4.9 I see the test pass:

PASS: gcc.target/arm/pr43920-2.c (test for excess errors)
PASS: gcc.target/arm/pr43920-2.c scan-assembler-times pop 2
PASS: gcc.target/arm/pr43920-2.c scan-assembler-times beq 3
Executing on host: arm-none-eabi-size pr43920-2.o   (timeout = 300)
spawn arm-none-eabi-size pr43920-2.o
   text    data     bss     dec     hex filename
     54       0       0      54      36 pr43920-2.o
text size is 54
PASS: gcc.target/arm/pr43920-2.c object-size text <= 54

So this is a regression in fsf-5.

Kind regards,
Alex



Re: [PATCH] [PATCH][ARM] Fix pr63210.c testcase.

2015-09-11 Thread Alex Velenko

Hi,
Committed fsf-trunk r227677, fsf-5 r227678.
Kind regards,
Alex



Re: [PATCH] [AArch64, RTL] Bics instruction generation for aarch64

2014-12-08 Thread Alex Velenko



On 08/12/14 10:33, Richard Earnshaw wrote:

On 11/11/14 10:38, Alex Velenko wrote:

  From 98bb6d7323ce79e28be8ef892b919391ed857e1f Mon Sep 17 00:00:00 2001
From: Alex Velenko 
Date: Fri, 31 Oct 2014 18:43:32 +
Subject: [PATCH] [AArch64, RTL] Bics instruction generation for aarch64

Hi,

This patch adds rtl patterns for aarch64 to generate bics instructions in
cases when the computed value gets discarded and only the status register
change of the instruction gets reused.

Previously, bics would only be generated if the value computed by bics
would later be reused, which is not necessarily the case when computing
this value for "if" statements.

Is this patch ok?

Thanks,
Alex

gcc/

2014-11-10  Alex Velenko  

  * config/aarch64/aarch64.md (and_one_cmpl3_compare0_no_reuse):
New define_insn.
(and_one_cmpl_3_compare0_no_reuse): Likewise.

gcc/testsuite/

2014-11-10  Alex Velenko  

	* gcc.target/aarch64/bics_3.c: New testcase.


OK.

R.


Committed,

Alex


---
   gcc/config/aarch64/aarch64.md | 26 
   gcc/testsuite/gcc.target/aarch64/bics_3.c | 69
+++
   2 files changed, 95 insertions(+)
   create mode 100644 gcc/testsuite/gcc.target/aarch64/bics_3.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 341c26f..6158d82 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2845,6 +2845,18 @@
 [(set_attr "type" "logics_reg")]
   )

+(define_insn "*and_one_cmpl3_compare0_no_reuse"
+  [(set (reg:CC_NZ CC_REGNUM)
+(compare:CC_NZ
+ (and:GPI (not:GPI
+   (match_operand:GPI 0 "register_operand" "r"))
+  (match_operand:GPI 1 "register_operand" "r"))
+ (const_int 0)))]
+  ""
+  "bics\\tzr, %1, %0"
+  [(set_attr "type" "logics_reg")]
+)
+
   (define_insn "*_one_cmpl_3"
 [(set (match_operand:GPI 0 "register_operand" "=r")
   (LOGICAL:GPI (not:GPI
@@ -2894,6 +2906,20 @@
 [(set_attr "type" "logics_shift_imm")]
   )

+(define_insn "*and_one_cmpl_3_compare0_no_reuse"
+  [(set (reg:CC_NZ CC_REGNUM)
+(compare:CC_NZ
+ (and:GPI (not:GPI
+   (SHIFT:GPI
+(match_operand:GPI 0 "register_operand" "r")
+(match_operand:QI 1 "aarch64_shift_imm_" "n")))
+  (match_operand:GPI 2 "register_operand" "r"))
+ (const_int 0)))]
+  ""
+  "bics\\tzr, %2, %0,  %1"
+  [(set_attr "type" "logics_shift_imm")]
+)
+
   (define_insn "clz2"
 [(set (match_operand:GPI 0 "register_operand" "=r")
   (clz:GPI (match_operand:GPI 1 "register_operand" "r")))]
diff --git a/gcc/testsuite/gcc.target/aarch64/bics_3.c
b/gcc/testsuite/gcc.target/aarch64/bics_3.c
new file mode 100644
index 000..ecb53e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bics_3.c
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+bics_si_test (int a, int b)
+{
+  if (a & ~b)
+return 1;
+  else
+return 0;
+}
+
+int __attribute__ ((noinline))
+bics_si_test2 (int a, int b)
+{
+  if (a & ~ (b << 2))
+return 1;
+  else
+return 0;
+}
+
+typedef long long s64;
+
+int __attribute__ ((noinline))
+bics_di_test (s64 a, s64 b)
+{
+  if (a & ~b)
+return 1;
+  else
+return 0;
+}
+
+int __attribute__ ((noinline))
+bics_di_test2 (s64 a, s64 b)
+{
+  if (a & ~(b << 2))
+return 1;
+  else
+return 0;
+}
+
+int
+main (void)
+{
+  int a = 5;
+  int b = 5;
+  int c = 20;
+  s64 d = 5;
+  s64 e = 5;
+  s64 f = 20;
+  if (bics_si_test (a, b))
+abort ();
+  if (bics_si_test2 (c, b))
+abort ();
+  if (bics_di_test (d, e))
+abort ();
+  if (bics_di_test2 (f, e))
+abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "bics\twzr, w\[0-9\]+, w\[0-9\]+"
2 } } */
+/* { dg-final { scan-assembler-times "bics\twzr, w\[0-9\]+, w\[0-9\]+,
lsl 2" 1 } } */
+/* { dg-final { scan-assembler-times "bics\txzr, x\[0-9\]+, x\[0-9\]+"
2 } } */
+/* { dg-final { scan-assembler-times "bics\txzr, x\[0-9\]+, x\[0-9\]+,
lsl 2" 1 } } */
+
+/* { dg-final { cleanup-saved-temps } } */







[PATCH AArch64_BE 1/4] Big-Endian lane numbering fix

2014-01-16 Thread Alex Velenko

Hi,
This patch is the first in a series of patches fixing big-endian
lane numbering. The goal of the series is to build a proper bridge
between GCC's big-endian view of lane numbering and the internal
architected view.

The approach taken is to catch lane indexing at the points where
internal vector lane indexes are passed into GCC's lane-indexing view.
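
As a minimal sketch of the index translation this relies on (my own
illustration of the arithmetic, assumed from the ENDIAN_LANE_N usage
visible in the later patches of this series; not code from the patch):
GCC's lane n of an N-lane vector corresponds to architectural lane
N - 1 - n on big-endian and to lane n on little-endian.

/* Illustrative helper only.  */
static inline unsigned
gcc_lane_to_arch_lane (unsigned nunits, unsigned n, int big_endian)
{
  return big_endian ? nunits - 1 - n : n;
}
/* e.g. gcc_lane_to_arch_lane (4, 0, 1) == 3.  */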

This will have a short-term impact on big-endian NEON intrinsics and
introduces a number of regressions. But this is the correct thing to do
to ensure that auto-vectorized and GCC vector extension code works
correctly.

This particular patch fixes vld1_ and vst1_ to generate ld1
and st1 instructions, correcting their big-endian behaviour.


Regression tested on aarch64-none-elf and aarch64_be-none-elf with 
recent vec-perm with no unexpected issues.


Is it okay for trunk?

Regards,
Alex Velenko



gcc/
2014-01-16  Alex Velenko  

* config/aarch64/aarch64-simd.md (aarch64_be_ld1):
New define_insn.
(aarch64_be_st1): Likewise.
(aarch64_ld1): Modified define_expand.
(aarch64_st1): Likewise.
* config/aarch64/aarch64.md (UNSPEC_LD1): New unspec definition.
(UNSPEC_ST1): Likewise.

gcc/testsuite
2014-01-16  Alex Velenko  

* gcc.target/aarch64/vld1-vst1_1.c: New testcase.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 158b3dca6da12322de0af80d35f593039d716de6..2f2e74f6bccd54accd265a55cc8dbcfe2db2e76f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3544,6 +3544,24 @@
(set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
 )
 
+(define_insn "aarch64_be_ld1"
+  [(set (match_operand:VALLDI 0	"register_operand" "=w")
+	(unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")]
+	UNSPEC_LD1))]
+  "TARGET_SIMD"
+  "ld1\\t{%0}, %1"
+  [(set_attr "type" "neon_load1_1reg")]
+)
+
+(define_insn "aarch64_be_st1"
+  [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv")
+	(unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")]
+	UNSPEC_ST1))]
+  "TARGET_SIMD"
+  "st1\\t{%1}, %0"
+  [(set_attr "type" "neon_store1_1reg")]
+)
+
 (define_split
   [(set (match_operand:OI 0 "register_operand" "")
 	(match_operand:OI 1 "register_operand" ""))]
@@ -3762,7 +3780,11 @@
 {
   enum machine_mode mode = mode;
   rtx mem = gen_rtx_MEM (mode, operands[1]);
-  emit_move_insn (operands[0], mem);
+
+  if (BYTES_BIG_ENDIAN)
+emit_insn (gen_aarch64_be_ld1 (operands[0], mem));
+  else
+emit_move_insn (operands[0], mem);
   DONE;
 })
 
@@ -3988,7 +4010,11 @@
 {
   enum machine_mode mode = mode;
   rtx mem = gen_rtx_MEM (mode, operands[0]);
-  emit_move_insn (mem, operands[1]);
+
+  if (BYTES_BIG_ENDIAN)
+emit_insn (gen_aarch64_be_st1 (mem, operands[1]));
+  else
+emit_move_insn (mem, operands[1]);
   DONE;
 })
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c83622d6cad59883923f6eb0454c735c24a1eb3f..d5186f6211ec795672fc2631d7bbb1247a2d2773 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -81,6 +81,7 @@
 UNSPEC_GOTSMALLPIC
 UNSPEC_GOTSMALLTLS
 UNSPEC_GOTTINYPIC
+UNSPEC_LD1
 UNSPEC_LD2
 UNSPEC_LD3
 UNSPEC_LD4
@@ -92,6 +93,7 @@
 UNSPEC_SISD_SSHL
 UNSPEC_SISD_USHL
 UNSPEC_SSHL_2S
+UNSPEC_ST1
 UNSPEC_ST2
 UNSPEC_ST3
 UNSPEC_ST4
diff --git a/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c b/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
new file mode 100644
index ..d1834a264708fe6ab901ac1a27544ca8ebb815cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
@@ -0,0 +1,52 @@
+/* Test vld1 and vst1 maintain consistent indexing.  */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+#include 
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+test_vld1_vst1 ()
+{
+  int8x8_t a;
+  int8x8_t b;
+  int i = 0;
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[8];
+  a = vld1_s8 (c);
+  asm volatile ("":::"memory");
+  vst1_s8 (d, a);
+  asm volatile ("":::"memory");
+  for (; i < 8; i++)
+if (c[i] != d[i])
+  return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vld1q_vst1q ()
+{
+  int16x8_t a;
+  int16x8_t b;
+  int i = 0;
+  int16_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int16_t d[8];
+  a = vld1q_s16 (c);
+  asm volatile ("":::"memory");
+  vst1q_s16 (d, a);
+  asm volatile ("":::"memory");
+  for (; i < 8; i++)
+if (c[i] != d[i])
+  return 1;
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vld1_vst1 ())
+abort ();
+  if (test_vld1q_vst1q ())
+abort ();
+  return 0;
+}


[PATCH AArch64_BE 2/4] Big-Endian lane numbering fix

2014-01-16 Thread Alex Velenko

Hi,
This patch changes the get_lane intrinsics to provide correct big-endian
indexing. This fixes numerous big-endian load and store issues caused by
fetching the wrong lane.
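
A hypothetical illustration of the user-visible contract the change
preserves: regardless of endianness, extracting lane 0 of a vector loaded
from memory must return element 0 of the source array, which the
be_checked_get_lane wrapper guarantees by remapping the index before the
underlying get_lane pattern runs.

/* Hypothetical example, not part of the patch.  */
#include <arm_neon.h>

int8_t
first_element (const int8_t *p)
{
  int8x8_t v = vld1_s8 (p);
  return vget_lane_s8 (v, 0);   /* must equal p[0] on both LE and BE  */
}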


Is this good for trunk?

gcc/
2013-01-14  Alex Velenko  

* config/aarch64/aarch64-simd.md
(aarch64_be_checked_get_lane): New define_expand.
* config/aarch64/aarch64-simd-builtins.def
(BUILTIN_VALL (GETLANE, be_checked_get_lane, 0):
New builtin definition.
* config/aarch64/arm_neon.h (__aarch64_vget_lane_any):
Use new safe big-endian builtin.
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 1dc3c1fe33fdb8148d2ff9c7198e4d85d5dac5d7..d255759713068e64c007cf8a90f57b0fcc0fc288 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -49,6 +49,7 @@
 
   BUILTIN_VALL (GETLANE, get_lane, 0)
   VAR1 (GETLANE, get_lane, 0, di)
+  BUILTIN_VALL (GETLANE, be_checked_get_lane, 0)
 
   BUILTIN_VD_RE (REINTERP, reinterpretdi, 0)
   BUILTIN_VDC (REINTERP, reinterpretv8qi, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 2f2e74f6bccd54accd265a55cc8dbcfe2db2e76f..2a7b5b12233a55dcbb61632f64ebee8a7f24ac02 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2062,6 +2062,20 @@
   [(set_attr "type" "neon_to_gp")]
 )
 
+(define_expand "aarch64_be_checked_get_lane"
+  [(match_operand: 0 "aarch64_simd_nonimmediate_operand")
+   (match_operand:VALL 1 "register_operand")
+   (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2])));
+emit_insn (gen_aarch64_get_lane (operands[0],
+	   operands[1],
+	   operands[2]));
+DONE;
+  }
+)
+
 ;; Lane extraction of a value, neither sign nor zero extension
 ;; is guaranteed so upper bits should be considered undefined.
 (define_insn "aarch64_get_lane"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 03549bd7a27cccb14ed8cdce91cbd4e4278c273f..33816d4381c8cf271fc4a85db6cc668f6c031dd8 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -453,7 +453,7 @@ typedef struct poly16x8x4_t
 
 #define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
   (__cast_ret\
- __builtin_aarch64_get_lane##__size (__cast_a __a, __b))
+ __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b))
 
 #define __aarch64_vget_lane_f32(__a, __b) \
   __aarch64_vget_lane_any (v2sf, , , __a, __b)


[PATCH AArch64_BE 3/4] Big-Endian lane numbering fix

2014-01-16 Thread Alex Velenko

Hi,

This patch by James Greenhalgh fixes "by-lane" patterns broken by
previous patches.

Regression tested on aarch64-none-elf and aarch64_be-none-elf
with no unexpected issues.

OK?

Thanks,
Alex

---
gcc/

2014-01-16  James Greenhalgh  

* config/aarch64/aarch64-simd.md
(aarch64_dup_lane): Correct lane number on big-endian.
(aarch64_dup_lane_): Likewise.
(*aarch64_mul3_elt): Likewise.
(*aarch64_mul3_elt): Likewise.
(*aarch64_mul3_elt_to_64v2df): Likewise.
(*aarch64_mla_elt): Likewise.
(*aarch64_mla_elt_): Likewise.
(*aarch64_mls_elt): Likewise.
(*aarch64_mls_elt_): Likewise.
(*aarch64_fma4_elt): Likewise.
(*aarch64_fma4_elt_): Likewise.
(*aarch64_fma4_elt_to_64v2df): Likewise.
(*aarch64_fnma4_elt): Likewise.
(*aarch64_fnma4_elt_): Likewise.
(*aarch64_fnma4_elt_to_64v2df): Likewise.
(aarch64_sqdmulh_lane): Likewise.
(aarch64_sqdmulh_laneq): Likewise.
(aarch64_sqdmll_lane_internal): Likewise.
(aarch64_sqdmll_lane_internal): Likewise.
(aarch64_sqdmll2_lane_internal): Likewise.
(aarch64_sqdmull_lane_internal): Likewise.
(aarch64_sqdmull2_lane_internal): Likewise.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e819e6ff54b43f0b24ef176ffd883fd30b774e77..00e85f88feec8c2456c8947fc9925cc583d2ad46 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -67,7 +67,10 @@
 	(parallel [(match_operand:SI 2 "immediate_operand" "i")])
   )))]
   "TARGET_SIMD"
-  "dup\\t%0., %1.[%2]"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2])));
+return "dup\\t%0., %1.[%2]";
+  }
   [(set_attr "type" "neon_dup")]
 )
 
@@ -79,7 +82,11 @@
 	(parallel [(match_operand:SI 2 "immediate_operand" "i")])
   )))]
   "TARGET_SIMD"
-  "dup\\t%0., %1.[%2]"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (mode,
+	  INTVAL (operands[2])));
+return "dup\\t%0., %1.[%2]";
+  }
   [(set_attr "type" "neon_dup")]
 )
 
@@ -288,7 +295,10 @@
 	(parallel [(match_operand:SI 2 "immediate_operand")])))
   (match_operand:VMUL 3 "register_operand" "w")))]
   "TARGET_SIMD"
-  "mul\\t%0., %3., %1.[%2]"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2])));
+return "mul\\t%0., %3., %1.[%2]";
+  }
   [(set_attr "type" "neon_mul__scalar")]
 )
 
@@ -301,7 +311,11 @@
 	(parallel [(match_operand:SI 2 "immediate_operand")])))
   (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
   "TARGET_SIMD"
-  "mul\\t%0., %3., %1.[%2]"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (mode,
+	  INTVAL (operands[2])));
+return "mul\\t%0., %3., %1.[%2]";
+  }
   [(set_attr "type" "neon_mul__scalar")]
 )
 
@@ -324,7 +338,10 @@
 	 (parallel [(match_operand:SI 2 "immediate_operand")]))
(match_operand:DF 3 "register_operand" "w")))]
   "TARGET_SIMD"
-  "fmul\\t%0.2d, %3.2d, %1.d[%2]"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
+return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
+  }
   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 )
 
@@ -783,7 +800,10 @@
 	   (match_operand:VDQHS 3 "register_operand" "w"))
 	 (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
- "mla\t%0., %3., %1.[%2]"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2])));
+return "mla\t%0., %3., %1.[%2]";
+  }
   [(set_attr "type" "neon_mla__scalar")]
 )
 
@@ -798,7 +818,11 @@
 	   (match_operand:VDQHS 3 "register_operand" "w"))
 	 (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
- "mla\t%0., %3., %1.[%2]"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (mode,
+	  INTVAL (operands[2])));
+return "mla\t%0., %3., %1.[%2]";
+  }
   [(set_attr "type" "neon_mla__scalar")]
 )
 
@@ -823,7 +847,10 @@
 		  (parallel [(match_operand:SI 2 "immediate_operand")])))
 	   (match_operand:VDQHS 3 "register_operand" "w"]
  "TARGET_SIMD"
- "mls\t%0., %3., %1.[%2]"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2])));
+return "mls\t%0., %3., %1.[%2]";
+  }
   [(set_attr "type" "neon_mla__scalar")]
 )
 
@@ -838,7 +865,11 @@
 		  (parallel [(match_operand:SI 2 "immediate_operand")])))
 	   (match_operand:VDQHS 3 "register_operand" "w"]
  "TARGET_SIMD"
- "mls\t%0., %3., %1.[%2]"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (mode,
+	  INTVAL (operands[2])));
+return "mls\t%0., %3., %1.[%2]";
+  }
   [(set_attr "type" "neon_mla__scalar")]
 )
 
@@ -1237,7 +1268,10 @@
   (match_operand:VDQF 3 "register_operand" "w")
   (match_operand:VDQF 4 "register_operand" "0")))]
   "TARGET_SIMD"
-  "fmla\\t%0., %3., %1.[%2]"
+  {
+operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2])));
+return "fmla\\t%0., %3., %1.[%2]";
+  }
   [(set_attr "type" "neon_fp_mla__scalar")]
 )
 
@@ -1251,7 +1285,11 @@
  

[PATCH AArch64_BE 4/4] Big-Endian lane numbering fix

2014-01-16 Thread Alex Velenko

Hi,
In the previous BE patches, the way lane indices are calculated has
been changed. To accommodate that change, the arm_neon.h intrinsics had
to be updated.

Is it okay?

gcc/

2014-01-16  James Greenhalgh  
Alex Velenko  

* config/aarch64/arm_neon.h (vaddv_s8): __LANE0 cleanup.
(vaddv_s16): Likewise.
(vaddv_s32): Likewise.
(vaddv_u8): Likewise.
(vaddv_u16): Likewise.
(vaddv_u32): Likewise.
(vaddvq_s8): Likewise.
(vaddvq_s16): Likewise.
(vaddvq_s32): Likewise.
(vaddvq_s64): Likewise.
(vaddvq_u8): Likewise.
(vaddvq_u16): Likewise.
(vaddvq_u32): Likewise.
(vaddvq_u64): Likewise.
(vaddv_f32): Likewise.
(vaddvq_f32): Likewise.
(vaddvq_f64): Likewise.
(vmaxv_f32): Likewise.
(vmaxv_s8): Likewise.
(vmaxv_s16): Likewise.
(vmaxv_s32): Likewise.
(vmaxv_u8): Likewise.
(vmaxv_u16): Likewise.
(vmaxv_u32): Likewise.
(vmaxvq_f32): Likewise.
(vmaxvq_f64): Likewise.
(vmaxvq_s8): Likewise.
(vmaxvq_s16): Likewise.
(vmaxvq_s32): Likewise.
(vmaxvq_u8): Likewise.
(vmaxvq_u16): Likewise.
(vmaxvq_u32): Likewise.
(vmaxnmv_f32): Likewise.
(vmaxnmvq_f32): Likewise.
(vmaxnmvq_f64): Likewise.
(vminv_f32): Likewise.
(vminv_s8): Likewise.
(vminv_s16): Likewise.
(vminv_s32): Likewise.
(vminv_u8): Likewise.
(vminv_u16): Likewise.
(vminv_u32): Likewise.
(vminvq_f32): Likewise.
(vminvq_f64): Likewise.
(vminvq_s8): Likewise.
(vminvq_s16): Likewise.
(vminvq_s32): Likewise.
(vminvq_u8): Likewise.
(vminvq_u16): Likewise.
(vminvq_u32): Likewise.
(vminnmv_f32): Likewise.
(vminnmvq_f32): Likewise.
(vminnmvq_f64): Likewise.
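
As a reminder of the user-visible contract these hunks preserve, here is a minimal sketch (illustrative only) of one of the affected reductions; with lane numbering now remapped in the backend, extracting lane 0 of the reduction result is correct on both endiannesses:

#include <arm_neon.h>

/* vaddv_s32 sums all lanes of its argument; the cleanup below lets the
   intrinsic read lane 0 unconditionally.  */
int32_t
sum_lanes (int32x2_t a)
{
  return vaddv_s32 (a);  /* a[0] + a[1] on LE and BE alike */
}
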
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 33816d4381c8cf271fc4a85db6cc668f6c031dd8..568ade61653d213da5c1826c970ee350e1fdee97 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -15307,30 +15307,24 @@ vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
   return __a + __b;
 }
 
-#if __AARCH64EB__
-#define __LANE0(__t) ((__t) - 1)
-#else
-#define __LANE0(__t) 0
-#endif
-
 /* vaddv */
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vaddv_s8 (int8x8_t __a)
 {
-  return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), __LANE0 (8));
+  return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vaddv_s16 (int16x4_t __a)
 {
-  return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), __LANE0 (4));
+  return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vaddv_s32 (int32x2_t __a)
 {
-  return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), __LANE0 (2));
+  return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
@@ -15338,7 +15332,7 @@ vaddv_u8 (uint8x8_t __a)
 {
   return vget_lane_u8 ((uint8x8_t)
 		__builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
-		__LANE0 (8));
+		0);
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
@@ -15346,7 +15340,7 @@ vaddv_u16 (uint16x4_t __a)
 {
   return vget_lane_u16 ((uint16x4_t)
 		__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
-		__LANE0 (4));
+		0);
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
@@ -15354,32 +15348,32 @@ vaddv_u32 (uint32x2_t __a)
 {
   return vget_lane_u32 ((uint32x2_t)
 		__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
-		__LANE0 (2));
+		0);
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vaddvq_s8 (int8x16_t __a)
 {
   return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
-			__LANE0 (16));
+			0);
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vaddvq_s16 (int16x8_t __a)
 {
-  return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), __LANE0 (8));
+  return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vaddvq_s32 (int32x4_t __a)
 {
-  return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), __LANE0 (4));
+  return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
 }
 
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
 vaddvq_s64 (int64x2_t __a)
 {
-  return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), __LANE0 (2));
+  return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
 }
 
 __extension__ static __inline uint8_t __attribute__

[Patch][AArch64] NEON vdup testcases

2014-01-16 Thread Alex Velenko

[AArch64] VDUP testcases

Hi,

This patch implements test cases for following NEON intrinsics:
vdup_lane_f32
vdup_lane_s[8,16]
vdup_lane_s[32,64]
vdup_n_[p,s,u][8,16]
vdup_n_[s,u][32,64]

vdupb_lane_[s,u]8
vduph_lane_[s,u]16
vdupd_lane_[f,s,u]64
vdups_lane_[f,s,u]32

vdupq_lane_[f,s][32,64]
vdupq_lane_s[8,16]
vdup[q]_n_f32
vdupq_n_f64
vdupq_n_[s,p,u][8,16]
vdupq_n_[s,u][32,64]

Tests succeed on both Little-Endian and Big-Endian.

Ok for trunk?

Thanks,
Alex

gcc/testsuite/

2014-01-16  Alex Velenko  

* gcc.target/aarch64/vdup_lane_1.c: New testcase.
* gcc.target/aarch64/vdup_lane_2.c: New testcase.
* gcc.target/aarch64/vdup_n_1.c: New testcase.
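
For reference, a minimal sketch of the vdup_lane semantics the new tests exercise (illustrative only; the test files below use vld1/vst1 round trips and scan the generated assembly instead):

#include <arm_neon.h>

/* vdup_lane_f32 broadcasts one lane of the source vector into every
   lane of the result.  */
float32x2_t
broadcast_lane_1 (float32x2_t a)
{
  return vdup_lane_f32 (a, 1);  /* { a[1], a[1] } */
}
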
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
new file mode 100644
index ..a80e10146a6e45b44c3a09701da949a8e9aa7653
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
@@ -0,0 +1,409 @@
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
+
+#define force_simd(V1)   asm volatile ("orr %0.16b, %1.16b, %1.16b"	\
+	   : "=w"(V1)		\
+	   : "w"(V1)		\
+	   : /* No clobbers */);
+
+int
+__attribute__ ((noinline)) test_vdup_lane_f32 ()
+{
+  float32x2_t a;
+  float32x2_t b;
+  int i = 0;
+  float32_t c[2] = { 0.0E0 , 3.14 };
+  float32_t d[2];
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_f32 (a, 0);
+  vst1_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+{
+  if (c[0] != d[i])
+	return 1;
+}
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_f32 (a, 1);
+  vst1_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+{
+  if (c[1] != d[i])
+	return 1;
+}
+
+  return 0;
+}
+
+/* Covers test_vdup_lane_f32 and test_vdup_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_f32 ()
+{
+  float32x2_t a;
+  float32x4_t b;
+  int i = 0;
+  float32_t c[2] = { 0.0E0 , 3.14 };
+  float32_t d[4];
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_f32 (a, 0);
+  vst1q_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+{
+  if (c[0] != d[i])
+	return 1;
+}
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_f32 (a, 1);
+  vst1q_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+{
+  if (c[1] != d[i])
+	return 1;
+}
+  return 0;
+}
+
+/* Covers test_vdupq_lane_f32 and test_vdupq_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+int
+__attribute__ ((noinline)) test_vdup_lane_s8 ()
+{
+  int8x8_t a;
+  int8x8_t b;
+  int i = 0;
+  /* Only the first two cases are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[8];
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s8 (a, 0);
+  vst1_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+{
+  if (c[0] != d[i])
+	return 1;
+}
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  force_simd (a)
+  b = vdup_lane_s8 (a, 4);
+  vst1_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+{
+  if (c[4] != d[i])
+	return 1;
+}
+  return 0;
+}
+
+/* Covers test_vdup_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[4\\\]" 1 } } */
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_s8 ()
+{
+  int8x8_t a;
+  int8x16_t b;
+  int i = 0;
+  /* Only the first two cases are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[16];
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s8 (a, 0);
+  vst1q_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+{
+  if (c[0] != d[i])
+	return 1;
+}
+
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s8 (a, 4);
+  vst1q_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+{
+  if (c[4] != d[i])
+	return 1;
+}
+
+  return 0;
+}
+
+/* Covers test_vdupq_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[4\\\]" 1 } } */
+
+int
+__attribute__ ((noinline)) test_vdup_lane_s16 ()
+{
+  int16x4_t a;
+  int16x4_t b;
+  int i 

[Patch][AArch64] vneg floating point testcase BE fixed

2014-01-16 Thread Alex Velenko

Hi,
This patch fixes testcase vneg_f.c, which was using an inconsistent
vector model, causing problems for the Big-Endian compiler.

Now testcase runs on both LE and BE without regressions.

Is it okay?

Kind regards,
Alex Velenko

gcc/testsuite/

2013-01-16  Alex Velenko  

   */gcc.target/aarch64/vneg_f.c (STORE_INST): ST1 macro added.
   (RUN_TEST): Macro updated to use STORE_INST.
   (test_vneg_f32): Changed to provide definitions for RUN_TEST.
   (test_vneg_f64): Likewise.
   (test_vnegq_f32): Likewise.
   (test_vnegq_f64): Likewise.
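
In outline, the reworked RUN_TEST stores the negated vector back to memory with vst1 and checks each element against the original input. A minimal sketch of that scheme (illustrative only; the EPSILON value below is an assumption, and the real test omits the fabsf):

#include <arm_neon.h>
#include <math.h>

/* Sketch of the check: negate, store with vst1, and verify that
   x + (-x) stays within a small tolerance for every lane.  */
static int
check_vneg_f32 (const float32_t *in)
{
  float32_t out[2];
  float32x2_t a = vld1_f32 (in);
  vst1_f32 (out, vneg_f32 (a));
  for (int i = 0; i < 2; i++)
    if (fabsf (in[i] + out[i]) > 1.0e-6f)  /* EPSILON value assumed here */
      return 1;
  return 0;
}
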
diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_f.c b/gcc/testsuite/gcc.target/aarch64/vneg_f.c
index 1eaf21d34eb57b4e7e5388a4686fe6341197447a..01503028547f320ab3d8ea725ff09ee5d0487f18 100644
--- a/gcc/testsuite/gcc.target/aarch64/vneg_f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vneg_f.c
@@ -44,34 +44,27 @@ extern void abort (void);
 #define DATA_TYPE_64 double
 #define DATA_TYPE(data_len) DATA_TYPE_##data_len
 
-#define INDEX64_32 [i]
-#define INDEX64_64
-#define INDEX128_32 [i]
-#define INDEX128_64 [i]
-#define INDEX(reg_len, data_len) \
-  CONCAT1 (INDEX, reg_len##_##data_len)
-
+#define STORE_INST(reg_len, data_len) \
+  CONCAT1 (vst1, POSTFIX (reg_len, data_len))
 #define LOAD_INST(reg_len, data_len) \
   CONCAT1 (vld1, POSTFIX (reg_len, data_len))
 #define NEG_INST(reg_len, data_len) \
   CONCAT1 (vneg, POSTFIX (reg_len, data_len))
 
 #define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
-
-#define RUN_TEST(test_set, reg_len, data_len, n, a, b) \
+#define RUN_TEST(test_set, reg_len, data_len, n, a, b, c) \
   {		   \
 int i;	   \
 (a) = LOAD_INST (reg_len, data_len) (test_set);\
 (b) = NEG_INST (reg_len, data_len) (a);	   \
+STORE_INST (reg_len, data_len) (c, b);	   \
 for (i = 0; i < n; i++)			   \
   {		   \
 	DATA_TYPE (data_len) diff;		   \
 	INHIB_OPTIMIZATION;			   \
-	diff	   \
-	  = a INDEX (reg_len, data_len)		   \
-	+ b INDEX (reg_len, data_len);	   \
+	diff = test_set[i] + c[i];		   \
 	if (diff > EPSILON)			   \
-	  return 1;   \
+	return 1;   \
   }		   \
   }
 
@@ -84,28 +77,29 @@ extern void abort (void);
 int
 test_vneg_f32 ()
 {
-  float test_set0[2] = { TEST0, TEST1 };
-  float test_set1[2] = { TEST2, TEST3 };
-  float test_set2[2] = { VAR_MAX, VAR_MIN };
-  float test_set3[2] = { INFINITY, NAN };
-
   float32x2_t a;
   float32x2_t b;
+  float32_t c[2];
 
-  RUN_TEST (test_set0, 64, 32, 2, a, b);
-  RUN_TEST (test_set1, 64, 32, 2, a, b);
-  RUN_TEST (test_set2, 64, 32, 2, a, b);
-  RUN_TEST (test_set3, 64, 32, 0, a, b);
+  float32_t test_set0[2] = { TEST0, TEST1 };
+  float32_t test_set1[2] = { TEST2, TEST3 };
+  float32_t test_set2[2] = { VAR_MAX, VAR_MIN };
+  float32_t test_set3[2] = { INFINITY, NAN };
+
+  RUN_TEST (test_set0, 64, 32, 2, a, b, c);
+  RUN_TEST (test_set1, 64, 32, 2, a, b, c);
+  RUN_TEST (test_set2, 64, 32, 2, a, b, c);
+  RUN_TEST (test_set3, 64, 32, 0, a, b, c);
 
   /* Since last test cannot be checked in a uniform way by adding
  negation result to original value, the number of lanes to be
  checked in RUN_TEST is 0 (last argument).  Instead, result
  will be checked manually.  */
 
-  if (b[0] != -INFINITY)
+  if (c[0] != -INFINITY)
 return 1;
 
-  if (!__builtin_isnan (b[1]))
+  if (!__builtin_isnan (c[1]))
 return 1;
 
   return 0;
@@ -130,37 +124,38 @@ test_vneg_f64 ()
 {
   float64x1_t a;
   float64x1_t b;
-
-  double test_set0[1] = { TEST0 };
-  double test_set1[1] = { TEST1 };
-  double test_set2[1] = { TEST2 };
-  double test_set3[1] = { TEST3 };
-  double test_set4[1] = { VAR_MAX };
-  double test_set5[1] = { VAR_MIN };
-  double test_set6[1] = { INFINITY };
-  double test_set7[1] = { NAN };
-
-  RUN_TEST (test_set0, 64, 64, 1, a, b);
-  RUN_TEST (test_set1, 64, 64, 1, a, b);
-  RUN_TEST (test_set2, 64, 64, 1, a, b);
-  RUN_TEST (test_set3, 64, 64, 1, a, b);
-  RUN_TEST (test_set4, 64, 64, 1, a, b);
-  RUN_TEST (test_set5, 64, 64, 1, a, b);
-  RUN_TEST (test_set6, 64, 64, 0, a, b);
+  float64_t c[1];
+
+  float64_t test_set0[1] = { TEST0 };
+  float64_t test_set1[1] = { TEST1 };
+  float64_t test_set2[1] = { TEST2 };
+  float64_t test_set3[1] = { TEST3 };
+  float64_t test_set4[1] = { VAR_MAX };
+  float64_t test_set5[1] = { VAR_MIN };
+  float64_t test_set6[1] = { INFINITY };
+  float64_t test_set7[1] = { NAN };
+
+  RUN_TEST (test_set0, 64, 64, 1, a, b, c);
+  RUN_TEST (test_set1, 64, 64, 1, a, b, c);
+  RUN_TEST (test_set2, 64, 64, 1, a, b, c);
+  RUN_TEST (test_set3, 64, 64, 1, a, b, c);
+  RUN_TEST (test_set4, 64, 64, 1, a, b, c);
+  RUN_TEST (test_set5, 64, 64, 1, a, b, c);
+  RUN_TEST (test_set6, 64, 64, 0, a, b, c);
 
   /* Since last test cannot be checked in a uniform way by adding
  negation result to original value, the number of lanes to be
  checked in RUN_TEST is 0

Re: [Patch AArch64] Implement Vector Permute Support

2014-01-16 Thread Alex Velenko

On 14/01/14 15:51, pins...@gmail.com wrote:




On Jan 14, 2014, at 7:19 AM, Alex Velenko  wrote:

Hi,

This patch turns off the vec_perm patterns for aarch64_be; this should resolve
the issue highlighted here:
http://gcc.gnu.org/ml/gcc-patches/2014-01/msg00321.html
With this patch applied, the test case provided in that link compiles without
an ICE.

However, the Big-Endian port is still in development. This patch exposes
another known but unrelated issue with Big-Endian Large-Int modes.

The patch has been tested on aarch64-none-elf and aarch64_be-none-elf, resulting
in five further regressions due to the broken implementation of Big-Endian
Large-Int modes.

Kind regards,
Alex Velenko

gcc/

2014-01-14  Alex Velenko  

* config/aarch64/aarch64-simd.md (vec_perm): Add BE check.
* config/aarch64/aarch64.c (aarch64_expand_vec_perm): Add comment.

gcc/testsuite/

2014-01-14  Alex Velenko  

* lib/target-supports.exp
(check_effective_target_vect_perm): Exclude aarch64_be.
(check_effective_target_vect_perm_byte): Likewise.
(check_effective_target_vect_perm_short): Likewise.


I think you want to use a function to check if the target is effectively 
big-endian instead.  Internally at Cavium, our elf compiler has big-endian 
multi-lib.

Thanks,
Andrew








Hi,
Here is a vec-perm patch with changes proposed previously.
Little and Big-Endian tested with no additional issues appearing.

Kind regards,
Alex

gcc/

2014-01-16  Alex Velenko  

* config/aarch64/aarch64-simd.md (vec_perm): Add BE check.
* config/aarch64/aarch64.c (aarch64_expand_vec_perm): Add comment.

gcc/testsuite/

2014-01-16  Alex Velenko  

* lib/target-supports.exp
(check_effective_target_vect_perm): Exclude aarch64_be.
(check_effective_target_vect_perm_byte): Likewise.
(check_effective_target_vect_perm_short): Likewise.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index bc47a291de4b9b24d829e4dbf060fff7a321558f..43a9c5b27d78a47cf965636a03232005a4c8e7c3 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3840,7 +3840,7 @@
(match_operand:VB 1 "register_operand")
(match_operand:VB 2 "register_operand")
(match_operand:VB 3 "register_operand")]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
 {
   aarch64_expand_vec_perm (operands[0], operands[1],
 			   operands[2], operands[3]);
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 159f88f28dd838d4aee6d75f8d21897695609c49..b425183c1e893c6511ba575a0cd416563c9510be 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3436,7 +3436,8 @@ proc check_effective_target_vect_perm { } {
 } else {
 set et_vect_perm_saved 0
 if { [is-effective-target arm_neon_ok]
-	 || [istarget aarch64*-*-*]
+	 || ([istarget aarch64*-*-*]
+		 && [is-effective-target aarch64_little_endian])
 	 || [istarget powerpc*-*-*]
  || [istarget spu-*-*]
 	 || [istarget i?86-*-*]
@@ -3464,7 +3465,8 @@ proc check_effective_target_vect_perm_byte { } {
 set et_vect_perm_byte_saved 0
 if { ([is-effective-target arm_neon_ok]
 	  && [is-effective-target arm_little_endian])
-	 || [istarget aarch64*-*-*]
+	 || ([istarget aarch64*-*-*]
+		 && [is-effective-target aarch64_little_endian])
 	 || [istarget powerpc*-*-*]
  || [istarget spu-*-*] } {
 set et_vect_perm_byte_saved 1
@@ -3488,7 +3490,8 @@ proc check_effective_target_vect_perm_short { } {
 set et_vect_perm_short_saved 0
 if { ([is-effective-target arm_neon_ok]
 	  && [is-effective-target arm_little_endian])
-	 || [istarget aarch64*-*-*]
+	 || ([istarget aarch64*-*-*]
+		 && [is-effective-target aarch64_little_endian])
 	 || [istarget powerpc*-*-*]
  || [istarget spu-*-*] } {
 set et_vect_perm_short_saved 1


Re: [Patch][AArch64] vneg floating point testcase BE fixed

2014-01-17 Thread Alex Velenko

Hi,
I agree the correct changelog entry should be:

gcc/testsuite/

2013-01-16  Alex Velenko  
*/gcc.target/aarch64/vneg_f.c (STORE_INST): New macro.
(RUN_TEST): Use new macro.
(INDEX): Macro removed.
(test_vneg_f32): Use fixed RUN_TEST.
(test_vneg_f64): Likewise.
(test_vnegq_f32): Likewise.
(test_vnegq_f64): Likewise.

Kind regards,
Alex Velenko

On 16/01/14 16:58, Richard Earnshaw wrote:

On 16/01/14 12:23, Alex Velenko wrote:

Hi,
This patch fixes testcase vneg_f.c which  was using an inconsistent
vector model causing problems for Big-Endian compiler.

Now testcase runs on both LE and BE without regressions.

Is it okay?

Kind regards,
Alex Velenko

gcc/testsuite/

2013-01-16  Alex Velenko  

   */gcc.target/aarch64/vneg_f.c (STORE_INST): ST1 macro added.

Just say: "New macro."


   (RUN_TEST): Macro updated to use STORE_INST.

"Use it."


   (test_vneg_f32): Changed to provide definitions for RUN_TEST.

"Use RUN_TEST."


   (test_vneg_f64): Likewise.
   (test_vnegq_f32): Likewise.
   (test_vnegq_f64): Likewise.



You also need to mention the INDEX* macros that you've removed.  Just
say "Delete."



Vneg_fix.patch


diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_f.c 
b/gcc/testsuite/gcc.target/aarch64/vneg_f.c
index 
1eaf21d34eb57b4e7e5388a4686fe6341197447a..01503028547f320ab3d8ea725ff09ee5d0487f18
 100644
--- a/gcc/testsuite/gcc.target/aarch64/vneg_f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vneg_f.c
@@ -44,34 +44,27 @@ extern void abort (void);
  #define DATA_TYPE_64 double
  #define DATA_TYPE(data_len) DATA_TYPE_##data_len

-#define INDEX64_32 [i]
-#define INDEX64_64
-#define INDEX128_32 [i]
-#define INDEX128_64 [i]
-#define INDEX(reg_len, data_len) \
-  CONCAT1 (INDEX, reg_len##_##data_len)
-
+#define STORE_INST(reg_len, data_len) \
+  CONCAT1 (vst1, POSTFIX (reg_len, data_len))
  #define LOAD_INST(reg_len, data_len) \
CONCAT1 (vld1, POSTFIX (reg_len, data_len))
  #define NEG_INST(reg_len, data_len) \
CONCAT1 (vneg, POSTFIX (reg_len, data_len))

  #define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
-
-#define RUN_TEST(test_set, reg_len, data_len, n, a, b) \
+#define RUN_TEST(test_set, reg_len, data_len, n, a, b, c) \
{  \
  int i;   \
  (a) = LOAD_INST (reg_len, data_len) (test_set);\
  (b) = NEG_INST (reg_len, data_len) (a);  \
+STORE_INST (reg_len, data_len) (c, b);\
  for (i = 0; i < n; i++)   \
{  \
DATA_TYPE (data_len) diff; \
INHIB_OPTIMIZATION;\
-   diff   \
- = a INDEX (reg_len, data_len)\
-   + b INDEX (reg_len, data_len); \
+   diff = test_set[i] + c[i]; \
if (diff > EPSILON) \
- return 1;\
+   return 1;  \
}  \
}

@@ -84,28 +77,29 @@ extern void abort (void);
  int
  test_vneg_f32 ()
  {
-  float test_set0[2] = { TEST0, TEST1 };
-  float test_set1[2] = { TEST2, TEST3 };
-  float test_set2[2] = { VAR_MAX, VAR_MIN };
-  float test_set3[2] = { INFINITY, NAN };
-
float32x2_t a;
float32x2_t b;
+  float32_t c[2];

-  RUN_TEST (test_set0, 64, 32, 2, a, b);
-  RUN_TEST (test_set1, 64, 32, 2, a, b);
-  RUN_TEST (test_set2, 64, 32, 2, a, b);
-  RUN_TEST (test_set3, 64, 32, 0, a, b);
+  float32_t test_set0[2] = { TEST0, TEST1 };
+  float32_t test_set1[2] = { TEST2, TEST3 };
+  float32_t test_set2[2] = { VAR_MAX, VAR_MIN };
+  float32_t test_set3[2] = { INFINITY, NAN };
+
+  RUN_TEST (test_set0, 64, 32, 2, a, b, c);
+  RUN_TEST (test_set1, 64, 32, 2, a, b, c);
+  RUN_TEST (test_set2, 64, 32, 2, a, b, c);
+  RUN_TEST (test_set3, 64, 32, 0, a, b, c);

/* Since last test cannot be checked in a uniform way by adding
   negation result to original value, the number of lanes to be
   checked in RUN_TEST is 0 (last argument).  Instead, result
   will be checked manually.  */

-  if (b[0] != -INFINITY)
+  if (c[0] != -INFINITY)
  return 1;

-  if (!__builtin_isnan (b[1]))
+  if (!__builtin_isnan (c[1]))
  return 1;

return 0;
@@ -130,37 +124,38 @@ test_vneg_f64 ()
  {
float64x1_t a;
float64x1_t b;
-
-  double test_set0[1] = { TEST0 };
-  double test_set1[1] = { TEST1 };
-  double test_set2[1] = { TEST2 };
-  double test_set3[1] = { TEST3 };
-  double test_set4[1] = { VAR_MAX };
-  double test_set5[1] = { VAR_MIN };
-  double test_set6[1] = { INFINITY };
-  double test_set7[1] = { NAN };
-
-  RUN_TES

Re: [Patch][AArch64] vneg floating point testcase BE fixed

2014-01-17 Thread Alex Velenko

Hi,
Here are some more improvements to the changelog entry:

gcc/testsuite/

2013-01-16  Alex Velenko  

* gcc.target/aarch64/vneg_f.c (STORE_INST): New macro.
(RUN_TEST): Use new macro.
(INDEX64_32): Delete.
(INDEX64_64): Likewise.
(INDEX128_32): Likewise.
(INDEX128_64): Likewise.
(INDEX): Likewise.
(test_vneg_f32): Use fixed RUN_TEST.
(test_vneg_f64): Likewise.
(test_vnegq_f32): Likewise.
(test_vnegq_f64): Likewise.


Kind regards,
Alex Velenko

On 16/01/14 16:58, Richard Earnshaw wrote:

On 16/01/14 12:23, Alex Velenko wrote:

Hi,
This patch fixes testcase vneg_f.c which  was using an inconsistent
vector model causing problems for Big-Endian compiler.

Now testcase runs on both LE and BE without regressions.

Is it okay?

Kind regards,
Alex Velenko

gcc/testsuite/

2013-01-16  Alex Velenko  

   */gcc.target/aarch64/vneg_f.c (STORE_INST): ST1 macro added.

Just say: "New macro."


   (RUN_TEST): Macro updated to use STORE_INST.

"Use it."


   (test_vneg_f32): Changed to provide definitions for RUN_TEST.

"Use RUN_TEST."


   (test_vneg_f64): Likewise.
   (test_vnegq_f32): Likewise.
   (test_vnegq_f64): Likewise.



You also need to mention the INDEX* macros that you've removed.  Just
say "Delete."



Vneg_fix.patch


diff --git a/gcc/testsuite/gcc.target/aarch64/vneg_f.c 
b/gcc/testsuite/gcc.target/aarch64/vneg_f.c
index 
1eaf21d34eb57b4e7e5388a4686fe6341197447a..01503028547f320ab3d8ea725ff09ee5d0487f18
 100644
--- a/gcc/testsuite/gcc.target/aarch64/vneg_f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vneg_f.c
@@ -44,34 +44,27 @@ extern void abort (void);
  #define DATA_TYPE_64 double
  #define DATA_TYPE(data_len) DATA_TYPE_##data_len

-#define INDEX64_32 [i]
-#define INDEX64_64
-#define INDEX128_32 [i]
-#define INDEX128_64 [i]
-#define INDEX(reg_len, data_len) \
-  CONCAT1 (INDEX, reg_len##_##data_len)
-
+#define STORE_INST(reg_len, data_len) \
+  CONCAT1 (vst1, POSTFIX (reg_len, data_len))
  #define LOAD_INST(reg_len, data_len) \
CONCAT1 (vld1, POSTFIX (reg_len, data_len))
  #define NEG_INST(reg_len, data_len) \
CONCAT1 (vneg, POSTFIX (reg_len, data_len))

  #define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")
-
-#define RUN_TEST(test_set, reg_len, data_len, n, a, b) \
+#define RUN_TEST(test_set, reg_len, data_len, n, a, b, c) \
{  \
  int i;   \
  (a) = LOAD_INST (reg_len, data_len) (test_set);\
  (b) = NEG_INST (reg_len, data_len) (a);  \
+STORE_INST (reg_len, data_len) (c, b);\
  for (i = 0; i < n; i++)   \
{  \
DATA_TYPE (data_len) diff; \
INHIB_OPTIMIZATION;\
-   diff   \
- = a INDEX (reg_len, data_len)\
-   + b INDEX (reg_len, data_len); \
+   diff = test_set[i] + c[i]; \
if (diff > EPSILON) \
- return 1;\
+   return 1;  \
}  \
}

@@ -84,28 +77,29 @@ extern void abort (void);
  int
  test_vneg_f32 ()
  {
-  float test_set0[2] = { TEST0, TEST1 };
-  float test_set1[2] = { TEST2, TEST3 };
-  float test_set2[2] = { VAR_MAX, VAR_MIN };
-  float test_set3[2] = { INFINITY, NAN };
-
float32x2_t a;
float32x2_t b;
+  float32_t c[2];

-  RUN_TEST (test_set0, 64, 32, 2, a, b);
-  RUN_TEST (test_set1, 64, 32, 2, a, b);
-  RUN_TEST (test_set2, 64, 32, 2, a, b);
-  RUN_TEST (test_set3, 64, 32, 0, a, b);
+  float32_t test_set0[2] = { TEST0, TEST1 };
+  float32_t test_set1[2] = { TEST2, TEST3 };
+  float32_t test_set2[2] = { VAR_MAX, VAR_MIN };
+  float32_t test_set3[2] = { INFINITY, NAN };
+
+  RUN_TEST (test_set0, 64, 32, 2, a, b, c);
+  RUN_TEST (test_set1, 64, 32, 2, a, b, c);
+  RUN_TEST (test_set2, 64, 32, 2, a, b, c);
+  RUN_TEST (test_set3, 64, 32, 0, a, b, c);

/* Since last test cannot be checked in a uniform way by adding
   negation result to original value, the number of lanes to be
   checked in RUN_TEST is 0 (last argument).  Instead, result
   will be checked manually.  */

-  if (b[0] != -INFINITY)
+  if (c[0] != -INFINITY)
  return 1;

-  if (!__builtin_isnan (b[1]))
+  if (!__builtin_isnan (c[1]))
  return 1;

return 0;
@@ -130,37 +124,38 @@ test_vneg_f64 ()
  {
float64x1_t a;
float64x1_t b;
-
-  double test_set0[1] = { TEST0 };
-  double test_set1[1] = { TEST1 };
-  double test_set2[1] = { TEST2 };
-  double test_set3[1] = { TEST3 };
-  double test_set4[1] = {

Re: [Patch AArch64] Implement Vector Permute Support

2014-01-20 Thread Alex Velenko

On 17/01/14 15:55, Richard Earnshaw wrote:

On 16/01/14 14:43, Alex Velenko wrote:

On 14/01/14 15:51, pins...@gmail.com wrote:




On Jan 14, 2014, at 7:19 AM, Alex Velenko  wrote:

Hi,

This patch turns off the vec_perm patterns for aarch64_be, this should resolve
the issue  highlighted here 
http://gcc.gnu.org/ml/gcc-patches/2014-01/msg00321.html
With this patch applied, the test case provided in that link compiles without 
an ICE.

However, the Big-Endian port is still in development. This patch exposes
another known but unrelated issue with Big-Endian Large-Int modes.

The patch has been tested on aarch64-none-elf and aarch64_be-none-elf resulting 
in five
further regression due to the broken implementation of Big-Endian Large-Int 
modes.

Kind regards,
Alex Velenko

gcc/

2014-01-14  Alex Velenko  

 * config/aarch64/aarch64-simd.md (vec_perm): Add BE check.
 * config/aarch64/aarch64.c (aarch64_expand_vec_perm): Add comment.

gcc/testsuite/

2014-01-14  Alex Velenko  

 * lib/target-supports.exp
 (check_effective_target_vect_perm): Exclude aarch64_be.
 (check_effective_target_vect_perm_byte): Likewise.
 (check_effective_target_vect_perm_short): Likewise.


I think you want to use a function to check if the target is effectively 
big-endian instead.  Internally at Cavium, our elf compiler has big-endian 
multi-lib.

Thanks,
Andrew








Hi,
Here is a vec-perm patch with changes proposed previously.
Little and Big-Endian tested with no additional issues appearing.

Kind regards,
Alex

gcc/

2014-01-16  Alex Velenko  

* config/aarch64/aarch64-simd.md (vec_perm): Add BE check.
* config/aarch64/aarch64.c (aarch64_expand_vec_perm): Add comment.

gcc/testsuite/

2014-01-16  Alex Velenko  

* lib/target-supports.exp
(check_effective_target_vect_perm): Exclude aarch64_be.
(check_effective_target_vect_perm_byte): Likewise.
(check_effective_target_vect_perm_short): Likewise.



The patch is missing the hunk for aarch64.c.




Hi,
It is a faulty changelog entry, not the patch.
It should be:

gcc/

2014-01-16  Alex Velenko  

* config/aarch64/aarch64-simd.md (vec_perm): Add BE check.

gcc/testsuite/

2014-01-16  Alex Velenko  

* lib/target-supports.exp
(check_effective_target_vect_perm): Exclude aarch64_be.
(check_effective_target_vect_perm_byte): Likewise.
(check_effective_target_vect_perm_short): Likewise.


Re: [Patch][AArch64] vneg floating point testcase BE fixed

2014-01-20 Thread Alex Velenko

On 17/01/14 14:39, Richard Earnshaw wrote:

On 17/01/14 14:22, Alex Velenko wrote:

Hi,
Here are some more improvements on changelog entry:

gcc/testsuite/

2013-01-16  Alex Velenko  

* gcc.target/aarch64/vneg_f.c (STORE_INST): New macro.
(RUN_TEST): Use new macro.
(INDEX64_32): Delete.
(INDEX64_64): Likewise.
(INDEX128_32): Likewise.
(INDEX128_64): Likewise.
(INDEX): Likewise.
(test_vneg_f32): Use fixed RUN_TEST.
(test_vneg_f64): Likewise.
(test_vnegq_f32): Likewise.
(test_vnegq_f64): Likewise.




OK.

R.



Could someone, please, commit it, as I do not have commit rights?

Alex


Re: [Patch AArch64] Implement Vector Permute Support

2014-01-20 Thread Alex Velenko

On 20/01/14 11:16, Richard Earnshaw wrote:

On 20/01/14 11:15, Alex Velenko wrote:

On 17/01/14 15:55, Richard Earnshaw wrote:

On 16/01/14 14:43, Alex Velenko wrote:

On 14/01/14 15:51, pins...@gmail.com wrote:




On Jan 14, 2014, at 7:19 AM, Alex Velenko  wrote:

Hi,

This patch turns off the vec_perm patterns for aarch64_be, this should resolve
the issue  highlighted here 
http://gcc.gnu.org/ml/gcc-patches/2014-01/msg00321.html
With this patch applied, the test case provided in that link compiles without 
an ICE.

However, the Big-Endian port is still in development. This patch exposes
another known but unrelated issue with Big-Endian Large-Int modes.

The patch has been tested on aarch64-none-elf and aarch64_be-none-elf resulting 
in five
further regression due to the broken implementation of Big-Endian Large-Int 
modes.

Kind regards,
Alex Velenko

gcc/

2014-01-14  Alex Velenko  

  * config/aarch64/aarch64-simd.md (vec_perm): Add BE check.
  * config/aarch64/aarch64.c (aarch64_expand_vec_perm): Add comment.

gcc/testsuite/

2014-01-14  Alex Velenko  

  * lib/target-supports.exp
  (check_effective_target_vect_perm): Exclude aarch64_be.
  (check_effective_target_vect_perm_byte): Likewise.
  (check_effective_target_vect_perm_short): Likewise.


I think you want to use a function to check if the target is effectively 
big-endian instead.  Internally at Cavium, our elf compiler has big-endian 
multi-lib.

Thanks,
Andrew








Hi,
Here is a vec-perm patch with changes proposed previously.
Little and Big-Endian tested with no additional issues appearing.

Kind regards,
Alex

gcc/

2014-01-16  Alex Velenko  

* config/aarch64/aarch64-simd.md (vec_perm): Add BE check.
* config/aarch64/aarch64.c (aarch64_expand_vec_perm): Add comment.

gcc/testsuite/

2014-01-16  Alex Velenko  

* lib/target-supports.exp
(check_effective_target_vect_perm): Exclude aarch64_be.
(check_effective_target_vect_perm_byte): Likewise.
(check_effective_target_vect_perm_short): Likewise.



The patch is missing the hunk for aarch64.c.




Hi,
It is a faulty changelog entry, not patch.
Should be:

gcc/

2014-01-16  Alex Velenko  

  * config/aarch64/aarch64-simd.md (vec_perm): Add BE check.

gcc/testsuite/

2014-01-16  Alex Velenko  

  * lib/target-supports.exp
  (check_effective_target_vect_perm): Exclude aarch64_be.
  (check_effective_target_vect_perm_byte): Likewise.
  (check_effective_target_vect_perm_short): Likewise.



On that basis, OK.

R.



Can someone, please, commit this patch, as I do not have permissions?
Kind regards,
Alex


Re: [PATCH AArch64_BE 1/4] Big-Endian lane numbering fix

2014-01-21 Thread Alex Velenko

Hi,
Can someone, please, commit this patch as I do not have privileges to
do so.
Kind regards,
Alex Velenko

On 21/01/14 13:27, Marcus Shawcroft wrote:

On 16 January 2014 11:49, Alex Velenko  wrote:

Hi,
This patch is the first patch in a series of patches fixing Big-Endian
lane numbering. The goal of this series of patches is to make proper
bridge between pure GCC big-endian view on lane numbering and internal
architected view.


OK /Marcus





Re: [PATCH AArch64_BE 2/4] Big-Endian lane numbering fix

2014-01-21 Thread Alex Velenko

Hi,
Can someone, please, commit this patch as I do not have privileges to
do so.
Kind regards,
Alex Velenko

On 21/01/14 13:31, Marcus Shawcroft wrote:

On 16 January 2014 11:49, Alex Velenko  wrote:

Hi,
This patch changes get_lane intrinsics to provide a correct big-endian
indexing. This fixes numerous BE load and store issues based on getting
correct lane.

Is this good for trunk?


OK
/Marcus





Re: [PATCH AArch64_BE 3/4] Big-Endian lane numbering fix

2014-01-21 Thread Alex Velenko

Hi,
Can someone, please, commit this patch as I do not have privileges to
do so.
Kind regards,
Alex Velenko

On 21/01/14 13:32, Marcus Shawcroft wrote:

On 16 January 2014 11:50, Alex Velenko  wrote:

Hi,

This patch by James Greenhalgh fixes "by-lane" patterns broken by
previous patches.


Regression tested on aarch64-none-elf and aarch64_be-none-elf
with no unexpected issues.

OK?



OK /Marcus





Re: [PATCH AArch64_BE 4/4] Big-Endian lane numbering fix

2014-01-21 Thread Alex Velenko

Hi,
Can someone, please, commit this patch as I do not have privileges to
do so.
Kind regards,
Alex Velenko

On 21/01/14 13:34, Marcus Shawcroft wrote:

2014/1/16 Alex Velenko :

Hi,
In previous BE patches the way lane indexing in lanes is calculated has
been changed. To accommodate the change, arm neon intrinsics had to be
updated.

Is it okay?


OK /Marcus





Re: [Patch][AArch64] NEON vdup testcases

2014-01-22 Thread Alex Velenko

On 16/01/14 12:12, Alex Velenko wrote:

[AArch64] VDUP testcases

Hi,

This patch implements test cases for following NEON intrinsics:
vdup_lane_f32
vdup_lane_s[8,16]
vdup_lane_s[32,64]
vdup_n_[p,s,u][8,16]
vdup_n_[s,u][32,64]

vdupb_lane_[s,u]8
vduph_lane_[s,u]16
vdupd_lane_[f,s,u]64
vdups_lane_[f,s,u]32

vdupq_lane_[f,s][32,64]
vdupq_lane_s[8,16]
vdup[q]_n_f32
vdupq_n_f64
vdupq_n_[s,p,u][8,16]
vdupq_n_[s,u][32,64]

Tests succeed on both Little-Endian and Big-Endian.

Ok for trunk?

Thanks,
Alex

gcc/testsuite/

2014-01-16  Alex Velenko  

 * gcc.target/aarch64/vdup_lane_1.c: New testcase.
 * gcc.target/aarch64/vdup_lane_2.c: New testcase.
 * gcc.target/aarch64/vdup_n_1.c: New testcase.


Ping!

Hi,
Can someone, please, review the patch?
Kind regards,
Alex Velenko


Re: [PATCH][AArch64] Vector shift by 64 fix

2014-01-22 Thread Alex Velenko

On 06/01/14 11:52, Alex Velenko wrote:

Hi,

This patch fixes vector shift by 64 behavior to meet reference
manual expectations. Testcase included to check that expectations
are now met. No regressions found.

Is patch OK?

Thanks,
Alex

2014-01-06  Alex Velenko  

gcc/

 * config/aarch64/aarch64-simd-builtins.def (ashr): DI mode removed.
 (ashr_simd): New builtin handling DI mode.
 * config/aarch64/aarch64-simd.md (aarch64_ashr_simddi): New pattern.
 (aarch64_sshr_simddi): New match pattern.
 * config/aarch64/arm_neon.h (vshr_n_s32): Builtin call modified.
 (vshrd_n_s64): Likewise.
 * config/aarch64/predicates.md (aarch64_shift_imm64_di): New
predicate.

gcc/testsuite/

 * gcc.target/aarch64/sshr64_1.c: New testcase.
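
For context, a minimal sketch of the semantics the fix targets (illustrative only, assuming the ACLE definition that an arithmetic shift right by 64 replicates the sign bit):

#include <arm_neon.h>

/* An arithmetic shift right by 64 fills the result with the sign bit,
   so the result is -1 for negative inputs and 0 otherwise.  */
int64_t
sshr_by_64 (int64_t x)
{
  return vshrd_n_s64 (x, 64);
}
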


Ping!

Hi,
Can someone, please, review the patch.
Kind regards,
Alex


Re: [PATCH][AArch64] Vector shift by 64 fix

2014-01-23 Thread Alex Velenko

Hi,
Could someone, please, commit this patch, as I do not have permissions 
to do so.

Kind regards,
Alex

On 23/01/14 12:04, Marcus Shawcroft wrote:

On 6 January 2014 11:52, Alex Velenko  wrote:

Hi,

This patch fixes vector shift by 64 behavior to meet reference
manual expectations. Testcase included to check that expectations
are now met. No regressions found.

Is patch OK?


OK
/Marcus





[Patch][AArch64] Shift right pattern fix

2014-01-30 Thread Alex Velenko

Hi,
This patch fixes the shift right pattern, as it failed at -O0 after the shift
right patch. The reason was an unnecessary move of the immediate value to
a register due to a type mismatch.
Patch is tested not to cause any additional regressions.
Could someone, please, approve and commit this patch, as I do not have
the rights to do so?

Kind regards,
Alex

2014-01-28  Alex Velenko  

gcc/

* config/aarch64/aarch64-simd.md (aarch64_ashr_simddi): Fixed.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 7378da9122d550f869c3e830e3e5a7681e7581f6..4dffb59e856aeaafb79007255d3b91a73ef1ef13 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -689,7 +689,7 @@
 (define_expand "aarch64_ashr_simddi"
   [(match_operand:DI 0 "register_operand" "=w")
(match_operand:DI 1 "register_operand" "w")
-   (match_operand:QI 2 "aarch64_shift_imm64_di" "")]
+   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
   "TARGET_SIMD"
   {
 if (INTVAL (operands[2]) == 64)


Re: [Patch][AArch64] Shift right pattern fix

2014-02-03 Thread Alex Velenko

Hi,
I agree with the changelog change. Could this patch, please, be committed, as I
do not have the rights to do so?

Kind regards,
Alex

On 30/01/14 22:36, Marcus Shawcroft wrote:

On 30 January 2014 15:28, Alex Velenko  wrote:

Hi,
This patch fixes shift right pattern, as it failed on -O0 after shift
right patch. The reason was unnecessary movement of immediate value to
a register due to type mismatch.
Patch is tested not to cause any additional regressions.
Could someone, please, approve and commit this patch, as I do not have
the rights to do so?

Kind regards,
Alex

2014-01-28  Alex Velenko  

gcc/

 * config/aarch64/aarch64-simd.md (aarch64_ashr_simddi): Fixed.


Fixed doesn't say what was changed, how about:

* config/aarch64/aarch64-simd.md (aarch64_ashr_simddi): Change QI to SI.

OK with that change.
/Marcus





[PATCH][AArch64] vqneg and vqabs intrinsics implementation

2014-02-12 Thread Alex Velenko

Hi,

This patch implements the vqneg_s64, vqnegd_s64, vqabs_s64 and
vqabsd_s64 AArch64 intrinsics. Regression tests are added.
A full regression run showed no regressions.

Is the patch OK?

Thanks,
Alex

gcc/

2014-02-12  Alex Velenko  

* gcc/config/aarch64/aarch64-simd.md (aarch64_s):
Pattern extended.
* config/aarch64/aarch64-simd-builtins.def (sqneg): Iterator
extended.
(sqabs): Likewise.
* config/aarch64/arm_neon.h (vqneg_s64): New intrinsic.
(vqnegd_s64): Likewise.
(vqabs_s64): Likewise.
(vqabsd_s64): Likewise.

gcc/testsuite/

2014-02-12  Alex Velenko  

* gcc.target/aarch64/vqneg_s64_1.c: New testcase.
* gcc.target/aarch64/vqabs_s64_1.c: New testcase.
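
For context, a minimal sketch of the saturating behaviour the new scalar intrinsics expose (illustrative only):

#include <arm_neon.h>
#include <stdint.h>

/* sqabs/sqneg saturate: the absolute value of INT64_MIN cannot be
   represented, so the result saturates to INT64_MAX instead of
   wrapping back to INT64_MIN.  */
int64_t
saturating_abs_of_min (void)
{
  return vqabsd_s64 (INT64_MIN);  /* INT64_MAX */
}
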
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e5f71b479ccfd1a9cbf84aed0f96b49762053f59..b3d0989f1b3bce1cab301f5fdb522324ed758c87 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -142,8 +142,8 @@
   BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0)
   BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0)
   /* Implemented by aarch64_s.  */
-  BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0)
-  BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0)
+  BUILTIN_VSDQ_I (UNOP, sqabs, 0)
+  BUILTIN_VSDQ_I (UNOP, sqneg, 0)
 
   BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0)
   BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 7378da9122d550f869c3e830e3e5a7681e7581f6..8a63dcdae8376b935c004fc84081e222d0a9a720 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2585,9 +2585,9 @@
 ;; q
 
 (define_insn "aarch64_s"
-  [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w")
-	(UNQOPS:VSDQ_I_BHSI
-	  (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))]
+  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
+	(UNQOPS:VSDQ_I
+	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
   "TARGET_SIMD"
   "s\\t%0, %1"
   [(set_attr "type" "neon_")]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6af99361b8e265f66026dc506cfc23f044d153b4..7347bc0b18968d69b1c66ec75d30facb59450936 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -2318,6 +2318,12 @@ vqneg_s32 (int32x2_t __a)
   return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
 }
 
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vqneg_s64 (int64x1_t __a)
+{
+  return __builtin_aarch64_sqnegdi (__a);
+}
+
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vqnegq_s8 (int8x16_t __a)
 {
@@ -2354,6 +2360,12 @@ vqabs_s32 (int32x2_t __a)
   return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
 }
 
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vqabs_s64 (int64x1_t __a)
+{
+  return __builtin_aarch64_sqabsdi (__a);
+}
+
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vqabsq_s8 (int8x16_t __a)
 {
@@ -20943,6 +20955,12 @@ vqabss_s32 (int32x1_t __a)
   return (int32x1_t) __builtin_aarch64_sqabssi (__a);
 }
 
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqabsd_s64 (int64_t __a)
+{
+  return __builtin_aarch64_sqabsdi (__a);
+}
+
 /* vqadd */
 
 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
@@ -21561,6 +21579,12 @@ vqnegs_s32 (int32x1_t __a)
   return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
 }
 
+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
+vqnegd_s64 (int64_t __a)
+{
+  return __builtin_aarch64_sqnegdi (__a);
+}
+
 /* vqrdmulh */
 
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c b/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c
new file mode 100644
index ..3ea532278d6db7aedc0b6cc6c2498658ad80a72b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c
@@ -0,0 +1,54 @@
+/* Test vqabs_s64 intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+test_vqabs_s64 (int64x1_t passed, int64_t expected)
+{
+  return vget_lane_s64 (vqabs_s64 (passed), 0) != expected;
+}
+
+int __attribute__ ((noinline))
+test_vqabsd_s64 (int64_t passed, int64_t expected)
+{
+  return vqabsd_s64 (passed) != expected;
+}
+
+/* { dg-final { scan-assembler-times "sqabs\\td\[0-9\]+, d\[0-9\]+" 2 } } */
+
+int
+main (int argc, char **argv)
+{
+  /* Basic test.  */
+  if (test_vqabs_s64 (vcreate_s64 (-1), 1))
+abort ();
+  if (test_vqabsd_s64 (-1, 1))
+abort ();
+
+  /* Getting absolute value of min int64_t.
+ Note, exact resu

[PATCH][AArch64] vrnd<*>_f64 patch for stage-1

2014-02-13 Thread Alex Velenko

Hi,
This patch adds the vrnd<*>_f64 AArch64 intrinsics. A testcase for those
intrinsics is added. Ran a complete LE and BE regression run with no
regressions.

Is the patch OK for stage-1?

2014-02-13  Alex Velenko  

gcc/

* config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro
added.
* config/aarch64/aarch64-simd-builtins.def (frintn): Use added
macro.
* config/aarch64/aarch64-simd.md (): Comment
corrected.
* config/aarch64/aarch64.md (): Likewise.
* config/aarch64/arm_neon.h (vrnd_f64): Added.
(vrnda_f64): Likewise.
(vrndi_f64): Likewise.
(vrndm_f64): Likewise.
(vrndn_f64): Likewise.
(vrndp_f64): Likewise.
(vrndx_f64): Likewise.

gcc/testsuite/

* gcc.target/aarch64/vrnd_f64_1.c: New testcase.
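
In short, each new 64-bit intrinsic lowers to the matching scalar rounding of lane 0. A minimal sketch of the pattern, taken from the vrnd_f64 definition in the diff below:

#include <arm_neon.h>

/* Round toward zero; the same vget/vset lane-0 pattern is used for
   vrnda (round), vrndi (nearbyint), vrndm (floor), vrndp (ceil),
   vrndx (rint) and vrndn (round to nearest even).  */
float64x1_t
round_toward_zero (float64x1_t a)
{
  return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (a, 0)), a, 0);
}
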
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index ebab2ce8347a4425977c5cbd0f285c3ff1d9f2f1..7adc5fb96b6473ecde5c4f76973aff68af0ca7d4 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -307,6 +307,8 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
 #define BUILTIN_VDQF(T, N, MAP) \
   VAR3 (T, N, MAP, v2sf, v4sf, v2df)
+#define BUILTIN_VDQF_DF(T, N, MAP) \
+  VAR4 (T, N, MAP, v2sf, v4sf, v2df, df)
 #define BUILTIN_VDQH(T, N, MAP) \
   VAR2 (T, N, MAP, v4hi, v8hi)
 #define BUILTIN_VDQHS(T, N, MAP) \
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e5f71b479ccfd1a9cbf84aed0f96b49762053f59..09e230c56683a0225f8760472d7137b7bac98297 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -264,7 +264,7 @@
   BUILTIN_VDQF (UNOP, nearbyint, 2)
   BUILTIN_VDQF (UNOP, rint, 2)
   BUILTIN_VDQF (UNOP, round, 2)
-  BUILTIN_VDQF (UNOP, frintn, 2)
+  BUILTIN_VDQF_DF (UNOP, frintn, 2)
 
   /* Implemented by l2.  */
   VAR1 (UNOP, lbtruncv2sf, 2, v2si)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4dffb59e856aeaafb79007255d3b91a73ef1ef13..0c1d7de5b3f4fb0fa8fa226b81ec690d8112b849 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1427,7 +1427,7 @@
 )
 
 ;; Vector versions of the floating-point frint patterns.
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 (define_insn "2"
   [(set (match_operand:VDQF 0 "register_operand" "=w")
 	(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 99a6ac8fcbdcd24a0ea18cc037bef9cf72070281..577aa9fe08bb445e66734bc404e94e13dc1fa65b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3187,7 +3187,7 @@
 ;; ---
 
 ;; frint floating-point round to integral standard patterns.
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 
 (define_insn "2"
   [(set (match_operand:GPF 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6af99361b8e265f66026dc506cfc23f044d153b4..797e37ad638648312ef34bcd63c463e5873c30c4 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -22481,6 +22481,12 @@ vrnd_f32 (float32x2_t __a)
   return __builtin_aarch64_btruncv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrnd_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndq_f32 (float32x4_t __a)
 {
@@ -22501,6 +22507,12 @@ vrnda_f32 (float32x2_t __a)
   return __builtin_aarch64_roundv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrnda_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndaq_f32 (float32x4_t __a)
 {
@@ -22521,6 +22533,12 @@ vrndi_f32 (float32x2_t __a)
   return __builtin_aarch64_nearbyintv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrndi_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndiq_f32 (float32x4_t __a)
 {
@@ -22541,6 +22559,12 @@ vrndm_f32 (float32x2_t __a)
   return __builtin_aarch64_floorv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __att

Re: [PATCH][AArch64] vrnd<*>_f64 patch for stage-1

2014-02-21 Thread Alex Velenko

On 13/02/14 17:43, Richard Henderson wrote:

On 02/13/2014 03:17 AM, Alex Velenko wrote:

+/* Sets "rmode" field of "FPCR" control register to
+   "FPROUNDING_ZERO".  */


Comment is wrong, or at least misleading.


+void __inline __attribute__ ((__always_inline__))
+set_rounding_mode (uint32_t mode)
+{
+  uint32_t r;
+
+  /* Read current FPCR.  */
+  asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :);
+
+  /* Clear rmode.  */
+  r &= 3 << RMODE_START;


   ~(3 << RMODE_START)


+  /* Calculate desired FPCR.  */
+  r |= mode << RMODE_START;
+
+  /* Write desired FPCR back.  */
+  asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
+}


Fortunately for this testcase, you do always use FPROUNDING_ZERO == 3 when
calling this function, so the bugs are hidden.


r~
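
For reference, the helper reads as follows once both review comments are applied. This is a sketch only: the RMODE_START and FPROUNDING_ZERO values are assumptions based on the FPCR RMode field occupying bits [23:22] and the round-toward-zero encoding being 0b11, matching the constants the quoted testcase appears to use.

#include <stdint.h>

#define RMODE_START 22       /* assumption: FPCR.RMode is bits [23:22] */
#define FPROUNDING_ZERO 3    /* assumption: RMode encoding for round toward zero */

void __inline __attribute__ ((__always_inline__))
set_rounding_mode (uint32_t mode)
{
  uint32_t r;

  /* Read current FPCR.  */
  asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :);

  /* Clear the rmode field (note the ~, as pointed out in the review).  */
  r &= ~(3u << RMODE_START);

  /* Insert the desired rounding mode.  */
  r |= mode << RMODE_START;

  /* Write the desired FPCR back.  */
  asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
}
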



Hi Richard,
Thank you for pointing those issues out. Here is a respin of the same
patch with the indicated issues fixed. The description of the patch is as
follows:


This patch adds the vrnd<*>_f64 AArch64 intrinsics. A testcase for those
intrinsics is added. Ran a complete LE and BE regression run with no
regressions.

Is the patch OK for stage-1?

gcc/

2014-02-21  Alex Velenko  

* config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro
added.
* config/aarch64/aarch64-simd-builtins.def (frintn): Use added
macro.
* config/aarch64/aarch64-simd.md (): Comment
corrected.
* config/aarch64/aarch64.md (): Likewise.
* config/aarch64/arm_neon.h (vrnd_f64): Added.
(vrnda_f64): Likewise.
(vrndi_f64): Likewise.
(vrndm_f64): Likewise.
(vrndn_f64): Likewise.
(vrndp_f64): Likewise.
(vrndx_f64): Likewise.

gcc/testsuite/

2014-02-21  Alex Velenko  

* gcc.target/aarch64/vrnd_f64_1.c: New testcase.


diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index ebab2ce8347a4425977c5cbd0f285c3ff1d9f2f1..7adc5fb96b6473ecde5c4f76973aff68af0ca7d4 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -307,6 +307,8 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
 #define BUILTIN_VDQF(T, N, MAP) \
   VAR3 (T, N, MAP, v2sf, v4sf, v2df)
+#define BUILTIN_VDQF_DF(T, N, MAP) \
+  VAR4 (T, N, MAP, v2sf, v4sf, v2df, df)
 #define BUILTIN_VDQH(T, N, MAP) \
   VAR2 (T, N, MAP, v4hi, v8hi)
 #define BUILTIN_VDQHS(T, N, MAP) \
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e5f71b479ccfd1a9cbf84aed0f96b49762053f59..09e230c56683a0225f8760472d7137b7bac98297 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -264,7 +264,7 @@
   BUILTIN_VDQF (UNOP, nearbyint, 2)
   BUILTIN_VDQF (UNOP, rint, 2)
   BUILTIN_VDQF (UNOP, round, 2)
-  BUILTIN_VDQF (UNOP, frintn, 2)
+  BUILTIN_VDQF_DF (UNOP, frintn, 2)
 
   /* Implemented by l2.  */
   VAR1 (UNOP, lbtruncv2sf, 2, v2si)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4dffb59e856aeaafb79007255d3b91a73ef1ef13..0c1d7de5b3f4fb0fa8fa226b81ec690d8112b849 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1427,7 +1427,7 @@
 )
 
 ;; Vector versions of the floating-point frint patterns.
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 (define_insn "2"
   [(set (match_operand:VDQF 0 "register_operand" "=w")
 	(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 99a6ac8fcbdcd24a0ea18cc037bef9cf72070281..577aa9fe08bb445e66734bc404e94e13dc1fa65b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3187,7 +3187,7 @@
 ;; ---
 
 ;; frint floating-point round to integral standard patterns.
-;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
 
 (define_insn "2"
   [(set (match_operand:GPF 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6af99361b8e265f66026dc506cfc23f044d153b4..797e37ad638648312ef34bcd63c463e5873c30c4 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -22481,6 +22481,12 @@ vrnd_f32 (float32x2_t __a)
   return __builtin_aarch64_btruncv2sf (__a);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vrnd_f64 (float64x1_t __a)
+{
+  return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vrndq_f32 (f

[AArch64] Logical vector shift right conformance

2014-02-25 Thread Alex Velenko

Hi,

This patch fixes a bug in the vshr_n_u64 and vshrd_n_u64 intrinsic
behavior in the case of a shift by 64. A shift by 64 is strictly defined in
ACLE to use the ushr instruction intended by those intrinsics.

The testcase provided also tests the behavior of the intrinsics mentioned
above with values other than 64. In addition, the test checks that an
illegal ushr shift by 0 is not generated, expecting the test to compile
and run correctly while generating instructions other than ushr.


The patch was tested for LE and BE with no regressions.

Is the given patch OK for stage-4?

Thanks,
Alex

gcc/

2014-02-25  Alex Velenko  

* config/aarch64/aarch64-simd-builtins.def (lshr): DI mode excluded.
(lshr_simd): DI mode added.
* config/aarch64/aarch64-simd.md (aarch64_lshr_simddi): New pattern.
(aarch64_ushr_simddi): Likewise.
* config/aarch64/aarch64.md (UNSPEC_USHR64): New unspec.
* config/aarch64/arm_neon.h (vshr_n_u64): Intrinsic fixed.
(vshrd_n_u64): Likewise.

gcc/testsuite/

2014-02-25  Alex Velenko  

* gcc.target/aarch64/ushr64_1.c: New testcase.
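
For context, a minimal sketch of the ACLE behaviour the patch enforces (illustrative only): a logical shift right by 64 must clear the value rather than being rejected or left undefined.

#include <arm_neon.h>

/* A ushr by 64 shifts every bit out, so the result is 0 for any input.  */
uint64x1_t
ushr_by_64 (uint64x1_t a)
{
  return vshr_n_u64 (a, 64);
}
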
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index ebab2ce8347a4425977c5cbd0f285c3ff1d9f2f1..ac5522cac00e6dd8a808ac3c68b4fa8cc15d9120 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -183,6 +183,10 @@ aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define TYPES_GETLANE (aarch64_types_getlane_qualifiers)
 #define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers)
 static enum aarch64_type_qualifiers
+aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
+#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
+static enum aarch64_type_qualifiers
 aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
 #define TYPES_SETLANE (aarch64_types_setlane_qualifiers)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e5f71b479ccfd1a9cbf84aed0f96b49762053f59..c9b7570e565979cb454d594c84e625380419d0e6 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -192,7 +192,8 @@
 
   BUILTIN_VDQ_I (SHIFTIMM, ashr, 3)
   VAR1 (SHIFTIMM, ashr_simd, 0, di)
-  BUILTIN_VSDQ_I_DI (SHIFTIMM, lshr, 3)
+  BUILTIN_VDQ_I (SHIFTIMM, lshr, 3)
+  VAR1 (USHIFTIMM, lshr_simd, 0, di)
   /* Implemented by aarch64_shr_n.  */
   BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0)
   BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4dffb59e856aeaafb79007255d3b91a73ef1ef13..6048d605c72e6a43b9a004a8bc89dbfa89f3ed5b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -724,6 +724,31 @@
   DONE;
 })
 
+(define_expand "aarch64_lshr_simddi"
+  [(match_operand:DI 0 "register_operand" "=w")
+   (match_operand:DI 1 "register_operand" "w")
+   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
+  "TARGET_SIMD"
+  {
+if (INTVAL (operands[2]) == 64)
+  emit_insn (gen_aarch64_ushr_simddi (operands[0], operands[1]));
+else
+  emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
+DONE;
+  }
+)
+
+;; SIMD shift by 64.  This pattern is a special case as standard pattern does
+;; not handle NEON shifts by 64.
+(define_insn "aarch64_ushr_simddi"
+  [(set (match_operand:DI 0 "register_operand" "=w")
+(unspec:DI
+  [(match_operand:DI 1 "register_operand" "w")] UNSPEC_USHR64))]
+  "TARGET_SIMD"
+  "ushr\t%d0, %d1, 64"
+  [(set_attr "type" "neon_shift_imm")]
+)
+
 (define_expand "vec_set<mode>"
   [(match_operand:VQ_S 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 99a6ac8fcbdcd24a0ea18cc037bef9cf72070281..c86a29d8e7f8df21f25e14d22df1c3e8c37c907f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -101,6 +101,7 @@
 UNSPEC_TLS
 UNSPEC_TLSDESC
 UNSPEC_USHL_2S
+UNSPEC_USHR64
 UNSPEC_VSTRUCTDUMMY
 ])
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6af99361b8e265f66026dc506cfc23f044d153b4..612b899f31584378844f1b82353e8d1dd3d5ec61 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -23364,7 +23364,7 @@ vshr_n_u32 (uint32x2_t __a, const int __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vshr_n_u64 (uint64x1_t __a, const int __b)
 {
-  return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
+  return __built

[AArch64] 64-bit float vreinterpret implemention

2014-02-25 Thread Alex Velenko

Hi,
This patch implements the vreinterpret intrinsics for vectors with
64-bit float lanes and adds a testcase for them.
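
As an illustration of what these intrinsics mean (this sketch is not part
of the patch; the constant below is just the IEEE-754 encoding of 1.0),
vreinterpret only changes the vector type and leaves the underlying lane
bits untouched:

#include <arm_neon.h>

extern void abort (void);

int
main (void)
{
  float64x1_t f = vdup_n_f64 (1.0);

  /* Reinterpreting to int64 exposes the IEEE-754 bit pattern of 1.0.  */
  int64x1_t bits = vreinterpret_s64_f64 (f);
  if (vget_lane_s64 (bits, 0) != 0x3ff0000000000000LL)
    abort ();

  /* Reinterpreting back recovers the original value bit for bit.  */
  float64x1_t back = vreinterpret_f64_s64 (bits);
  if (vget_lane_f64 (back, 0) != 1.0)
    abort ();

  return 0;
}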

This patch was tested on LE and BE with no regressions.

Is this patch ok for stage-1?

Thanks,
Alex

gcc/

2014-02-14  Alex Velenko  

* config/aarch64/aarch64-builtins.c (aarch64_types_su_qualifiers):
Qualifier added.
(aarch64_types_sp_qualifiers): Likewise.
(aarch64_types_us_qualifiers): Likewise.
(aarch64_types_ps_qualifiers): Likewise.
(TYPES_REINTERP_SS): Type macro added.
(TYPES_REINTERP_SU): Likewise.
(TYPES_REINTERP_SP): Likewise.
(TYPES_REINTERP_US): Likewise.
(TYPES_REINTERP_PS): Likewise.
* config/aarch64/aarch64-simd-builtins.def (REINTERP):
Declarations removed.
(REINTERP_SS): Declarations added.
(REINTERP_US): Likewise.
(REINTERP_PS): Likewise.
(REINTERP_SU): Likewise.
(REINTERP_SP): Likewise.
* config/aarch64/arm_neon.h (vreinterpret_p8_f64): Implemented.
(vreinterpretq_p8_f64): Likewise.
(vreinterpret_p16_f64): Likewise.
(vreinterpretq_p16_f64): Likewise.
(vreinterpret_f32_f64): Likewise.
(vreinterpretq_f32_f64): Likewise.
(vreinterpret_f64_f32): Likewise.
(vreinterpret_f64_p8): Likewise.
(vreinterpret_f64_p16): Likewise.
(vreinterpret_f64_s8): Likewise.
(vreinterpret_f64_s16): Likewise.
(vreinterpret_f64_s32): Likewise.
(vreinterpret_f64_s64): Likewise.
(vreinterpret_f64_u8): Likewise.
(vreinterpret_f64_u16): Likewise.
(vreinterpret_f64_u32): Likewise.
(vreinterpret_f64_u64): Likewise.
(vreinterpretq_f64_f32): Likewise.
(vreinterpretq_f64_p8): Likewise.
(vreinterpretq_f64_p16): Likewise.
(vreinterpretq_f64_s8): Likewise.
(vreinterpretq_f64_s16): Likewise.
(vreinterpretq_f64_s32): Likewise.
(vreinterpretq_f64_s64): Likewise.
(vreinterpretq_f64_u8): Likewise.
(vreinterpretq_f64_u16): Likewise.
(vreinterpretq_f64_u32): Likewise.
(vreinterpretq_f64_u64): Likewise.
(vreinterpret_s64_f64): Likewise.
(vreinterpretq_s64_f64): Likewise.
(vreinterpret_u64_f64): Likewise.
(vreinterpretq_u64_f64): Likewise.
(vreinterpret_s8_f64): Likewise.
(vreinterpretq_s8_f64): Likewise.
(vreinterpret_s16_f64): Likewise.
(vreinterpretq_s16_f64): Likewise.
(vreinterpret_s32_f64): Likewise.
(vreinterpretq_s32_f64): Likewise.
(vreinterpret_u8_f64): Likewise.
(vreinterpretq_u8_f64): Likewise.
(vreinterpret_u16_f64): Likewise.
(vreinterpretq_u16_f64): Likewise.
(vreinterpret_u32_f64): Likewise.
(vreinterpretq_u32_f64): Likewise.

gcc/testsuite/

2014-02-14  Alex Velenko  

* gcc.target/aarch64/vreinterpret_f64_1.c: New testcase.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 5e0e9b94653deb1530955d62d9842c39da95058a..0485447d266fd7542d66f01f2d4d4cbc37177079 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -147,6 +147,23 @@ aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned };
 #define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
 #define TYPES_CREATE (aarch64_types_unop_qualifiers)
+#define TYPES_REINTERP_SS (aarch64_types_unop_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_unop_su_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_unsigned };
+#define TYPES_REINTERP_SU (aarch64_types_unop_su_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_unop_sp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_poly };
+#define TYPES_REINTERP_SP (aarch64_types_unop_sp_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_unop_us_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_none };
+#define TYPES_REINTERP_US (aarch64_types_unop_us_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_unop_ps_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_poly, qualifier_none };
+#define TYPES_REINTERP_PS (aarch64_types_unop_ps_qualifiers)
 static enum aarch64_type_qualifiers
 aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 8a3d7ecbbfc7743310da3f46a03f42a524302c9f..82aceedb4ec3c639df504aaeff9a54a174b6acf8 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -51,6 +51,28 @@
   VAR1 (GETLANE, get_lane, 0, di)
   BUILTIN_VALL (GETLANE, be_checked_get_lane, 0)
 
+  VAR1 (REINTERP_SS, reinterpretdi, 0, df)
+  VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
+  VAR1

Re: [AArch64] 64-bit float vreinterpret implemention

2014-02-28 Thread Alex Velenko

On 25/02/14 18:15, Richard Henderson wrote:

On 02/25/2014 09:02 AM, Alex Velenko wrote:

+(define_expand "aarch64_reinterpretdf"
+  [(match_operand:DF 0 "register_operand" "")
+   (match_operand:VD_RE 1 "register_operand" "")]
+  "TARGET_SIMD"
+{
+  aarch64_simd_reinterpret (operands[0], operands[1]);
+  DONE;
+})


I believe you want to implement these in aarch64_fold_builtin to fold to a
VIEW_CONVERT_EXPR.  No sense in leaving these opaque until rtl expansion.


r~



Hi Richard,
Thank you for your suggestion. Attached is a patch that implements your
proposal. The testsuite was run on LE and BE compilers with no
regressions.

Here is the description of the patch:

This patch implements the vreinterpret intrinsics for vectors with
64-bit float lanes and adds a testcase for those intrinsics.
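
To show the shape of the folding Richard suggested, here is a hedged
sketch; the helper name below is invented for illustration, while the
real change lives in aarch64_fold_builtin. A reinterpret builtin simply
folds to a VIEW_CONVERT_EXPR of its single argument, so the type change
becomes visible to the gimple optimizers instead of staying opaque until
RTL expansion.

static tree
aarch64_fold_reinterpret_sketch (tree type, tree arg)
{
  /* A reinterpret is a pure change of type over the same bits, which
     VIEW_CONVERT_EXPR expresses directly at the tree level.  */
  return fold_build1 (VIEW_CONVERT_EXPR, type, arg);
}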


Thanks,
Alex

gcc/

2014-02-28  Alex Velenko  

* config/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed.
(aarch64_types_signed_unsigned_qualifiers): Qualifier added.
(aarch64_types_signed_poly_qualifiers): Likewise.
(aarch64_types_unsigned_signed_qualifiers): Likewise.
(aarch64_types_poly_signed_qualifiers): Likewise.
(TYPES_REINTERP_SS): Type macro added.
(TYPES_REINTERP_SU): Likewise.
(TYPES_REINTERP_SP): Likewise.
(TYPES_REINTERP_US): Likewise.
(TYPES_REINTERP_PS): Likewise.
(aarch64_fold_builtin): New expression folding added.
* config/aarch64/aarch64-simd-builtins.def (REINTERP):
Declarations removed.
(REINTERP_SS): Declarations added.
(REINTERP_US): Likewise.
(REINTERP_PS): Likewise.
(REINTERP_SU): Likewise.
(REINTERP_SP): Likewise.
* config/aarch64/arm_neon.h (vreinterpret_p8_f64): Implemented.
(vreinterpretq_p8_f64): Likewise.
(vreinterpret_p16_f64): Likewise.
(vreinterpretq_p16_f64): Likewise.
(vreinterpret_f32_f64): Likewise.
(vreinterpretq_f32_f64): Likewise.
(vreinterpret_f64_f32): Likewise.
(vreinterpret_f64_p8): Likewise.
(vreinterpret_f64_p16): Likewise.
(vreinterpret_f64_s8): Likewise.
(vreinterpret_f64_s16): Likewise.
(vreinterpret_f64_s32): Likewise.
(vreinterpret_f64_s64): Likewise.
(vreinterpret_f64_u8): Likewise.
(vreinterpret_f64_u16): Likewise.
(vreinterpret_f64_u32): Likewise.
(vreinterpret_f64_u64): Likewise.
(vreinterpretq_f64_f32): Likewise.
(vreinterpretq_f64_p8): Likewise.
(vreinterpretq_f64_p16): Likewise.
(vreinterpretq_f64_s8): Likewise.
(vreinterpretq_f64_s16): Likewise.
(vreinterpretq_f64_s32): Likewise.
(vreinterpretq_f64_s64): Likewise.
(vreinterpretq_f64_u8): Likewise.
(vreinterpretq_f64_u16): Likewise.
(vreinterpretq_f64_u32): Likewise.
(vreinterpretq_f64_u64): Likewise.
(vreinterpret_s64_f64): Likewise.
(vreinterpretq_s64_f64): Likewise.
(vreinterpret_u64_f64): Likewise.
(vreinterpretq_u64_f64): Likewise.
(vreinterpret_s8_f64): Likewise.
(vreinterpretq_s8_f64): Likewise.
(vreinterpret_s16_f64): Likewise.
(vreinterpretq_s16_f64): Likewise.
(vreinterpret_s32_f64): Likewise.
(vreinterpretq_s32_f64): Likewise.
(vreinterpret_u8_f64): Likewise.
(vreinterpretq_u8_f64): Likewise.
(vreinterpret_u16_f64): Likewise.
(vreinterpretq_u16_f64): Likewise.
(vreinterpret_u32_f64): Likewise.
(vreinterpretq_u32_f64): Likewise.

gcc/testsuite/

2014-02-28  Alex Velenko  

* gcc.target/aarch64/vreinterpret_f64_1.c: New testcase.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 5e0e9b94653deb1530955d62d9842c39da95058a..8241f918e3fcfb71144daf1c873ba1ed481a4385 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -147,6 +147,23 @@ aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned };
 #define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
 #define TYPES_CREATE (aarch64_types_unop_qualifiers)
+#define TYPES_REINTERP_SS (aarch64_types_unop_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_unop_su_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_unsigned };
+#define TYPES_REINTERP_SU (aarch64_types_unop_su_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_unop_sp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_poly };
+#define TYPES_REINTERP_SP (aarch64_types_unop_sp_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_unop_us_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_none };
+#define TYPES_REINTERP_US (aarch64_types_unop_us_qualifiers)
+static enum aarch64_type_qualif

[AArch64] VDUP Testcases

2014-03-14 Thread Alex Velenko

Hi,
This patch adds vdup intrinsic testcases for AArch64. Those testcases
are worth having, as they make it possible to reason about vdup
consistency for both the LE and BE compiler flavors.

This patch covers following intrinsics:

vdup_lane_f32
vdup_lane_s[8,16]
vdup_lane_s[32,64]
vdup_n_[p,s,u][8,16]
vdup_n_[s,u][32,64]

vdupb_lane_[s,u]8
vduph_lane_[s,u]16
vdupd_lane_[f,s,u]64
vdups_lane_[f,s,u]32

vdupq_lane_[f,s][32,64]
vdupq_lane_s[8,16]
vdup[q]_n_f32
vdupq_n_f64
vdupq_n_[s,p,u][8,16]
vdupq_n_[s,u][32,64]
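
For reference, here is a small sketch (not one of the new testcases) of
the two vdup flavors being covered; the expected instructions in the
comments are assumptions about the usual code generation, not text taken
from the patch.

#include <arm_neon.h>

int32x2_t
dup_from_lane (int32x2_t a)
{
  /* Lane form, expected to become something like "dup v0.2s, v0.s[1]".  */
  return vdup_lane_s32 (a, 1);
}

int32x2_t
dup_from_scalar (int32_t a)
{
  /* Scalar form, expected to become something like "dup v0.2s, w0".  */
  return vdup_n_s32 (a);
}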

Is it OK for trunk?

Kind regards,
Alex

gcc/testsuite/

2014-03-14  Alex Velenko  

* gcc.target/aarch64/vdup_lane_1.c: New testcase.
* gcc.target/aarch64/vdup_lane_2.c: New testcase.
* gcc.target/aarch64/vdup_n_1.c: New testcase.
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
new file mode 100644
index ..4582471c8aad3d855eb33494ac01a62c87978ca9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
@@ -0,0 +1,430 @@
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps -O1" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_f32_0 (float32x2_t a)
+{
+  return vdup_lane_f32 (a, 0);
+}
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_f32_1 (float32x2_t a)
+{
+  return vdup_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_f32 ()
+{
+  float32x2_t a;
+  float32x2_t b;
+  int i;
+  float32_t c[2] = { 0.0 , 3.14 };
+  float32_t d[2];
+
+  a = vld1_f32 (c);
+  b = wrap_vdup_lane_f32_0 (a);
+  vst1_f32 (d, b);
+  for (i = 0; i < 2; i++)
+if (c[0] != d[i])
+  return 1;
+
+  b = wrap_vdup_lane_f32_1 (a);
+  vst1_f32 (d, b);
+  for (i = 0; i < 2; i++)
+if (c[1] != d[i])
+  return 1;
+  return 0;
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_f32_0 (float32x2_t a)
+{
+  return vdupq_lane_f32 (a, 0);
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_f32_1 (float32x2_t a)
+{
+  return vdupq_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_f32 ()
+{
+  float32x2_t a;
+  float32x4_t b;
+  int i;
+  float32_t c[2] = { 0.0 , 3.14 };
+  float32_t d[4];
+
+  a = vld1_f32 (c);
+  b = wrap_vdupq_lane_f32_0 (a);
+  vst1q_f32 (d, b);
+  for (i = 0; i < 4; i++)
+if (c[0] != d[i])
+  return 1;
+
+  b = wrap_vdupq_lane_f32_1 (a);
+  vst1q_f32 (d, b);
+  for (i = 0; i < 4; i++)
+if (c[1] != d[i])
+  return 1;
+  return 0;
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_lane_s8_0 (int8x8_t a)
+{
+  return vdup_lane_s8 (a, 0);
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_lane_s8_1 (int8x8_t a)
+{
+  return vdup_lane_s8 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s8 ()
+{
+  int8x8_t a;
+  int8x8_t b;
+  int i;
+  /* Only the first two lanes are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[8];
+
+  a = vld1_s8 (c);
+  b = wrap_vdup_lane_s8_0 (a);
+  vst1_s8 (d, b);
+  for (i = 0; i < 8; i++)
+if (c[0] != d[i])
+  return 1;
+
+  b = wrap_vdup_lane_s8_1 (a);
+  vst1_s8 (d, b);
+  for (i = 0; i < 8; i++)
+if (c[1] != d[i])
+  return 1;
+  return 0;
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_lane_s8_0 (int8x8_t a)
+{
+  return vdupq_lane_s8 (a, 0);
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_lane_s8_1 (int8x8_t a)
+{
+  return vdupq_lane_s8 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s8 ()
+{
+  int8x8_t a;
+  int8x16_t b;
+  int i;
+  /* Only the first two lanes are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[16];
+
+  a = vld1_s8 (c);
+  b = wrap_vdupq_lane_s8_0 (a);
+  vst1q_s8 (d, b);
+  for (i = 0; i < 16; i++)
+if (c[0] != d[i])
+  return 1;
+
+  b = wrap_vdupq_lane_s8_1 (a);
+  vst1q_s8 (d, b);
+  for (i = 0; i < 16; i++)
+if (c[1] != d[i])
+  return 1;
+  return 0;
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_lane_s16_0 (int16x4_t a)
+{
+  return vdup_lane_s16 (a, 0);
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_lane_s16_1 (int16x4_t a)
+{
+  return vdup_lane_s16 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s16 ()
+{
+  int16x4_t a;
+  int16x4_t b;
+  int i;
+  /* Only the first two lanes are interesting.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[4];
+
+  a = vld1_s16 (c);
+  b = wrap_vdup_lane_s16_0 (a);
+  vst1_s16 (d, b);
+  for (i = 0; i < 4; i++)
+if (c[0] != d[i])
+  return 1;
+
+  b = wrap_vdup_lane_s16_1 (a);
+  vst1_s16 (d, b);
+  for (i = 0; i < 4; i++)
+if (c[1] != d[i])
+  return 1;
+  return 0;
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_lane_s16_0 (int16x4_t a)
+{
+  return vdupq_lane_s16 (a, 0);
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_lane_s16_1 (int16x4_t a)
+{
+  return vdupq_lane_s16 (a