Eric, I'm sure you have noticed this, but the SPARC target test "combined-1.c" has been failing for some time on 32-bit because of how float arguments are passed in the 32-bit SPARC ABI.
Since they are passed in integer registers, the vectorizer does the initial logical operations using non-VIS instructions, then moves them into the FPU registers to use the VIS vector addition and subtraction. Because of this, some of the scan-assembler directives in that test fail to match. Would you mind if I added a hack, like we use in the other test cases and the one I added here, to force the VIS operations to be used on 32-bit? We could either pass the vectors as pointer args, or use the trick where we return the vector values from extern functions. Any preference? Committed to trunk. gcc/ * config/sparc/sparc.c (sparc_vis_init_builtins): Add explicit builtins for VIS vector addition and subtraction. * config/sparc/visintrin.h (__vis_fpadd16, __vis_fpadd16s, __vis_fpadd32, __vis_fpadd32s, __vis_fpsub16, __vis_fpsub16s, __vis_fpsub32, __vis_fpsub32s): New. * doc/extend.texi: Document new VIS intrinsics. gcc/testsuite/ * gcc.target/sparc/fpaddsubi.c: New test. --- gcc/ChangeLog | 7 +++ gcc/config/sparc/sparc.c | 21 ++++++++++ gcc/config/sparc/visintrin.h | 57 +++++++++++++++++++++++++++ gcc/doc/extend.texi | 10 +++++ gcc/testsuite/ChangeLog | 2 + gcc/testsuite/gcc.target/sparc/fpaddsubi.c | 58 ++++++++++++++++++++++++++++ 6 files changed, 155 insertions(+), 0 deletions(-) create mode 100644 gcc/testsuite/gcc.target/sparc/fpaddsubi.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ecdb26b..a3dd883 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -24,6 +24,13 @@ * config/sparc/sparc-protos.h (sparc_target_macros): Declare. * config/sparc/sparc.h (TARGE_CPU_CPP_BUILTINS): Call it. + * config/sparc/sparc.c (sparc_vis_init_builtins): Add explicit + builtins for VIS vector addition and subtraction. + * config/sparc/visintrin.h (__vis_fpadd16, __vis_fpadd16s, + __vis_fpadd32, __vis_fpadd32s, __vis_fpsub16, __vis_fpsub16s, + __vis_fpsub32, __vis_fpsub32s): New. + * doc/extend.texi: Document new VIS intrinsics. 
+ 2011-09-26 Georg-Johann Lay <a...@gjlay.de> * config/avr/avr.md (peephole casesi+2): Use -1 instead of 65536. diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index d1d8355..44d7f20 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -9173,6 +9173,7 @@ sparc_vis_init_builtins (void) tree v4hi = build_vector_type (intHI_type_node, 4); tree v2hi = build_vector_type (intHI_type_node, 2); tree v2si = build_vector_type (intSI_type_node, 2); + tree v1si = build_vector_type (intSI_type_node, 1); tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0); tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0); @@ -9186,6 +9187,8 @@ sparc_vis_init_builtins (void) tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0); tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0); tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0); + tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0); + tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0); tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node, v8qi, v8qi, intDI_type_node, 0); @@ -9350,6 +9353,24 @@ sparc_vis_init_builtins (void) def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis, si_ftype_v2si_v2si); } + + /* Addition and subtraction. 
*/ + def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3, + v4hi_ftype_v4hi_v4hi); + def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3, + v2hi_ftype_v2hi_v2hi); + def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3, + v2si_ftype_v2si_v2si); + def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addsi3, + v1si_ftype_v1si_v1si); + def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3, + v4hi_ftype_v4hi_v4hi); + def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3, + v2hi_ftype_v2hi_v2hi); + def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3, + v2si_ftype_v2si_v2si); + def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subsi3, + v1si_ftype_v1si_v1si); } /* Handle TARGET_EXPAND_BUILTIN target hook. diff --git a/gcc/config/sparc/visintrin.h b/gcc/config/sparc/visintrin.h index d37bd95..eb2b4ec 100644 --- a/gcc/config/sparc/visintrin.h +++ b/gcc/config/sparc/visintrin.h @@ -25,6 +25,7 @@ #define _VISINTRIN_H_INCLUDED typedef int __v2si __attribute__ ((__vector_size__ (8))); +typedef int __v1si __attribute__ ((__vector_size__ (4))); typedef short __v4hi __attribute__ ((__vector_size__ (8))); typedef short __v2hi __attribute__ ((__vector_size__ (4))); typedef unsigned char __v8qi __attribute__ ((__vector_size__ (8))); @@ -276,4 +277,60 @@ __vis_fcmpeq32 (__v2si __A, __v2si __B) return __builtin_vis_fcmpeq32 (__A, __B); } +extern __inline __v4hi +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__vis_fpadd16 (__v4hi __A, __v4hi __B) +{ + return __builtin_vis_fpadd16 (__A, __B); +} + +extern __inline __v2hi +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__vis_fpadd16s (__v2hi __A, __v2hi __B) +{ + return __builtin_vis_fpadd16s (__A, __B); +} + +extern __inline __v2si +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__vis_fpadd32 (__v2si __A, __v2si __B) +{ + return __builtin_vis_fpadd32 (__A, __B); +} + +extern __inline __v1si +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +__vis_fpadd32s (__v1si __A, __v1si __B) +{ + return __builtin_vis_fpadd32s (__A, __B); +} + +extern __inline __v4hi +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__vis_fpsub16 (__v4hi __A, __v4hi __B) +{ + return __builtin_vis_fpsub16 (__A, __B); +} + +extern __inline __v2hi +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__vis_fpsub16s (__v2hi __A, __v2hi __B) +{ + return __builtin_vis_fpsub16s (__A, __B); +} + +extern __inline __v2si +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__vis_fpsub32 (__v2si __A, __v2si __B) +{ + return __builtin_vis_fpsub32 (__A, __B); +} + +extern __inline __v1si +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__vis_fpsub32s (__v1si __A, __v1si __B) +{ + return __builtin_vis_fpsub32s (__A, __B); +} + #endif /* _VISINTRIN_H_INCLUDED */ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e9d0bc7..195fa8c 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -12929,6 +12929,7 @@ the SPARC Visual Instruction Set (VIS). 
When you use the @option{-mvis} switch, the VIS extension is exposed as the following built-in functions: @smallexample +typedef int v1si __attribute__ ((vector_size (4))); typedef int v2si __attribute__ ((vector_size (8))); typedef short v4hi __attribute__ ((vector_size (8))); typedef short v2hi __attribute__ ((vector_size (4))); @@ -12977,6 +12978,15 @@ long __builtin_vis_fcmpgt16 (v4hi, v4hi); long __builtin_vis_fcmpgt32 (v2si, v2si); long __builtin_vis_fcmpeq16 (v4hi, v4hi); long __builtin_vis_fcmpeq32 (v2si, v2si); + +v4hi __builtin_vis_fpadd16 (v4hi, v4hi); +v2hi __builtin_vis_fpadd16s (v2hi, v2hi); +v2si __builtin_vis_fpadd32 (v2si, v2si); +v1si __builtin_vis_fpadd32s (v1si, v1si); +v4hi __builtin_vis_fpsub16 (v4hi, v4hi); +v2hi __builtin_vis_fpsub16s (v2hi, v2hi); +v2si __builtin_vis_fpsub32 (v2si, v2si); +v1si __builtin_vis_fpsub32s (v1si, v1si); @end smallexample @node SPU Built-in Functions diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ebc9385..b430baf 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -14,6 +14,8 @@ * gcc.target/sparc/edge.c: Update for new return types. * gcc.target/sparc/fcmp.c: Likewise. + * gcc.target/sparc/fpaddsubi.c: New test. 
+ 2011-09-26 Janus Weil <ja...@gcc.gnu.org> PR fortran/50515 diff --git a/gcc/testsuite/gcc.target/sparc/fpaddsubi.c b/gcc/testsuite/gcc.target/sparc/fpaddsubi.c new file mode 100644 index 0000000..a36108e --- /dev/null +++ b/gcc/testsuite/gcc.target/sparc/fpaddsubi.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mcpu=ultrasparc -mvis" } */ +typedef int __v2si __attribute__((vector_size(8))); +typedef int __v1si __attribute__((vector_size(4))); +typedef short __v4hi __attribute__((vector_size(8))); +typedef short __v2hi __attribute__((vector_size(4))); + +extern __v1si foo_x (void); +extern __v1si foo_y (void); + +__v4hi test_fpadd16 (__v4hi x, __v4hi y) +{ + return __builtin_vis_fpadd16 (x, y); +} + +__v2hi test_fpadd16s (__v2hi x, __v2hi y) +{ + return __builtin_vis_fpadd16s (x, y); +} + +__v4hi test_fpsub16 (__v4hi x, __v4hi y) +{ + return __builtin_vis_fpsub16 (x, y); +} + +__v2hi test_fpsub16s (__v2hi x, __v2hi y) +{ + return __builtin_vis_fpsub16s (x, y); +} + +__v2si test_fpadd32 (__v2si x, __v2si y) +{ + return __builtin_vis_fpadd32 (x, y); +} + +__v1si test_fpadd32s (void) +{ + return __builtin_vis_fpadd32s (foo_x (), foo_y ()); +} + +__v2si test_fpsub32 (__v2si x, __v2si y) +{ + return __builtin_vis_fpsub32 (x, y); +} + +__v1si test_fpsub32s (__v1si x, __v1si y) +{ + return __builtin_vis_fpsub32s (foo_x (), foo_y ()); +} + +/* { dg-final { scan-assembler "fpadd16\t%" } } */ +/* { dg-final { scan-assembler "fpadd16s\t%" } } */ +/* { dg-final { scan-assembler "fpsub16\t%" } } */ +/* { dg-final { scan-assembler "fpsub16s\t%" } } */ +/* { dg-final { scan-assembler "fpadd32\t%" } } */ +/* { dg-final { scan-assembler "fpadd32s\t%" } } */ +/* { dg-final { scan-assembler "fpsub32\t%" } } */ +/* { dg-final { scan-assembler "fpsub32s\t%" } } */ -- 1.7.6.401.g6a319