Hi, This patch adds built-in functions for vec_cmpge and vec_cmple required by the ELFv2 ABI but not yet present in gcc. These make use of the existing patterns for gt:VEC_I and gtu:VEC_I, applying the "not" operator and reversing the order of operands as needed. The result is generation of a vcmpgt[su][bhwd] instruction followed by an xxlnor.
Since it is likely that a vec_cmp[gl]e will be followed by a vec_sel, it will be important to optimize the xxlnor/xxsel* to remove the xxlnor by reversing the sense of the select. A separate patch will follow to add this pattern to simplify-rtx.c. Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions. Ok for trunk? Thanks, Bill [gcc] 2015-06-29 Bill Schmidt <wschm...@linux.vnet.ibm.com> * config/rs6000/rs6000-builtin.def (CMPGE_16QI): New built-in definition. (CMPGE_8HI): Likewise. (CMPGE_4SI): Likewise. (CMPGE_2DI): Likewise. (CMPGE_U16QI): Likewise. (CMPGE_U8HI): Likewise. (CMPGE_U4SI): Likewise. (CMPGE_U2DI): Likewise. (CMPLE_16QI): Likewise. (CMPLE_8HI): Likewise. (CMPLE_4SI): Likewise. (CMPLE_2DI): Likewise. (CMPLE_U16QI): Likewise. (CMPLE_U8HI): Likewise. (CMPLE_U4SI): Likewise. (CMPLE_U2DI): Likewise. * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add overloads for ALTIVEC_BUILTIN_VEC_CMPGE and ALTIVEC_BUILTIN_VEC_CMPLE. * config/rs6000/vector.md (vector_ge<mode>): Restrict to floating-point vector modes. (vector_nlt<mode>): New define_expand. (vector_nltu<mode>): Likewise. (vector_ngt<mode>): Likewise. (vector_ngtu<mode>): Likewise. [gcc/testsuite] 2015-06-29 Bill Schmidt <wschm...@linux.vnet.ibm.com> * gcc.target/powerpc/vec-cmp.c: New test. 
Index: gcc/config/rs6000/rs6000-builtin.def =================================================================== --- gcc/config/rs6000/rs6000-builtin.def (revision 224924) +++ gcc/config/rs6000/rs6000-builtin.def (working copy) @@ -1284,6 +1284,24 @@ BU_VSX_2 (XVCVUXDDP_SCALE, "xvcvuxddp_scale", C BU_VSX_2 (XVCVDPSXDS_SCALE, "xvcvdpsxds_scale", CONST, vsx_xvcvdpsxds_scale) BU_VSX_2 (XVCVDPUXDS_SCALE, "xvcvdpuxds_scale", CONST, vsx_xvcvdpuxds_scale) +BU_VSX_2 (CMPGE_16QI, "cmpge_16qi", CONST, vector_nltv16qi) +BU_VSX_2 (CMPGE_8HI, "cmpge_8hi", CONST, vector_nltv8hi) +BU_VSX_2 (CMPGE_4SI, "cmpge_4si", CONST, vector_nltv4si) +BU_VSX_2 (CMPGE_2DI, "cmpge_2di", CONST, vector_nltv2di) +BU_VSX_2 (CMPGE_U16QI, "cmpge_u16qi", CONST, vector_nltuv16qi) +BU_VSX_2 (CMPGE_U8HI, "cmpge_u8hi", CONST, vector_nltuv8hi) +BU_VSX_2 (CMPGE_U4SI, "cmpge_u4si", CONST, vector_nltuv4si) +BU_VSX_2 (CMPGE_U2DI, "cmpge_u2di", CONST, vector_nltuv2di) + +BU_VSX_2 (CMPLE_16QI, "cmple_16qi", CONST, vector_ngtv16qi) +BU_VSX_2 (CMPLE_8HI, "cmple_8hi", CONST, vector_ngtv8hi) +BU_VSX_2 (CMPLE_4SI, "cmple_4si", CONST, vector_ngtv4si) +BU_VSX_2 (CMPLE_2DI, "cmple_2di", CONST, vector_ngtv2di) +BU_VSX_2 (CMPLE_U16QI, "cmple_u16qi", CONST, vector_ngtuv16qi) +BU_VSX_2 (CMPLE_U8HI, "cmple_u8hi", CONST, vector_ngtuv8hi) +BU_VSX_2 (CMPLE_U4SI, "cmple_u4si", CONST, vector_ngtuv4si) +BU_VSX_2 (CMPLE_U2DI, "cmple_u2di", CONST, vector_ngtuv2di) + /* VSX abs builtin functions. 
*/ BU_VSX_A (XVABSDP, "xvabsdp", CONST, absv2df2) BU_VSX_A (XVNABSDP, "xvnabsdp", CONST, vsx_nabsv2df2) Index: gcc/config/rs6000/rs6000-c.c =================================================================== --- gcc/config/rs6000/rs6000-c.c (revision 224924) +++ gcc/config/rs6000/rs6000-c.c (working copy) @@ -1096,6 +1096,26 @@ const struct altivec_builtin_types altivec_overloa RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_XVCMPGEDP, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPGE, VSX_BUILTIN_CMPGE_U2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0}, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTUB, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSB, @@ -1146,6 +1166,26 @@ const struct altivec_builtin_types altivec_overloa RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_XVCMPGEDP, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 
RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0}, + { ALTIVEC_BUILTIN_VEC_CMPLE, VSX_BUILTIN_CMPLE_U2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0}, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTUB, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSB, Index: gcc/config/rs6000/vector.md =================================================================== --- gcc/config/rs6000/vector.md (revision 224924) +++ gcc/config/rs6000/vector.md (working copy) @@ -446,12 +446,25 @@ "") (define_expand "vector_ge<mode>" - [(set (match_operand:VEC_C 0 "vlogical_operand" "") - (ge:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "") - (match_operand:VEC_C 2 "vlogical_operand" "")))] + [(set (match_operand:VEC_F 0 "vlogical_operand" "") + (ge:VEC_F (match_operand:VEC_F 1 "vlogical_operand" "") + (match_operand:VEC_F 2 "vlogical_operand" "")))] "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +; >= for integer vectors: swap operands and apply not-greater-than +(define_expand "vector_nlt<mode>" + [(set (match_operand:VEC_I 
3 "vlogical_operand" "") + (gt:VEC_I (match_operand:VEC_I 2 "vlogical_operand" "") + (match_operand:VEC_I 1 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + (define_expand "vector_gtu<mode>" [(set (match_operand:VEC_I 0 "vint_operand" "") (gtu:VEC_I (match_operand:VEC_I 1 "vint_operand" "") @@ -459,6 +472,19 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +; >= for integer vectors: swap operands and apply not-greater-than +(define_expand "vector_nltu<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gtu:VEC_I (match_operand:VEC_I 2 "vlogical_operand" "") + (match_operand:VEC_I 1 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + (define_expand "vector_geu<mode>" [(set (match_operand:VEC_I 0 "vint_operand" "") (geu:VEC_I (match_operand:VEC_I 1 "vint_operand" "") @@ -466,6 +492,31 @@ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" "") +; <= for integer vectors: apply not-greater-than +(define_expand "vector_ngt<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gt:VEC_I (match_operand:VEC_I 1 "vlogical_operand" "") + (match_operand:VEC_I 2 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = gen_reg_rtx_and_attrs (operands[0]); +}") + +(define_expand "vector_ngtu<mode>" + [(set (match_operand:VEC_I 3 "vlogical_operand" "") + (gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand" "") + (match_operand:VEC_I 2 "vlogical_operand" ""))) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (not:VEC_I (match_dup 3)))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)" + " +{ + operands[3] = 
gen_reg_rtx_and_attrs (operands[0]); +}") + (define_insn_and_split "*vector_uneq<mode>" [(set (match_operand:VEC_F 0 "vfloat_operand" "") (uneq:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") Index: gcc/testsuite/gcc.target/powerpc/vec-cmp.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vec-cmp.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vec-cmp.c (working copy) @@ -0,0 +1,113 @@ +/* { dg-do compile { target { powerpc64*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc64*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-O2 -mcpu=power8" } */ +/* { dg-final { scan-assembler-times "vcmpgtsb" 2 } } */ +/* { dg-final { scan-assembler-times "vcmpgtub" 2 } } */ +/* { dg-final { scan-assembler-times "vcmpgtsh" 2 } } */ +/* { dg-final { scan-assembler-times "vcmpgtuh" 2 } } */ +/* { dg-final { scan-assembler-times "vcmpgtsw" 2 } } */ +/* { dg-final { scan-assembler-times "vcmpgtuw" 2 } } */ +/* { dg-final { scan-assembler-times "vcmpgtsd" 2 } } */ +/* { dg-final { scan-assembler-times "vcmpgtud" 2 } } */ +/* { dg-final { scan-assembler-times "xxlnor" 16 } } */ + +#include <altivec.h> + +vector bool char +cmple_sc (vector signed char x, vector signed char y) +{ + return vec_cmple (x, y); +} + +vector bool char +cmple_uc (vector unsigned char x, vector unsigned char y) +{ + return vec_cmple (x, y); +} + +vector bool short +cmple_ss (vector signed short x, vector signed short y) +{ + return vec_cmple (x, y); +} + +vector bool short +cmple_us (vector unsigned short x, vector unsigned short y) +{ + return vec_cmple (x, y); +} + +vector bool int +cmple_si (vector signed int x, vector signed int y) +{ + return vec_cmple (x, y); +} + +vector bool int +cmple_ui (vector unsigned int x, vector unsigned int y) +{ + return vec_cmple (x, y); +} + +vector bool long long 
+cmple_sl (vector signed long long x, vector signed long long y) +{ + return vec_cmple (x, y); +} + +vector bool long long +cmple_ul (vector unsigned long long x, vector unsigned long long y) +{ + return vec_cmple (x, y); +} + +vector bool char +cmpge_sc (vector signed char x, vector signed char y) +{ + return vec_cmpge (x, y); +} + +vector bool char +cmpge_uc (vector unsigned char x, vector unsigned char y) +{ + return vec_cmpge (x, y); +} + +vector bool short +cmpge_ss (vector signed short x, vector signed short y) +{ + return vec_cmpge (x, y); +} + +vector bool short +cmpge_us (vector unsigned short x, vector unsigned short y) +{ + return vec_cmpge (x, y); +} + +vector bool int +cmpge_si (vector signed int x, vector signed int y) +{ + return vec_cmpge (x, y); +} + +vector bool int +cmpge_ui (vector unsigned int x, vector unsigned int y) +{ + return vec_cmpge (x, y); +} + +vector bool long long +cmpge_sl (vector signed long long x, vector signed long long y) +{ + return vec_cmpge (x, y); +} + +vector bool long long +cmpge_ul (vector unsigned long long x, vector unsigned long long y) +{ + return vec_cmpge (x, y); +} +