[PATCH] RS6000 Add testlsbb by Byte operations Hi, Add support for the new instructions that test the least significant bit (LSB) by byte. [v2] Additional updates per feedback, including adding _all to the internal name, typo and cosmetic fixups throughout, and removal of the extraneous -mvsx from the tests. V2 has completed testing on powerpc64le-unknown-linux-gnu (Power8 LE); other regression tests are still in progress on some other powerpc platforms. OK for trunk?
Thanks, -Will [gcc] 2020-07-29 Will Schmidt <will_schm...@vnet.ibm.com> * config/rs6000/altivec.h (vec_test_lsbb_all_ones): New define. (vec_test_lsbb_all_zeros): New define. * config/rs6000/rs6000-builtin.def (BU_P10_VSX_1): New built-in handling macro. (XVTLSBB_ZEROS, XVTLSBB_ONES): New builtin defines. (xvtlsbb_all_zeros, xvtlsbb_all_ones): New builtin overloads. * config/rs6000/rs6000-call.c (P10_BUILTIN_VEC_XVTLSBB_ZEROS, P10_BUILTIN_VEC_XVTLSBB_ONES): New altivec_builtin_types entries. * config/rs6000/rs6000.md (UNSPEC_XVTLSBB): New unspec. * config/rs6000/vsx.md (*xvtlsbb_internal): New instruction define. (xvtlsbbo, xvtlsbbz): New instruction expands. [testsuite] * testsuite/gcc.target/powerpc/lsbb-runnable.c: New test. * testsuite/gcc.target/powerpc/lsbb.c: New test. diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 6c43124..119fb1c 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -491,10 +491,13 @@ #define vec_cmpnez __builtin_vec_vcmpnez #define vec_cntlz_lsbb __builtin_vec_vclzlsbb #define vec_cnttz_lsbb __builtin_vec_vctzlsbb +#define vec_test_lsbb_all_ones __builtin_vec_xvtlsbb_all_ones +#define vec_test_lsbb_all_zeros __builtin_vec_xvtlsbb_all_zeros + #define vec_xlx __builtin_vec_vextulx #define vec_xrx __builtin_vec_vexturx #endif /* Predicates. 
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index f703755..38f859f 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1060,10 +1060,18 @@ RS6000_BTM_P10, /* MASK */ \ (RS6000_BTC_ ## ATTR /* ATTR */ \ | RS6000_BTC_QUATERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +#define BU_P10_VSX_1(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_1 (P10_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + RS6000_BTM_P10, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_UNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #define BU_P10_OVERLOAD_1(ENUM, NAME) \ RS6000_BUILTIN_1 (P10_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ "__builtin_vec_" NAME, /* NAME */ \ RS6000_BTM_P10, /* MASK */ \ (RS6000_BTC_OVERLOADED /* ATTR */ \ @@ -2734,10 +2742,13 @@ BU_P10V_1 (VSTRIHL, "vstrihl", CONST, vstril_v8hi) BU_P10V_1 (VSTRIBR_P, "vstribr_p", CONST, vstrir_p_v16qi) BU_P10V_1 (VSTRIHR_P, "vstrihr_p", CONST, vstrir_p_v8hi) BU_P10V_1 (VSTRIBL_P, "vstribl_p", CONST, vstril_p_v16qi) BU_P10V_1 (VSTRIHL_P, "vstrihl_p", CONST, vstril_p_v8hi) +BU_P10_VSX_1 (XVTLSBB_ZEROS, "xvtlsbb_all_zeros", CONST, xvtlsbbz) +BU_P10_VSX_1 (XVTLSBB_ONES, "xvtlsbb_all_ones", CONST, xvtlsbbo) + BU_P10V_1 (MTVSRBM, "mtvsrbm", CONST, vec_mtvsr_v16qi) BU_P10V_1 (MTVSRHM, "mtvsrhm", CONST, vec_mtvsr_v8hi) BU_P10V_1 (MTVSRWM, "mtvsrwm", CONST, vec_mtvsr_v4si) BU_P10V_1 (MTVSRDM, "mtvsrdm", CONST, vec_mtvsr_v2di) BU_P10V_1 (MTVSRQM, "mtvsrqm", CONST, vec_mtvsr_v1ti) @@ -2769,10 +2780,14 @@ BU_P10_OVERLOAD_3 (EXTRACTH, "extracth") BU_P10_OVERLOAD_1 (VSTRIR, "strir") BU_P10_OVERLOAD_1 (VSTRIL, "stril") BU_P10_OVERLOAD_1 (VSTRIR_P, "strir_p") BU_P10_OVERLOAD_1 (VSTRIL_P, "stril_p") + +BU_P10_OVERLOAD_1 (XVTLSBB_ZEROS, "xvtlsbb_all_zeros") +BU_P10_OVERLOAD_1 (XVTLSBB_ONES, "xvtlsbb_all_ones") + BU_P10_OVERLOAD_1 (MTVSRBM, "mtvsrbm") BU_P10_OVERLOAD_1 (MTVSRHM, "mtvsrhm") BU_P10_OVERLOAD_1 (MTVSRWM, "mtvsrwm") BU_P10_OVERLOAD_1 
(MTVSRDM, "mtvsrdm") diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index 5ec3f2c..ece8d76 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -5679,10 +5679,15 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P10_BUILTIN_VEC_VEXTRACTM, P10_BUILTIN_VEXTRACTMD, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, 0, 0 }, { P10_BUILTIN_VEC_VEXTRACTM, P10_BUILTIN_VEXTRACTMQ, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, 0, 0 }, + { P10_BUILTIN_VEC_XVTLSBB_ZEROS, P10_BUILTIN_XVTLSBB_ZEROS, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P10_BUILTIN_VEC_XVTLSBB_ONES, P10_BUILTIN_XVTLSBB_ONES, + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { RS6000_BUILTIN_NONE, RS6000_BUILTIN_NONE, 0, 0, 0, 0 } }; /* Nonzero if we can use a floating-point register to pass this arg. */ #define USE_FP_FOR_ARG_P(CUM,MODE) \ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b3fcb84..074a1b9 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -75,10 +75,11 @@ UNSPEC_TLSGD UNSPEC_TLSLD UNSPEC_TLS_GET_ADDR UNSPEC_MOVESI_FROM_CR UNSPEC_MOVESI_TO_CR + UNSPEC_XVTLSBB UNSPEC_TLSDTPREL UNSPEC_TLSDTPRELHA UNSPEC_TLSDTPRELLO UNSPEC_TLSGOTDTPREL UNSPEC_TLSTPREL diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index f753771..57da340 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1983,10 +1983,49 @@ (match_dup 2)))] "VECTOR_UNIT_VSX_P (<MODE>mode)" "xvcmpgt<sd>p. %x0,%x1,%x2" [(set_attr "type" "<VStype_simple>")]) +;; xvtlsbb BF,XB +;; Set the CR field BF to indicate if bit 7 of every byte element in VSR[XB] +;; is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE). 
+(define_insn "*xvtlsbb_internal" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")] + UNSPEC_XVTLSBB))] + "TARGET_POWER10" + "xvtlsbb %0,%x1" + [(set_attr "type" "logical")]) + +;; Vector Test Least Significant Bit by Byte +;; for the implementation of the builtin +;; __builtin_vec_test_lsbb_all_ones +;; int vec_test_lsbb_all_ones (vector unsigned char); +;; and +;; __builtin_vec_test_lsbb_all_zeros +;; int vec_test_lsbb_all_zeros (vector unsigned char); +(define_expand "xvtlsbbo" + [(set (match_dup 2) + (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")] + UNSPEC_XVTLSBB)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (lt:SI (match_dup 2) (const_int 0)))] + "TARGET_POWER10" + { + operands[2] = gen_reg_rtx (CCmode); + }) +(define_expand "xvtlsbbz" + [(set (match_dup 2) + (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")] + UNSPEC_XVTLSBB)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (eq:SI (match_dup 2) (const_int 0)))] + "TARGET_POWER10" + { + operands[2] = gen_reg_rtx (CCmode); + }) + (define_insn "*vsx_ge_<mode>_p" [(set (reg:CC CR6_REGNO) (unspec:CC [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa") (match_operand:VSX_F 2 "vsx_register_operand" "wa"))] diff --git a/gcc/testsuite/gcc.target/powerpc/lsbb-runnable.c b/gcc/testsuite/gcc.target/powerpc/lsbb-runnable.c new file mode 100644 index 0000000..7da530c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/lsbb-runnable.c @@ -0,0 +1,65 @@ +/* + Test the least significant bit by byte instruction + xvtlsbb BF,XB + Using the builtins + int vec_test_lsbb_all_zeros (vector unsigned char); + int vec_test_lsbb_all_ones (vector unsigned char); + */ + +/* { dg-do run } */ +/* { dg-require-effective-target power10_hw } */ +/* { dg-options "-fno-inline -mdejagnu-cpu=power10 -O2" } */ + +#include <altivec.h> +#include <stdio.h> + +void abort (void); + +#define ITERS 7 +vector char input_vec[ITERS] = { + 
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}, + {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}, + {0xfe, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9} +}; + +int expected_allzeros_results[ITERS] = {1, 0, 0, 0, 0, 1, 0}; +int expected_allones_results[ITERS] = {0, 1, 0, 0, 1, 0, 0}; + +int test_for_zeros(vector char vc) { + return vec_test_lsbb_all_zeros(vc); +} + +int test_for_ones(vector char vc) { + return vec_test_lsbb_all_ones(vc); +} + +int main () +{ +int allzeros,allones; +int iter; +int failcount=0; +vector char srcvec; + +for (iter=0;iter<ITERS;iter++) { + srcvec = input_vec[iter]; + allzeros = test_for_zeros(srcvec); + allones = test_for_ones(srcvec); + if (allzeros != expected_allzeros_results[iter]) { + printf("fail on allzero check. iter %d, result was %d \n", iter, allzeros); + failcount++; + } + if (allones != expected_allones_results[iter]) { + printf("fail on allones check. 
iter %d, result was %d \n", iter, allones); + failcount++; + } +} + +if (failcount) + abort(); +return 0; +} + diff --git a/gcc/testsuite/gcc.target/powerpc/lsbb.c b/gcc/testsuite/gcc.target/powerpc/lsbb.c new file mode 100644 index 0000000..b5c0370 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/lsbb.c @@ -0,0 +1,24 @@ +/* + Test the least significant bit by byte instruction + xvtlsbb BF,XB + Using the builtins + int vec_test_lsbb_all_zeros (vector unsigned char); + int vec_test_lsbb_all_ones (vector unsigned char); + */ + +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-fno-inline -mdejagnu-cpu=power10 -O2" } */ + +/* { dg-final { scan-assembler-times {\mxvtlsbb\M} 2 } } */ +/* { dg-final { scan-assembler-times {\msetbc\M} 2 } } */ + +#include <altivec.h> + +int test_for_zeros(vector char vc) { + return vec_test_lsbb_all_zeros(vc); +} + +int test_for_ones(vector char vc) { + return vec_test_lsbb_all_ones(vc); +} +