On Tue, 2020-05-26 at 11:12 -0500, will schmidt wrote:
> Hi,
>
> Add support for new instructions to test LSB by Byte.
>
> Tested on powerpc64le-unknown-linux-gnu with no
> regressions. (power7BE, power8LE, power8BE, power9LE).
>
> [gcc]
>
> 2020-05-26 Will Schmidt <[email protected]>
>
> * config/rs6000/altivec.h (vec_test_lsbb_all_ones): New define.
> (vec_test_lsbb_all_zeros): New define.
> * config/rs6000/rs6000-builtin.def (BU_FUTURE_VSX_1): New built-in
> handling macro. (XVTLSBB_ZEROS, XVTLSBB_ONES): New builtin defines.
> (xvtlsbb_zeros, xvtlsbb_ones): New builtin overloads.
> * config/rs6000/rs6000-call.c (FUTURE_BUILTIN_VEC_XVTLSBB_ZEROS,
> FUTURE_BUILTIN_VEC_XVTLSBB_ONES): New altivec_builtin_types entries.
> * config/rs6000/rs6000.md (UNSPEC_XVTLSBB): New unspec.
> * config/rs6000/vsx.md (*xvtlsbb_internal): New instruction define.
> (xvtlsbbo, xvtlsbbz): New instruction expands.
>
> [testsuite]
>
> 2020-05-26 Will Schmidt <[email protected]>
>
> * testsuite/gcc.target/powerpc/lsbb-runnable.c: New test.
> * testsuite/gcc.target/powerpc/lsbb.c: New test.
>
>
ok
>
> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index 0a7e8ab..94afaf7 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -490,10 +490,13 @@
> #define vec_cmpnez __builtin_vec_vcmpnez
>
> #define vec_cntlz_lsbb __builtin_vec_vclzlsbb
> #define vec_cnttz_lsbb __builtin_vec_vctzlsbb
>
> +#define vec_test_lsbb_all_ones __builtin_vec_xvtlsbb_ones
> +#define vec_test_lsbb_all_zeros __builtin_vec_xvtlsbb_zeros
> +
> #define vec_xlx __builtin_vec_vextulx
> #define vec_xrx __builtin_vec_vexturx
> #endif
>
> /* Predicates.
> diff --git a/gcc/config/rs6000/rs6000-builtin.def
> b/gcc/config/rs6000/rs6000-builtin.def
> index 8b1ddb0..e149c5a 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -956,10 +956,18 @@
> RS6000_BTM_FUTURE, /* MASK */ \
> (RS6000_BTC_ ## ATTR /* ATTR */ \
> | RS6000_BTC_UNARY), \
> CODE_FOR_ ## ICODE) /* ICODE */
>
> +#define BU_FUTURE_VSX_1(ENUM, NAME, ATTR, ICODE) \
> + RS6000_BUILTIN_1 (FUTURE_BUILTIN_ ## ENUM, /* ENUM */ \
> + "__builtin_vsx_" NAME, /* NAME */ \
> + RS6000_BTM_FUTURE, /* MASK */ \
> + (RS6000_BTC_ ## ATTR /* ATTR */ \
> + | RS6000_BTC_UNARY), \
> + CODE_FOR_ ## ICODE) /* ICODE */
> +
> #define BU_FUTURE_V_2(ENUM, NAME, ATTR, ICODE) \
> RS6000_BUILTIN_2 (FUTURE_BUILTIN_ ## ENUM, /* ENUM */ \
> "__builtin_altivec_" NAME, /* NAME */ \
> RS6000_BTM_FUTURE, /* MASK */ \
> (RS6000_BTC_ ## ATTR /* ATTR */ \
> @@ -2635,10 +2643,13 @@ BU_FUTURE_V_1 (VSTRIHL, "vstrihl", CONST, vstril_v8hi)
> BU_FUTURE_V_1 (VSTRIBR_P, "vstribr_p", CONST, vstrir_p_v16qi)
> BU_FUTURE_V_1 (VSTRIHR_P, "vstrihr_p", CONST, vstrir_p_v8hi)
> BU_FUTURE_V_1 (VSTRIBL_P, "vstribl_p", CONST, vstril_p_v16qi)
> BU_FUTURE_V_1 (VSTRIHL_P, "vstrihl_p", CONST, vstril_p_v8hi)
>
> +BU_FUTURE_VSX_1 (XVTLSBB_ZEROS, "xvtlsbb_zeros", CONST, xvtlsbbz)
> +BU_FUTURE_VSX_1 (XVTLSBB_ONES, "xvtlsbb_ones", CONST, xvtlsbbo)
> +
> /* Future architecture overloaded vector built-ins. */
> BU_FUTURE_OVERLOAD_2 (CLRL, "clrl")
> BU_FUTURE_OVERLOAD_2 (CLRR, "clrr")
> BU_FUTURE_OVERLOAD_2 (GNB, "gnb")
> BU_FUTURE_OVERLOAD_4 (XXEVAL, "xxeval")
> @@ -2650,10 +2661,13 @@ BU_FUTURE_OVERLOAD_3 (EXTRACTH, "extracth")
> BU_FUTURE_OVERLOAD_1 (VSTRIR, "strir")
> BU_FUTURE_OVERLOAD_1 (VSTRIL, "stril")
>
> BU_FUTURE_OVERLOAD_1 (VSTRIR_P, "strir_p")
> BU_FUTURE_OVERLOAD_1 (VSTRIL_P, "stril_p")
> +
> +BU_FUTURE_OVERLOAD_1 (XVTLSBB_ZEROS, "xvtlsbb_zeros")
> +BU_FUTURE_OVERLOAD_1 (XVTLSBB_ONES, "xvtlsbb_ones")
>
> /* 1 argument crypto functions. */
> BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox_v2di)
> BU_CRYPTO_1 (VSBOX_BE, "vsbox_be", CONST,
> crypto_vsbox_v16qi)
>
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index 0ac8054..f83167f 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -5616,10 +5616,15 @@ const struct altivec_builtin_types
> altivec_overloaded_builtins[] = {
> { FUTURE_BUILTIN_VEC_VSTRIR_P, FUTURE_BUILTIN_VSTRIHR_P,
> RS6000_BTI_INTSI, RS6000_BTI_unsigned_V8HI, 0, 0 },
> { FUTURE_BUILTIN_VEC_VSTRIR_P, FUTURE_BUILTIN_VSTRIHR_P,
> RS6000_BTI_INTSI, RS6000_BTI_V8HI, 0, 0 },
>
> + { FUTURE_BUILTIN_VEC_XVTLSBB_ZEROS, FUTURE_BUILTIN_XVTLSBB_ZEROS,
> + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0},
> + { FUTURE_BUILTIN_VEC_XVTLSBB_ONES, FUTURE_BUILTIN_XVTLSBB_ONES,
> + RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0},
> +
Nearby entries have a space before the closing "}".
> { RS6000_BUILTIN_NONE, RS6000_BUILTIN_NONE, 0, 0, 0, 0 }
> };
>
> /* Nonzero if we can use a floating-point register to pass this arg. */
> #define USE_FP_FOR_ARG_P(CUM,MODE) \
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 0aa5265..df0b8b7 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -75,10 +75,11 @@
> UNSPEC_TLSGD
> UNSPEC_TLSLD
> UNSPEC_TLS_GET_ADDR
> UNSPEC_MOVESI_FROM_CR
> UNSPEC_MOVESI_TO_CR
> + UNSPEC_XVTLSBB
> UNSPEC_TLSDTPREL
> UNSPEC_TLSDTPRELHA
> UNSPEC_TLSDTPRELLO
> UNSPEC_TLSGOTDTPREL
> UNSPEC_TLSTPREL
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 2a28215..1211e0d 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -1966,10 +1966,50 @@
> (match_dup 2)))]
> "VECTOR_UNIT_VSX_P (<MODE>mode)"
> "xvcmpgt<sd>p. %x0,%x1,%x2"
> [(set_attr "type" "<VStype_simple>")])
>
> +;; xvtlsbb BF,XB
> +;; set CR field BF to indicate if bit 7 of every byte element in VSR[XB]
> +;; is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
> +;; CR.field_WRITE (BF, ALL_TRUE, 0, ALL_FALSE, 0);
s/set/Set the/ (in the comment line beginning "set CR field BF" above).
> +(define_insn "*xvtlsbb_internal"
> + [(set (match_operand:CC 0 "cc_reg_operand" "=y")
> + (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
> + UNSPEC_XVTLSBB))]
> +"TARGET_FUTURE"
> +"xvtlsbb %0,%x1"
> +[(set_attr "type" "logical")])
> +
> +;; Vector Test Least Significant Bit by Byte
> +;; for the implementation of the builtin
> +;; __builtin_vec_test_lsbb_all_ones
> +;; int vec_test_lsbb_all_ones (vector unsigned char);
> +;; and
> +;; __builtin_vec_test_lsbb_all_zeros
> +;; int vec_test_lsbb_all_zeros (vector unsigned char);
> +(define_expand "xvtlsbbo"
> + [(set (match_dup 2)
> + (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
> + UNSPEC_XVTLSBB))
> + (set (match_operand:SI 0 "gpc_reg_operand" "=r")
> + (lt:SI (match_dup 2) (const_int 0)))]
> +"TARGET_FUTURE"
> +{
> +operands[2] = gen_reg_rtx (CCmode);
> +})
> +(define_expand "xvtlsbbz"
> + [(set (match_dup 2)
> + (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
> + UNSPEC_XVTLSBB))
> + (set (match_operand:SI 0 "gpc_reg_operand" "=r")
> + (eq:SI (match_dup 2) (const_int 0)))]
> +"TARGET_FUTURE"
> +{
> +operands[2] = gen_reg_rtx (CCmode);
> +})
> +
> (define_insn "*vsx_ge_<mode>_p"
> [(set (reg:CC CR6_REGNO)
> (unspec:CC
> [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
> (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
> diff --git a/gcc/testsuite/gcc.target/powerpc/lsbb-runnable.c
> b/gcc/testsuite/gcc.target/powerpc/lsbb-runnable.c
> new file mode 100644
> index 0000000..9c770e0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/lsbb-runnable.c
> @@ -0,0 +1,65 @@
> +/*
> + Test the least significant bit by byte instruction
> + xvtlsbb BF,XB
> + Using the builtins
> + int vec_test_lsbb_all_zeros(vector unsigned char);
> + int vec_test_lsbb_all_ones(vector unsigned char);
> + */
For consistency, space before "(".
Same for subsequent test.
> +
> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_future_hw } */
> +/* { dg-options "-O2 -fno-inline -mvsx -mdejagnu-cpu=future" } */
> +
-mvsx is probably not necessary. (doesn't hurt?)
Nothing noted aside from those cosmetic nits.
lgtm
thanks
-Will
> +#include <altivec.h>
> +#include <stdio.h>
> +
> +void abort (void);
> +
> +#define ITERS 7
> +vector char input_vec[ITERS] = {
> + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
> + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
> + {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
> + {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
> + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
> 0xff, 0xff, 0xff, 0xff},
> + {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
> 0xfe, 0xfe, 0xfe, 0xfe},
> + {0xfe, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5,
> 0xf6, 0xf7, 0xf8, 0xf9}
> +};
> +
> +int expected_allzeros_results[ITERS] = {1, 0, 0, 0, 0, 1, 0};
> +int expected_allones_results[ITERS] = {0, 1, 0, 0, 1, 0, 0};
> +
> +int test_for_zeros(vector char vc) {
> + return vec_test_lsbb_all_zeros(vc);
> +}
> +
> +int test_for_ones(vector char vc) {
> + return vec_test_lsbb_all_ones(vc);
> +}
> +
> +int main ()
> +{
> +int allzeros,allones;
> +int iter;
> +int failcount=0;
> +vector char srcvec;
> +
> +for (iter=0;iter<ITERS;iter++) {
> + srcvec = input_vec[iter];
> + allzeros = test_for_zeros(srcvec);
> + allones = test_for_ones(srcvec);
> + if (allzeros != expected_allzeros_results[iter]) {
> + printf("fail on allzero check. iter %d, result was %d \n", iter,
> allzeros);
> + failcount++;
> + }
> + if (allones != expected_allones_results[iter]) {
> + printf("fail on allones check. iter %d, result was %d \n", iter,
> allones);
> + failcount++;
> + }
> +}
> +
> +if (failcount)
> + abort();
> +return 0;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/lsbb.c
> b/gcc/testsuite/gcc.target/powerpc/lsbb.c
> new file mode 100644
> index 0000000..a990518
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/lsbb.c
> @@ -0,0 +1,24 @@
> +/*
> + Test the least significant bit by byte instruction
> + xvtlsbb BF,XB
> + Using the builtins
> + int vec_test_lsbb_all_zeros(vector unsigned char);
> + int vec_test_lsbb_all_ones(vector unsigned char);
> + */
> +
> +/* { dg-require-effective-target powerpc_future_ok } */
> +/* { dg-options "-O2 -fno-inline -mvsx -mdejagnu-cpu=future" } */
> +
> +/* { dg-final { scan-assembler-times {\mxvtlsbb\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\msetbc\M} 2 } } */
> +
> +#include <altivec.h>
> +
> +int test_for_zeros(vector char vc) {
> + return vec_test_lsbb_all_zeros(vc);
> +}
> +
> +int test_for_ones(vector char vc) {
> + return vec_test_lsbb_all_ones(vc);
> +}
> +