[PATCH] RS6000 Add testlsbb by Byte operations
    
Hi,

Add support for new instructions to test LSB by Byte.
    
[v2] Additional updates per feedback, including adding _all
to the internal name, typo and cosmetic fixups throughout, and removal
of the extraneous -mvsx from the tests.
    
V2 has completed tests on powerpc64le-unknown-linux-gnu Power8LE, with
other regression tests still in progress on some other powerpc platforms.
OK for trunk? 

Thanks,
-Will

[gcc]

2020-07-29  Will Schmidt  <will_schm...@vnet.ibm.com>

    * config/rs6000/altivec.h (vec_test_lsbb_all_ones): New define.
    (vec_test_lsbb_all_zeros): New define.
    * config/rs6000/rs6000-builtin.def (BU_P10_VSX_1): New built-in
    handling macro.
    (XVTLSBB_ZEROS, XVTLSBB_ONES): New builtin defines.
    (xvtlsbb_all_zeros, xvtlsbb_all_ones): New builtin overloads.
    * config/rs6000/rs6000-call.c (P10_BUILTIN_VEC_XVTLSBB_ZEROS,
    P10_BUILTIN_VEC_XVTLSBB_ONES): New altivec_builtin_types entries.
    * config/rs6000/rs6000.md (UNSPEC_XVTLSBB):  New unspec.
    * config/rs6000/vsx.md (*xvtlsbb_internal): New instruction define.
    (xvtlsbbo, xvtlsbbz): New instruction expands.

[testsuite]
    * testsuite/gcc.target/powerpc/lsbb-runnable.c: New test.
    * testsuite/gcc.target/powerpc/lsbb.c: New test.

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 6c43124..119fb1c 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -491,10 +491,13 @@
 #define vec_cmpnez __builtin_vec_vcmpnez
 
 #define vec_cntlz_lsbb __builtin_vec_vclzlsbb
 #define vec_cnttz_lsbb __builtin_vec_vctzlsbb
 
+#define vec_test_lsbb_all_ones __builtin_vec_xvtlsbb_all_ones
+#define vec_test_lsbb_all_zeros __builtin_vec_xvtlsbb_all_zeros
+
 #define vec_xlx __builtin_vec_vextulx
 #define vec_xrx __builtin_vec_vexturx
 #endif
 
 /* Predicates.
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index f703755..38f859f 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1060,10 +1060,18 @@
                    RS6000_BTM_P10,                     /* MASK */      \
                    (RS6000_BTC_ ## ATTR                /* ATTR */      \
                     | RS6000_BTC_QUATERNARY),                          \
                    CODE_FOR_ ## ICODE)                 /* ICODE */
 
+#define BU_P10_VSX_1(ENUM, NAME, ATTR, ICODE)                          \
+  RS6000_BUILTIN_1 (P10_BUILTIN_ ## ENUM,              /* ENUM */      \
+                   "__builtin_vsx_" NAME,              /* NAME */      \
+                   RS6000_BTM_P10,                     /* MASK */      \
+                   (RS6000_BTC_ ## ATTR                /* ATTR */      \
+                    | RS6000_BTC_UNARY),                               \
+                   CODE_FOR_ ## ICODE)                 /* ICODE */
+
 #define BU_P10_OVERLOAD_1(ENUM, NAME)                                  \
   RS6000_BUILTIN_1 (P10_BUILTIN_VEC_ ## ENUM,          /* ENUM */      \
                    "__builtin_vec_" NAME,              /* NAME */      \
                    RS6000_BTM_P10,                     /* MASK */      \
                    (RS6000_BTC_OVERLOADED              /* ATTR */      \
@@ -2734,10 +2742,13 @@ BU_P10V_1 (VSTRIHL, "vstrihl", CONST, vstril_v8hi)
 BU_P10V_1 (VSTRIBR_P, "vstribr_p", CONST, vstrir_p_v16qi)
 BU_P10V_1 (VSTRIHR_P, "vstrihr_p", CONST, vstrir_p_v8hi)
 BU_P10V_1 (VSTRIBL_P, "vstribl_p", CONST, vstril_p_v16qi)
 BU_P10V_1 (VSTRIHL_P, "vstrihl_p", CONST, vstril_p_v8hi)
 
+BU_P10_VSX_1 (XVTLSBB_ZEROS, "xvtlsbb_all_zeros", CONST, xvtlsbbz)
+BU_P10_VSX_1 (XVTLSBB_ONES, "xvtlsbb_all_ones", CONST, xvtlsbbo)
+
 BU_P10V_1 (MTVSRBM, "mtvsrbm", CONST, vec_mtvsr_v16qi)
 BU_P10V_1 (MTVSRHM, "mtvsrhm", CONST, vec_mtvsr_v8hi)
 BU_P10V_1 (MTVSRWM, "mtvsrwm", CONST, vec_mtvsr_v4si)
 BU_P10V_1 (MTVSRDM, "mtvsrdm", CONST, vec_mtvsr_v2di)
 BU_P10V_1 (MTVSRQM, "mtvsrqm", CONST, vec_mtvsr_v1ti)
@@ -2769,10 +2780,14 @@ BU_P10_OVERLOAD_3 (EXTRACTH, "extracth")
 BU_P10_OVERLOAD_1 (VSTRIR, "strir")
 BU_P10_OVERLOAD_1 (VSTRIL, "stril")
 
 BU_P10_OVERLOAD_1 (VSTRIR_P, "strir_p")
 BU_P10_OVERLOAD_1 (VSTRIL_P, "stril_p")
+
+BU_P10_OVERLOAD_1 (XVTLSBB_ZEROS, "xvtlsbb_all_zeros")
+BU_P10_OVERLOAD_1 (XVTLSBB_ONES, "xvtlsbb_all_ones")
+
 
 BU_P10_OVERLOAD_1 (MTVSRBM, "mtvsrbm")
 BU_P10_OVERLOAD_1 (MTVSRHM, "mtvsrhm")
 BU_P10_OVERLOAD_1 (MTVSRWM, "mtvsrwm")
 BU_P10_OVERLOAD_1 (MTVSRDM, "mtvsrdm")
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 5ec3f2c..ece8d76 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -5679,10 +5679,15 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { P10_BUILTIN_VEC_VEXTRACTM, P10_BUILTIN_VEXTRACTMD,
     RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, 0, 0 },
   { P10_BUILTIN_VEC_VEXTRACTM, P10_BUILTIN_VEXTRACTMQ,
     RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, 0, 0 },
 
+ { P10_BUILTIN_VEC_XVTLSBB_ZEROS, P10_BUILTIN_XVTLSBB_ZEROS,
+    RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P10_BUILTIN_VEC_XVTLSBB_ONES, P10_BUILTIN_XVTLSBB_ONES,
+    RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
   { RS6000_BUILTIN_NONE, RS6000_BUILTIN_NONE, 0, 0, 0, 0 }
 };
 
 /* Nonzero if we can use a floating-point register to pass this arg.  */
 #define USE_FP_FOR_ARG_P(CUM,MODE)             \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index b3fcb84..074a1b9 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -75,10 +75,11 @@
    UNSPEC_TLSGD
    UNSPEC_TLSLD
    UNSPEC_TLS_GET_ADDR
    UNSPEC_MOVESI_FROM_CR
    UNSPEC_MOVESI_TO_CR
+   UNSPEC_XVTLSBB
    UNSPEC_TLSDTPREL
    UNSPEC_TLSDTPRELHA
    UNSPEC_TLSDTPRELLO
    UNSPEC_TLSGOTDTPREL
    UNSPEC_TLSTPREL
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f753771..57da340 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1983,10 +1983,49 @@
                  (match_dup 2)))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "xvcmpgt<sd>p. %x0,%x1,%x2"
   [(set_attr "type" "<VStype_simple>")])
 
+;; xvtlsbb BF,XB
+;; Set the CR field BF to indicate if bit 7 (the LSB) of every byte element
+;; in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
+(define_insn "*xvtlsbb_internal"
+  [(set (match_operand:CC 0 "cc_reg_operand" "=y")
+       (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
+        UNSPEC_XVTLSBB))]
+  "TARGET_POWER10"
+  "xvtlsbb %0,%x1"
+  [(set_attr "type" "logical")])
+
+;; Vector Test Least Significant Bit by Byte
+;; for the implementation of the builtin
+;;     __builtin_vec_xvtlsbb_all_ones
+;;     int vec_test_lsbb_all_ones (vector unsigned char);
+;; and
+;;     __builtin_vec_xvtlsbb_all_zeros
+;;     int vec_test_lsbb_all_zeros (vector unsigned char);
+(define_expand "xvtlsbbo"
+  [(set (match_dup 2)
+       (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
+        UNSPEC_XVTLSBB))
+       (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+               (lt:SI (match_dup 2) (const_int 0)))]
+  "TARGET_POWER10"
+  {
+  operands[2] = gen_reg_rtx (CCmode);
+  })
+(define_expand "xvtlsbbz"
+  [(set (match_dup 2)
+       (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
+        UNSPEC_XVTLSBB))
+       (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+               (eq:SI (match_dup 2) (const_int 0)))]
+  "TARGET_POWER10"
+  {
+  operands[2] = gen_reg_rtx (CCmode);
+  })
+
 (define_insn "*vsx_ge_<mode>_p"
   [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
diff --git a/gcc/testsuite/gcc.target/powerpc/lsbb-runnable.c b/gcc/testsuite/gcc.target/powerpc/lsbb-runnable.c
new file mode 100644
index 0000000..7da530c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/lsbb-runnable.c
@@ -0,0 +1,65 @@
+/*
+ Test the least significant bit by byte instruction
+    xvtlsbb BF,XB
+ Using the builtins
+    int vec_test_lsbb_all_zeros (vector unsigned char);
+    int vec_test_lsbb_all_ones (vector unsigned char);
+ */
+
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-fno-inline -mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+#include <stdio.h>
+
+void abort (void);
+
+#define ITERS 7
+vector char input_vec[ITERS] = {
+  {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+  {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
+  {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
+  {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+  {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe},
+  {0xfe, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9}
+};
+
+int expected_allzeros_results[ITERS] = {1, 0, 0, 0, 0, 1, 0};
+int expected_allones_results[ITERS] =  {0, 1, 0, 0, 1, 0, 0};
+
+int test_for_zeros(vector char vc) {
+  return vec_test_lsbb_all_zeros(vc);
+}
+
+int test_for_ones(vector char vc) {
+  return vec_test_lsbb_all_ones(vc);
+}
+
+int main ()
+{
+int allzeros,allones;
+int iter;
+int failcount=0;
+vector char srcvec;
+
+for (iter=0;iter<ITERS;iter++) {
+  srcvec = input_vec[iter];
+  allzeros = test_for_zeros(srcvec);
+  allones = test_for_ones(srcvec);
+  if (allzeros != expected_allzeros_results[iter]) {
+    printf("fail on allzero check. iter %d, result was %d \n", iter, allzeros);
+    failcount++;
+  }
+  if (allones != expected_allones_results[iter]) {
+    printf("fail on allones check. iter %d, result was %d \n", iter, allones);
+    failcount++;
+  }
+}
+
+if (failcount)
+  abort();
+return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/lsbb.c b/gcc/testsuite/gcc.target/powerpc/lsbb.c
new file mode 100644
index 0000000..b5c0370
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/lsbb.c
@@ -0,0 +1,24 @@
+/*
+ Test the least significant bit by byte instruction
+    xvtlsbb BF,XB
+ Using the builtins
+    int vec_test_lsbb_all_zeros (vector unsigned char);
+    int vec_test_lsbb_all_ones (vector unsigned char);
+ */
+
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-fno-inline -mdejagnu-cpu=power10 -O2" } */
+
+/* { dg-final { scan-assembler-times {\mxvtlsbb\M} 2 } } */
+/* { dg-final { scan-assembler-times {\msetbc\M} 2 } } */
+
+#include <altivec.h>
+
+int test_for_zeros(vector char vc) {
+  return vec_test_lsbb_all_zeros(vc);
+}
+
+int test_for_ones(vector char vc) {
+  return vec_test_lsbb_all_ones(vc);
+}
+

Reply via email to