Changes from v3:
        * Added new testcase.
Changes from v2:
        * Some formatting.
Changes from v1:
        * Added more info to commit message; fixed indentation.

This patch removes redundant vector compare instructions and logic
from the vec_first_mismatch_or_eos_index intrinsic.
Currently, GCC generates extra vcmpneb instructions and additional
masking logic (xxland, xxlorc) to handle EOS and mismatch comparisons.
However, a single vcmpnezb instruction already suffices, as it covers
both cases.  By eliminating the redundant comparisons (vcmpneb) and the
associated logic (xxland/xxlorc) we produce shorter, more efficient
code.

Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.

2025-10-22  Vijay Shankar  <[email protected]>

gcc/ChangeLog:
        PR target/116004
        * config/rs6000/vsx.md (first_mismatch_or_eos_index): Remove redundant
        emit_insns.

gcc/testsuite/ChangeLog:
        PR target/116004
        * gcc.target/powerpc/pr116004.c: New test.
---
 gcc/config/rs6000/vsx.md                    | 22 ++------
 gcc/testsuite/gcc.target/powerpc/pr116004.c | 58 +++++++++++++++++++++
 2 files changed, 61 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr116004.c

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index cfad9b8c6..3c2319a53 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5668,29 +5668,13 @@
   "TARGET_P9_VECTOR"
 {
   int sh;
-  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
-  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
-  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
-  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
-  rtx and_result = gen_reg_rtx (<MODE>mode);
   rtx result = gen_reg_rtx (<MODE>mode);
-  rtx vzero = gen_reg_rtx (<MODE>mode);
-
-  /* Vector with zeros in elements that correspond to zeros in operands.  */
-  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
 
-  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
-  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
-  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
+  /* Vector with ones in elements that do not match or elements corresponding
+     to zeros in operands.  */
 
-  /* Vector with ones in elments that match.  */
-  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
+  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (result, operands[1],
                                              operands[2]));
-  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
-
-  /* Create vector with ones in elements where there was a zero in one of
-     the source elements or the elements did not match.  */
-  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
   sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
 
   if (<MODE>mode == V16QImode)
diff --git a/gcc/testsuite/gcc.target/powerpc/pr116004.c b/gcc/testsuite/gcc.target/powerpc/pr116004.c
new file mode 100644
index 000000000..2d0982d1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr116004.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-final { scan-assembler-times {\mvcmpnezb\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvcmpnezh\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvcmpnezw\M} 2 } } */
+/* { dg-final { scan-assembler-not "vcmpneb " } } */
+/* { dg-final { scan-assembler-not "vcmpneh " } } */
+/* { dg-final { scan-assembler-not "vcmpnew " } } */
+
+#include <altivec.h>
+#include <stdint.h>
+
+int main(void) {
+  vector signed char char_src1, char_src2;
+  vector unsigned char uchar_src1, uchar_src2;
+  vector signed short short_src1, short_src2;
+  vector unsigned short ushort_src1, ushort_src2;
+  vector signed int int_src1, int_src2;
+  vector unsigned int uint_src1, uint_src2;
+
+  volatile unsigned int r1, r2, r3, r4, r5, r6;
+
+  /* signed char: operands first differ at element 0 (-1 vs 2).  */
+  char_src1 = (vector signed char) {-1, 2, 3, 0, -5, 6, 7, 8,
+                                    9, 10, 11, 12, 13, 14, 15, 16};
+  char_src2 = (vector signed char) {2, 3, 20, 0, -5, 6, 7, 8,
+                                    9, 10, 11, 12, 13, 14, 15, 16};
+  r1 = vec_first_mismatch_or_eos_index(char_src1, char_src2);
+
+  /* unsigned char: uchar_src2 holds a zero at element 1.  */
+  uchar_src1 = (vector unsigned char) {1, 2, 3, 4, 5, 6, 7, 8,
+                                       9, 10, 11, 12, 13, 14, 15, 16};
+  uchar_src2 = (vector unsigned char) {1, 0, 3, 4, 5, 6, 7, 8,
+                                       9, 10, 11, 12, 13, 14, 15, 16};
+  r2 = vec_first_mismatch_or_eos_index(uchar_src1, uchar_src2);
+
+  /* signed short: operands first differ at element 1 (-20 vs 20).  */
+  short_src1 = (vector signed short) {-10, -20, 30, 40, 50, 60, 70, 80};
+  short_src2 = (vector signed short) {-10, 20, 30, 40, 50, 60, 70, 80};
+  r3 = vec_first_mismatch_or_eos_index(short_src1, short_src2);
+
+  /* unsigned short: ushort_src1 holds a zero at element 7.  */
+  ushort_src1 = (vector unsigned short) {10, 20, 30, 40, 50, 60, 70, 0};
+  ushort_src2 = (vector unsigned short) {10, 20, 30, 40, 50, 60, 70, 80};
+  r4 = vec_first_mismatch_or_eos_index(ushort_src1, ushort_src2);
+
+  /* signed int: operands first differ at element 1 (2 vs 20).  */
+  int_src1 = (vector signed int) {1, 2, 3, 4};
+  int_src2 = (vector signed int) {1, 20, 3, 4};
+  r5 = vec_first_mismatch_or_eos_index(int_src1, int_src2);
+
+  /* unsigned int: identical operands, both zero at element 3.  */
+  uint_src1 = (vector unsigned int) {1, 2, 3, 0};
+  uint_src2 = (vector unsigned int) {1, 2, 3, 0};
+  r6 = vec_first_mismatch_or_eos_index(uint_src1, uint_src2);
+
+  return 0;
+}
-- 
2.47.3

Reply via email to