We may simplify (subreg (vec_merge (vec_duplicate X) (vector) (const_int 1)) 0)
to X when the mode of X is the same as the mode of the subreg. gcc/ PR target/87537 * simplify-rtx.c (simplify_subreg): Simplify subreg of vec_merge of vec_duplicate. gcc/testsuite/ PR target/87537 * gcc.target/i386/pr87537-1.c: New test. --- gcc/simplify-rtx.c | 11 +++++++++++ gcc/testsuite/gcc.target/i386/pr87537-1.c | 12 ++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr87537-1.c diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 9bc53866b9f..e2a0533b23d 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -6601,6 +6601,17 @@ simplify_subreg (machine_mode outermode, rtx op, return NULL_RTX; } + /* Return X for + (subreg (vec_merge (vec_duplicate X) (vector) (const_int 1)) 0) + */ + if (known_eq (byte, 0U) + && GET_CODE (op) == VEC_MERGE + && GET_CODE (XEXP (op, 0)) == VEC_DUPLICATE + && GET_MODE (XEXP (XEXP (op, 0), 0)) == outermode + && CONST_INT_P (XEXP (op, 2)) + && INTVAL (XEXP (op, 2)) == 1) + return XEXP (XEXP (op, 0), 0); + /* A SUBREG resulting from a zero extension may fold to zero if it extracts higher bits that the ZERO_EXTEND's source bits. */ if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode)) diff --git a/gcc/testsuite/gcc.target/i386/pr87537-1.c b/gcc/testsuite/gcc.target/i386/pr87537-1.c new file mode 100644 index 00000000000..df849b032e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr87537-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O2" } */ +/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-not "vmovss" } } */ + +#include <immintrin.h> + +__m128 +foo (float *x) +{ + return _mm_broadcastss_ps(_mm_load_ss(x)); +} -- 2.17.2