Hi! This PR is a repetition of PR87853, just for avx2 instead of sse2. See https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00195.html for the previous patch.
This time there is just one intrinsic with the problem (note, the previous patch didn't have to change _mm_cmpeq_epi8, as __v16qi vs. __v16qs doesn't make any difference for equality comparisons). I've grepped for similar issues in other headers, including 512-bit vectors, but couldn't find any; in those cases we use a builtin with a mask argument. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and release branches? I've also noticed PR87853 didn't come up with a testcase, so that is also attached, ok to commit too? As for tests, I chose not to do a dg-do run test with -funsigned-char, because that option is an ABI change and when including some headers that also include system headers one is never sure what will become of that. 2019-09-09 Jakub Jelinek <ja...@redhat.com> PR target/91704 * config/i386/avxintrin.h (__v32qs): New typedef. * config/i386/avx2intrin.h (_mm256_cmpgt_epi8): Use casts to __v32qs instead of __v32qi. * gcc.target/i386/pr91704.c: New test. --- gcc/config/i386/avxintrin.h.jj 2019-08-12 17:55:19.039139772 +0200 +++ gcc/config/i386/avxintrin.h 2019-09-08 23:22:11.829573162 +0200 @@ -47,6 +47,7 @@ typedef unsigned int __v8su __attribute_ typedef short __v16hi __attribute__ ((__vector_size__ (32))); typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))); typedef char __v32qi __attribute__ ((__vector_size__ (32))); +typedef signed char __v32qs __attribute__ ((__vector_size__ (32))); typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))); /* The Intel API is flexible enough that we must allow aliasing with other --- gcc/config/i386/avx2intrin.h.jj 2019-01-01 12:37:32.000731417 +0100 +++ gcc/config/i386/avx2intrin.h 2019-09-08 23:24:23.391560853 +0200 @@ -258,7 +258,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpgt_epi8 (__m256i __A, __m256i __B) { - return (__m256i) ((__v32qi)__A > (__v32qi)__B); + return (__m256i) ((__v32qs)__A > 
(__v32qs)__B); } extern __inline __m256i --- gcc/testsuite/gcc.target/i386/pr91704.c.jj 2019-09-09 11:01:14.588282654 +0200 +++ gcc/testsuite/gcc.target/i386/pr91704.c 2019-09-09 11:09:55.659355290 +0200 @@ -0,0 +1,14 @@ +/* PR target/91704 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -funsigned-char -mavx2 -mavx512f -masm=att" } */ +/* { dg-final { scan-assembler-times "\tvpcmpgtb\t%ymm" 1 } } */ +/* { dg-final { scan-assembler-not "\tvpsubusb\t" } } */ +/* { dg-final { scan-assembler-not "\tvpcmpeqb\t" } } */ + +#include <x86intrin.h> + +__m256i +foo (__m256i x, __m256i y) +{ + return _mm256_cmpgt_epi8 (x, y); +} Jakub
2019-09-09 Jakub Jelinek <ja...@redhat.com> PR target/87853 * gcc.target/i386/pr87853.c: New test. --- gcc/testsuite/gcc.target/i386/pr87853.c.jj 2019-09-09 11:00:43.984752380 +0200 +++ gcc/testsuite/gcc.target/i386/pr87853.c 2019-09-09 11:03:14.580448353 +0200 @@ -0,0 +1,20 @@ +/* PR target/87853 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -funsigned-char -msse2 -mno-sse3 -masm=att" } */ +/* { dg-final { scan-assembler-times "\tpcmpgtb\t%xmm" 2 } } */ +/* { dg-final { scan-assembler-not "\tpsubusb\t" } } */ +/* { dg-final { scan-assembler-not "\tpcmpeqb\t" } } */ + +#include <x86intrin.h> + +__m128i +foo (__m128i x, __m128i y) +{ + return _mm_cmpgt_epi8 (x, y); +} + +__m128i +bar (__m128i x, __m128i y) +{ + return _mm_cmplt_epi8 (x, y); +}