Enable V4QI, V2QI and V2HI mode signed saturated arithmetic insn patterns and add a couple of testcases to test for PADDSB and PADDSW instructions.
	PR target/112600

gcc/ChangeLog:

	* config/i386/mmx.md (<sat_plusminus:insn><mode>3): Rename
	from *<sat_plusminus:insn><mode>3.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr112600-3a.c: New test.
	* gcc.target/i386/pr112600-3b.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2f8d958dd5f..e88a06c441f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3218,7 +3218,7 @@ (define_insn "*mmx_<insn><mode>3"
    (set_attr "type" "mmxadd,sseadd,sseadd")
    (set_attr "mode" "DI,TI,TI")])
 
-(define_insn "*<insn><mode>3"
+(define_insn "<insn><mode>3"
   [(set (match_operand:VI_16_32 0 "register_operand" "=x,Yw")
 	(sat_plusminus:VI_16_32
 	  (match_operand:VI_16_32 1 "register_operand" "<comm>0,Yw")
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-3a.c b/gcc/testsuite/gcc.target/i386/pr112600-3a.c
new file mode 100644
index 00000000000..0c38659643d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-3a.c
@@ -0,0 +1,25 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define MIN -128
+#define MAX 127
+
+typedef char T;
+typedef unsigned char UT;
+
+void foo (T *out, T *op_1, T *op_2, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    {
+      T x = op_1[i];
+      T y = op_2[i];
+      T sum = (UT) x + (UT) y;
+
+      out[i] = (x ^ y) < 0 ? sum : (sum ^ x) >= 0 ? sum : x < 0 ? MIN : MAX;
+    }
+}
+
+/* { dg-final { scan-assembler "paddsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr112600-3b.c b/gcc/testsuite/gcc.target/i386/pr112600-3b.c
new file mode 100644
index 00000000000..746c422ceb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112600-3b.c
@@ -0,0 +1,25 @@
+/* PR middle-end/112600 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define MIN -32768
+#define MAX 32767
+
+typedef short T;
+typedef unsigned short UT;
+
+void foo (T *out, T *op_1, T *op_2, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    {
+      T x = op_1[i];
+      T y = op_2[i];
+      T sum = (UT) x + (UT) y;
+
+      out[i] = (x ^ y) < 0 ? sum : (sum ^ x) >= 0 ? sum : x < 0 ? MIN : MAX;
+    }
+}
+
+/* { dg-final { scan-assembler "paddsw" } } */