This patch addresses PR58762.

Currently GCC cannot vectorize the abs() operation for integers on x86
with only SSE2 support. For the int type, the reason is that no expander
for abs() is defined for vector types. This patch defines such an
expander so that abs(int) can be vectorized with only SSE2.

For abs(char/short), type conversions are needed because the current abs()
function/operation does not accept an argument of char/short type.
Therefore, when we want to get the absolute value of a char_val using
abs (char_val), it is converted into abs ((int) char_val). It
can then be vectorized, but the generated code is inefficient because
many packing and unpacking operations are involved. If we instead convert
(char) abs ((int) char_val) to abs (char_val), the vectorizer is
able to generate better code. The same applies to short.

This conversion also enables vectorizing abs(char/short) operations
with the PABSB and PABSW instructions in SSSE3.

With only SSE2 support, I developed three separate methods to expand
abs(char/short/int):

1. For a 32-bit int value x, we can get abs (x) from (((signed) x >>
(W-1)) ^ x) - ((signed) x >> (W-1)), where W is the bit width of x
(32 here). This is better than max (x, -x), which needs bit masking.

2. For a 16-bit int value x, we can get abs (x) from max (x, -x), as
SSE2 provides the PMAXSW instruction.

3. For an 8-bit int value x, we can get abs (x) from min ((unsigned char)
x, (unsigned char) (-x)), as SSE2 provides the PMINUB instruction.


The patch is pasted below. Please point out any problems in my patch
or analysis.


thanks,
Cong




diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8a38316..e0f33ee 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2013-10-22  Cong Hou  <co...@google.com>
+
+ PR target/58762
+ * convert.c (convert_to_integer): Convert (char) abs ((int) char_val)
+ into abs (char_val).  Also convert (short) abs ((int) short_val)
+ into abs (short_val).
+ * config/i386/i386-protos.h (ix86_expand_sse2_absvxsi2): New function.
+ * config/i386/i386.c (ix86_expand_sse2_absvxsi2): New function.
+ * config/i386/sse.md: Add SSE2 support to abs (char/int/short).
+
 2013-10-14  David Malcolm  <dmalc...@redhat.com>

  * dumpfile.h (gcc::dump_manager): New class, to hold state
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 3ab2f3a..e85f663 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -238,6 +238,7 @@ extern void ix86_expand_mul_widen_evenodd (rtx,
rtx, rtx, bool, bool);
 extern void ix86_expand_mul_widen_hilo (rtx, rtx, rtx, bool, bool);
 extern void ix86_expand_sse2_mulv4si3 (rtx, rtx, rtx);
 extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx, rtx);
+extern void ix86_expand_sse2_absvxsi2 (rtx, rtx);

 /* In i386-c.c  */
 extern void ix86_target_macros (void);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 02cbbbd..8050e02 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -41696,6 +41696,53 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
        gen_rtx_MULT (mode, op1, op2));
 }

+void
+ix86_expand_sse2_absvxsi2 (rtx op0, rtx op1)
+{
+  enum machine_mode mode = GET_MODE (op0);
+  rtx tmp0, tmp1;
+
+  switch (mode)
+    {
+      /* For 32-bit signed integer X, the best way to calculate the absolute
+ value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
+      case V4SImode:
+ tmp0 = expand_simple_binop (mode, ASHIFTRT, op1,
+    GEN_INT (GET_MODE_BITSIZE
+ (GET_MODE_INNER (mode)) - 1),
+    NULL, 0, OPTAB_DIRECT);
+ if (tmp0)
+  tmp1 = expand_simple_binop (mode, XOR, op1, tmp0,
+      NULL, 0, OPTAB_DIRECT);
+ if (tmp0 && tmp1)
+  expand_simple_binop (mode, MINUS, tmp1, tmp0,
+       op0, 0, OPTAB_DIRECT);
+ break;
+
+      /* For 16-bit signed integer X, the best way to calculate the absolute
+ value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
+      case V8HImode:
+ tmp0 = expand_unop (mode, neg_optab, op1, NULL_RTX, 0);
+ if (tmp0)
+  expand_simple_binop (mode, SMAX, op1, tmp0, op0, 0,
+       OPTAB_DIRECT);
+ break;
+
+      /* For 8-bit signed integer X, the best way to calculate the absolute
+ value of X is min ((unsigned char) X, (unsigned char) (-X)),
+ as SSE2 provides the PMINUB insn.  */
+      case V16QImode:
+ tmp0 = expand_unop (mode, neg_optab, op1, NULL_RTX, 0);
+ if (tmp0)
+  expand_simple_binop (V16QImode, UMIN, op1, tmp0, op0, 0,
+       OPTAB_DIRECT);
+ break;
+
+      default:
+ break;
+    }
+}
+
 /* Expand an insert into a vector register through pinsr insn.
    Return true if successful.  */

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c3f6c94..bd90f2d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8721,7 +8721,7 @@
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
    (set_attr "mode" "DI")])

-(define_insn "abs<mode>2"
+(define_insn "*abs<mode>2"
   [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
  (abs:VI124_AVX2_48_AVX512F
   (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
@@ -8733,6 +8733,20 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<sseinsnmode>")])

+(define_expand "abs<mode>2"
+  [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
+ (abs:VI124_AVX2_48_AVX512F
+  (match_operand:VI124_AVX2_48_AVX512F 1 "register_operand")))]
+  "TARGET_SSE2"
+{
+  if (TARGET_SSE2 && !TARGET_SSSE3)
+    ix86_expand_sse2_absvxsi2 (operands[0], operands[1]);
+  else if (TARGET_SSSE3)
+    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+    gen_rtx_ABS (<MODE>mode, operands[1])));
+  DONE;
+})
+
 (define_insn "abs<mode>2"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
  (abs:MMXMODEI
diff --git a/gcc/convert.c b/gcc/convert.c
index b07f0ef..8c60038 100644
--- a/gcc/convert.c
+++ b/gcc/convert.c
@@ -798,6 +798,20 @@ convert_to_integer (tree type, tree expr)
       ? TREE_OPERAND (expr, 2)
       : convert (type, TREE_OPERAND (expr, 2)));

+ case ABS_EXPR:
+  {
+    /* Convert (char) abs ((int) char_val) into abs (char_val).
+       Convert (short) abs ((int) short_val) into abs (short_val).  */
+    tree op = TREE_OPERAND (expr, 0);
+    if (optimize && CONVERT_EXPR_P (op))
+      {
+ tree op2 = TREE_OPERAND (op, 0);
+ if (TREE_TYPE (op2) == type)
+  return build1 (TREE_CODE (expr), type, op2);
+      }
+    break;
+  }
+
  default:
   break;
  }
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 075d071..cf5b942 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2013-10-22  Cong Hou  <co...@google.com>
+
+ PR target/58762
+ * gcc.dg/vect/pr58762.c: New test.
+
 2013-10-14  Tobias Burnus  <bur...@net-b.de>

  PR fortran/58658
diff --git a/gcc/testsuite/gcc.dg/vect/pr58762.c
b/gcc/testsuite/gcc.dg/vect/pr58762.c
new file mode 100644
index 0000000..4e62de6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr58762.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+void test1 (char* a, char* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+void test2 (short* a, short* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+void test3 (int* a, int* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect"
+       { target i?86-*-* x86_64-*-* ia64-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8a38316..e0f33ee 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2013-10-22  Cong Hou  <co...@google.com>
+
+       PR target/58762
+       * convert.c (convert_to_integer): Convert (char) abs ((int) char_val)
+       into abs (char_val).  Also convert (short) abs ((int) short_val)
+       into abs (short_val).
+       * config/i386/i386-protos.h (ix86_expand_sse2_absvxsi2): New function.
+       * config/i386/i386.c (ix86_expand_sse2_absvxsi2): New function.
+       * config/i386/sse.md: Add SSE2 support to abs (char/int/short).
+
 2013-10-14  David Malcolm  <dmalc...@redhat.com>
 
        * dumpfile.h (gcc::dump_manager): New class, to hold state
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 3ab2f3a..e85f663 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -238,6 +238,7 @@ extern void ix86_expand_mul_widen_evenodd (rtx, rtx, rtx, 
bool, bool);
 extern void ix86_expand_mul_widen_hilo (rtx, rtx, rtx, bool, bool);
 extern void ix86_expand_sse2_mulv4si3 (rtx, rtx, rtx);
 extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx, rtx);
+extern void ix86_expand_sse2_absvxsi2 (rtx, rtx);
 
 /* In i386-c.c  */
 extern void ix86_target_macros (void);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 02cbbbd..8050e02 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -41696,6 +41696,53 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
                       gen_rtx_MULT (mode, op1, op2));
 }
 
+void
+ix86_expand_sse2_absvxsi2 (rtx op0, rtx op1)
+{
+  enum machine_mode mode = GET_MODE (op0);
+  rtx tmp0, tmp1;
+
+  switch (mode)
+    {
+      /* For 32-bit signed integer X, the best way to calculate the absolute
+        value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
+      case V4SImode:
+       tmp0 = expand_simple_binop (mode, ASHIFTRT, op1,
+                                   GEN_INT (GET_MODE_BITSIZE
+                                                (GET_MODE_INNER (mode)) - 1),
+                                   NULL, 0, OPTAB_DIRECT);
+       if (tmp0)
+         tmp1 = expand_simple_binop (mode, XOR, op1, tmp0,
+                                     NULL, 0, OPTAB_DIRECT);
+       if (tmp0 && tmp1)
+         expand_simple_binop (mode, MINUS, tmp1, tmp0,
+                              op0, 0, OPTAB_DIRECT);
+       break;
+
+      /* For 16-bit signed integer X, the best way to calculate the absolute
+        value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
+      case V8HImode:
+       tmp0 = expand_unop (mode, neg_optab, op1, NULL_RTX, 0);
+       if (tmp0)
+         expand_simple_binop (mode, SMAX, op1, tmp0, op0, 0,
+                              OPTAB_DIRECT);
+       break;
+
+      /* For 8-bit signed integer X, the best way to calculate the absolute
+        value of X is min ((unsigned char) X, (unsigned char) (-X)),
+        as SSE2 provides the PMINUB insn.  */
+      case V16QImode:
+       tmp0 = expand_unop (mode, neg_optab, op1, NULL_RTX, 0);
+       if (tmp0)
+         expand_simple_binop (V16QImode, UMIN, op1, tmp0, op0, 0,
+                              OPTAB_DIRECT);
+       break;
+
+      default:
+       break;
+    }
+}
+
 /* Expand an insert into a vector register through pinsr insn.
    Return true if successful.  */
 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c3f6c94..bd90f2d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8721,7 +8721,7 @@
    (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
    (set_attr "mode" "DI")])
 
-(define_insn "abs<mode>2"
+(define_insn "*abs<mode>2"
   [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
        (abs:VI124_AVX2_48_AVX512F
          (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
@@ -8733,6 +8733,20 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "abs<mode>2"
+  [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
+       (abs:VI124_AVX2_48_AVX512F
+         (match_operand:VI124_AVX2_48_AVX512F 1 "register_operand")))]
+  "TARGET_SSE2"
+{
+  if (TARGET_SSE2 && !TARGET_SSSE3)
+    ix86_expand_sse2_absvxsi2 (operands[0], operands[1]);
+  else if (TARGET_SSSE3)
+    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+                           gen_rtx_ABS (<MODE>mode, operands[1])));
+  DONE;
+})
+
 (define_insn "abs<mode>2"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
diff --git a/gcc/convert.c b/gcc/convert.c
index b07f0ef..8c60038 100644
--- a/gcc/convert.c
+++ b/gcc/convert.c
@@ -798,6 +798,20 @@ convert_to_integer (tree type, tree expr)
                              ? TREE_OPERAND (expr, 2)
                              : convert (type, TREE_OPERAND (expr, 2)));
 
+       case ABS_EXPR:
+         {
+           /* Convert (char) abs ((int) char_val) into abs (char_val).
+              Convert (short) abs ((int) short_val) into abs (short_val).  */
+           tree op = TREE_OPERAND (expr, 0);
+           if (optimize && CONVERT_EXPR_P (op))
+             {
+               tree op2 = TREE_OPERAND (op, 0);
+               if (TREE_TYPE (op2) == type)
+                 return build1 (TREE_CODE (expr), type, op2);
+             }
+           break;
+         }
+
        default:
          break;
        }
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 075d071..cf5b942 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2013-10-22  Cong Hou  <co...@google.com>
+
+       PR target/58762
+       * gcc.dg/vect/pr58762.c: New test.
+
 2013-10-14  Tobias Burnus  <bur...@net-b.de>
 
        PR fortran/58658
diff --git a/gcc/testsuite/gcc.dg/vect/pr58762.c 
b/gcc/testsuite/gcc.dg/vect/pr58762.c
new file mode 100644
index 0000000..4e62de6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr58762.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+void test1 (char* a, char* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+void test2 (short* a, short* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+void test3 (int* a, int* b)
+{
+  int i;
+  for (i = 0; i < 10000; ++i)
+    a[i] = abs (b[i]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect"
+       { target i?86-*-* x86_64-*-* ia64-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

Reply via email to