match.pd/95424: Simplify 1 / X for integer X

This patch implements an optimization for the following C++ code:

int f(int x) {
    return 1 / x;
}

int f(unsigned int x) {
    return 1 / x;
}

Before this patch, x86-64 gcc -std=c++20 -O3 produces the following
assembly:

f(int):
    xor edx, edx
    mov eax, 1
    idiv edi
    ret
f(unsigned int):
    xor edx, edx
    mov eax, 1
    div edi
    ret

In comparison, clang++ -std=c++20 -O3 produces the following assembly:

f(int):
    lea ecx, [rdi + 1]
    xor eax, eax
    cmp ecx, 3
    cmovb eax, edi
    ret
f(unsigned int):
    xor eax, eax
    cmp edi, 1
    sete al
    ret

Clang's output is more efficient as it avoids expensive div operations.

With this patch, GCC now produces the following assembly:
f(int):
    lea eax, [rdi + 1]
    cmp eax, 2
    mov eax, 0
    cmovbe eax, edi
    ret
f(unsigned int):
    xor eax, eax
    cmp edi, 1
    sete al
    ret

which is virtualy identical to Clang's assembly output. Any slight
differences
in the output for f(int) is related to another possible missed optimization.

gcc/ChangeLog:

        * match.pd: Simplify 1 / X where X is an integer.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/divide-6.c: New test.
        * gcc.dg/tree-ssa/divide-7.c: New test.

diff --git a/gcc/match.pd b/gcc/match.pd
index 84c9b918041..5edae1818bb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -422,7 +422,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (div:C @0 (negate @0))
    (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
  && TYPE_OVERFLOW_UNDEFINED (type))
-    { build_minus_one_cst (type); })))
+    { build_minus_one_cst (type); }))
+
+ /* 1 / X -> X == 1 for unsigned integer X
+    1 / X -> X >= -1 && X <= 1 ? X : 0 for signed integer X
+    But not for 1 / 0 so that we can get proper warnings and errors. */
+ (simplify
+   (div integer_onep@0 @1)
+   (switch
+     (if (!integer_zerop (@1)
+          && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+          && TYPE_UNSIGNED (TREE_TYPE (@1)))
+      (eq @0 @1))
+     (if (!integer_zerop (@1)
+          && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+          && !TYPE_UNSIGNED (TREE_TYPE (@1)))
+      (cond (bit_and (ge @1 { build_minus_one_cst (integer_type_node); })
+                     (le @1 { build_one_cst (integer_type_node); }))
+        @1 { build_zero_cst (type); })))))

 /* For unsigned integral types, FLOOR_DIV_EXPR is the same as
    TRUNC_DIV_EXPR.  Rewrite into the latter in this case.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-6.c
b/gcc/testsuite/gcc.dg/tree-ssa/divide-6.c
new file mode 100644
index 00000000000..a9fc4c04058
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-6.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+unsigned int f(unsigned int x) {
+  return 1 / x;
+}
+
+/* { dg-final { scan-tree-dump-not "1 / x_..D.;" "optimized" } } */
+/* { dg-final { scan-tree-dump "x_..D. == 1;" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-7.c
b/gcc/testsuite/gcc.dg/tree-ssa/divide-7.c
new file mode 100644
index 00000000000..285279af7c2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-7.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+int f(int x) {
+  return 1 / x;
+}
+
+/* { dg-final { scan-tree-dump-not "1 / x_..D.;" "optimized" } } */
+/* { dg-final { scan-tree-dump ".. <= 2 ? x_..D. : 0;" "optimized" } } */

Reply via email to