This is patch #2.  It adds support for the new modulus instructions that are
being added in ISA 3.0 (power9): modsw, moduw, modsd, and modud.

I have built this patch (along with patches #3 and #4) with a bootstrap build
on a power8 little endian system.  There were no regressions in the test
suite.  Is this patch ok to install in the trunk once patch #1 has been
installed?

[gcc]
2015-11-08  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * config/rs6000/rs6000.c (rs6000_rtx_costs): Update costs for
        modulus instructions if we have hardware support.

        * config/rs6000/rs6000.md (mod<mode>3): Add support for ISA 3.0
        modulus instructions.
        (umod<mode>3): Likewise.
        (divmod peephole): Likewise.
        (udivmod peephole): Likewise.

[gcc/testsuite]
2015-11-08  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * lib/target-supports.exp (check_p9vector_hw_available): Add
        checks for power9 availability.
        (check_effective_target_powerpc_p9vector_ok): Likewise.
        (check_vect_support_and_set_flags): Likewise.
        (is-effective-target): Add p9vector_hw.
        (is-effective-target-keyword): Likewise.

        * gcc.target/powerpc/mod-1.c: New test.
        * gcc.target/powerpc/mod-2.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/testsuite/gcc.target/powerpc/mod-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/mod-1.c    (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/mod-1.c    (revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O3" } */
+
+int ismod (int a, int b) { return a%b; }
+long lsmod (long a, long b) { return a%b; }
+unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
+unsigned long lumod (unsigned long a, unsigned long b) { return a%b; }
+
+/* { dg-final { scan-assembler-times "modsw " 1 } } */
+/* { dg-final { scan-assembler-times "modsd " 1 } } */
+/* { dg-final { scan-assembler-times "moduw " 1 } } */
+/* { dg-final { scan-assembler-times "modud " 1 } } */
+/* { dg-final { scan-assembler-not   "mullw "   } } */
+/* { dg-final { scan-assembler-not   "mulld "   } } */
+/* { dg-final { scan-assembler-not   "divw "    } } */
+/* { dg-final { scan-assembler-not   "divd "    } } */
+/* { dg-final { scan-assembler-not   "divwu "   } } */
+/* { dg-final { scan-assembler-not   "divdu "   } } */
Index: gcc/testsuite/gcc.target/powerpc/mod-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/mod-2.c    (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/mod-2.c    (revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O3" } */
+
+int ismod (int a, int b) { return a%b; }
+unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
+
+/* { dg-final { scan-assembler-times "modsw " 1 } } */
+/* { dg-final { scan-assembler-times "moduw " 1 } } */
+/* { dg-final { scan-assembler-not   "mullw "   } } */
+/* { dg-final { scan-assembler-not   "divw "    } } */
+/* { dg-final { scan-assembler-not   "divwu "   } } */
Index: gcc/testsuite/lib/target-supports.exp
===================================================================
--- gcc/testsuite/lib/target-supports.exp       (revision 229970)
+++ gcc/testsuite/lib/target-supports.exp       (working copy)
@@ -1635,6 +1635,30 @@ proc check_p8vector_hw_available { } {
     }]
 }
 
+# Return 1 if the target supports executing power9 vector instructions, 0
+# otherwise.  Cache the result.
+
+proc check_p9vector_hw_available { } {
+    return [check_cached_effective_target p9vector_hw_available {
+       # Some simulators are known to not support power9 instructions.
+       # For now, disable on eabi, eabispe and Darwin targets.
+       if { [istarget powerpc-*-eabi] || [istarget powerpc*-*-eabispe] || [istarget *-*-darwin*]} {
+           expr 0
+       } else {
+           set options "-mpower9-vector"
+           check_runtime_nocache p9vector_hw_available {
+               int main()
+               {
+                 long e = -1;
+                 vector double v = (vector double) { 0.0, 0.0 };
+                 asm ("xsxexpdp %0,%1" : "+r" (e) : "wa" (v));
+                 return e;
+               }
+           } $options
+       }
+    }]
+}
+
 # Return 1 if the target supports executing VSX instructions, 0
 # otherwise.  Cache the result.
 
@@ -3358,6 +3382,31 @@ proc check_effective_target_powerpc_p8ve
     }
 }
 
+# Return 1 if this is a PowerPC target supporting -mpower9-vector
+
+proc check_effective_target_powerpc_p9vector_ok { } {
+    if { ([istarget powerpc*-*-*]
+         && ![istarget powerpc-*-linux*paired*])
+        || [istarget rs6000-*-*] } {
+       # AltiVec is not supported on AIX before 5.3.
+       if { [istarget powerpc*-*-aix4*]
+            || [istarget powerpc*-*-aix5.1*] 
+            || [istarget powerpc*-*-aix5.2*] } {
+           return 0
+       }
+       return [check_no_compiler_messages powerpc_p9vector_ok object {
+           int main (void) {
+               long e = -1;
+               vector double v = (vector double) { 0.0, 0.0 };
+               asm ("xsxexpdp %0,%1" : "+r" (e) : "wa" (v));
+               return e;
+           }
+       } "-mpower9-vector"]
+    } else {
+       return 0
+    }
+}
+
 # Return 1 if this is a PowerPC target supporting -mvsx
 
 proc check_effective_target_powerpc_vsx_ok { } {
@@ -5459,6 +5508,7 @@ proc is-effective-target { arg } {
          "vmx_hw"         { set selected [check_vmx_hw_available] }
          "vsx_hw"         { set selected [check_vsx_hw_available] }
          "p8vector_hw"    { set selected [check_p8vector_hw_available] }
+         "p9vector_hw"    { set selected [check_p9vector_hw_available] }
          "ppc_recip_hw"   { set selected [check_ppc_recip_hw_available] }
          "dfp_hw"         { set selected [check_dfp_hw_available] }
          "htm_hw"         { set selected [check_htm_hw_available] }
@@ -5483,6 +5533,7 @@ proc is-effective-target-keyword { arg }
          "vmx_hw"         { return 1 }
          "vsx_hw"         { return 1 }
          "p8vector_hw"    { return 1 }
+         "p9vector_hw"    { return 1 }
          "ppc_recip_hw"   { return 1 }
          "dfp_hw"         { return 1 }
          "htm_hw"         { return 1 }
@@ -6186,7 +6237,9 @@ proc check_vect_support_and_set_flags { 
         }
 
         lappend DEFAULT_VECTCFLAGS "-maltivec"
-        if [check_p8vector_hw_available] {
+        if [check_p9vector_hw_available] {
+            lappend DEFAULT_VECTCFLAGS "-mpower9-vector"
+        } elseif [check_p8vector_hw_available] {
             lappend DEFAULT_VECTCFLAGS "-mpower8-vector"
         } elseif [check_vsx_hw_available] {
             lappend DEFAULT_VECTCFLAGS "-mvsx" "-mno-allow-movmisalign"
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 229972)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -31844,8 +31844,8 @@ rs6000_rtx_costs (rtx x, machine_mode mo
          else
            *total = rs6000_cost->divsi;
        }
-      /* Add in shift and subtract for MOD. */
-      if (code == MOD || code == UMOD)
+      /* Add in shift and subtract for MOD unless we have a mod instruction. */
+      if (!TARGET_MODULO && (code == MOD || code == UMOD))
        *total += COSTS_N_INSNS (2);
       return false;
 
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 229972)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -2885,9 +2885,9 @@ (define_insn_and_split "*div<mode>3_sra_
    (set_attr "cell_micro" "not")])
 
 (define_expand "mod<mode>3"
-  [(use (match_operand:GPR 0 "gpc_reg_operand" ""))
-   (use (match_operand:GPR 1 "gpc_reg_operand" ""))
-   (use (match_operand:GPR 2 "reg_or_cint_operand" ""))]
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+       (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+                (match_operand:GPR 2 "reg_or_cint_operand" "")))]
   ""
 {
   int i;
@@ -2897,16 +2897,93 @@ (define_expand "mod<mode>3"
   if (GET_CODE (operands[2]) != CONST_INT
       || INTVAL (operands[2]) <= 0
       || (i = exact_log2 (INTVAL (operands[2]))) < 0)
-    FAIL;
+    {
+      if (!TARGET_MODULO)
+       FAIL;
 
-  temp1 = gen_reg_rtx (<MODE>mode);
-  temp2 = gen_reg_rtx (<MODE>mode);
+      operands[2] = force_reg (<MODE>mode, operands[2]);
+    }
+  else
+    {
+      temp1 = gen_reg_rtx (<MODE>mode);
+      temp2 = gen_reg_rtx (<MODE>mode);
 
-  emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
-  emit_insn (gen_ashl<mode>3 (temp2, temp1, GEN_INT (i)));
-  emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
-  DONE;
+      emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
+      emit_insn (gen_ashl<mode>3 (temp2, temp1, GEN_INT (i)));
+      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
+      DONE;
+    }
 })
+
+;; In order to enable using a peephole2 for combining div/mod to eliminate the
+;; mod, prefer putting the result of mod into a different register ("=&r").
+(define_insn "*mod<mode>3"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r")
+        (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+                (match_operand:GPR 2 "gpc_reg_operand" "r")))]
+  "TARGET_MODULO"
+  "mods<wd> %0,%1,%2"
+  [(set_attr "type" "div")
+   (set_attr "size" "<bits>")])
+
+
+(define_insn "umod<mode>3"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r")
+        (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+                 (match_operand:GPR 2 "gpc_reg_operand" "r")))]
+  "TARGET_MODULO"
+  "modu<wd> %0,%1,%2"
+  [(set_attr "type" "div")
+   (set_attr "size" "<bits>")])
+
+;; On machines with modulo support, do a combined div/mod the old-fashioned
+;; way (divide, multiply, subtract), since the multiply/subtract is faster
+;; than doing the mod instruction after a divide.
+
+(define_peephole2
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+       (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+                (match_operand:GPR 2 "gpc_reg_operand" "")))
+   (set (match_operand:GPR 3 "gpc_reg_operand" "")
+       (mod:GPR (match_dup 1)
+                (match_dup 2)))]
+  "TARGET_MODULO
+   && ! reg_mentioned_p (operands[0], operands[1])
+   && ! reg_mentioned_p (operands[0], operands[2])
+   && ! reg_mentioned_p (operands[3], operands[1])
+   && ! reg_mentioned_p (operands[3], operands[2])"
+  [(set (match_dup 0)
+       (div:GPR (match_dup 1)
+                (match_dup 2)))
+   (set (match_dup 3)
+       (mult:GPR (match_dup 0)
+                 (match_dup 2)))
+   (set (match_dup 3)
+       (minus:GPR (match_dup 1)
+                  (match_dup 3)))])
+
+(define_peephole2
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "")
+       (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "")
+                 (match_operand:GPR 2 "gpc_reg_operand" "")))
+   (set (match_operand:GPR 3 "gpc_reg_operand" "")
+       (umod:GPR (match_dup 1)
+                 (match_dup 2)))]
+  "TARGET_MODULO
+   && ! reg_mentioned_p (operands[0], operands[1])
+   && ! reg_mentioned_p (operands[0], operands[2])
+   && ! reg_mentioned_p (operands[3], operands[1])
+   && ! reg_mentioned_p (operands[3], operands[2])"
+  [(set (match_dup 0)                  ; re-emit the matched unsigned divide
+       (udiv:GPR (match_dup 1)         ; must stay udiv: signed div is wrong here
+                 (match_dup 2)))
+   (set (match_dup 3)                  ; operand 3 = quotient * divisor
+       (mult:GPR (match_dup 0)
+                 (match_dup 2)))
+   (set (match_dup 3)                  ; operand 3 = dividend - operand 3
+       (minus:GPR (match_dup 1)
+                  (match_dup 3)))])
+
 
 ;; Logical instructions
 ;; The logical instructions are mostly combined by using match_operator,

Reply via email to