This is patch #2. It adds support for the new modulus instructions that are being added in ISA 3.0 (power9):
I have built this patch (along with patches #3 and #4) with a bootstrap build on a power8 little endian system. There were no regressions in the test suite. Is this patch ok to install in the trunk once patch #1 has been installed. [gcc] 2015-11-08 Michael Meissner <meiss...@linux.vnet.ibm.com> * config/rs6000/rs6000.c (rs6000_rtx_costs): Update costs for modulus instructions if we have hardware support. * config/rs6000/rs6000.md (mod<mode>3): Add support for ISA 3.0 modulus instructions. (umod<mode>3): Likewise. (divmod peephole): Likewise. (udivmod peephole): Likewise. [gcc/testsuite] 2015-11-08 Michael Meissner <meiss...@linux.vnet.ibm.com> * lib/target-supports.exp (check_p9vector_hw_available): Add checks for power9 availability. (check_effective_target_powerpc_p9vector_ok): Likewise. (check_vect_support_and_set_flags): Likewise. * gcc.target/powerpc/mod-1.c: New test. * gcc.target/powerpc/mod-2.c: Likewise. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/testsuite/gcc.target/powerpc/mod-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/mod-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/mod-1.c (revision 0) @@ -0,0 +1,21 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O3" } */ + +int ismod (int a, int b) { return a%b; } +long lsmod (long a, long b) { return a%b; } +unsigned int iumod (unsigned int a, unsigned int b) { return a%b; } +unsigned long lumod (unsigned long a, unsigned long b) { return a%b; } + +/* { dg-final { scan-assembler-times "modsw " 1 } } */ +/* { dg-final { scan-assembler-times "modsd " 1 } } */ +/* { dg-final { scan-assembler-times "moduw " 1 } } */ +/* { dg-final { scan-assembler-times "modud " 1 } } */ +/* { dg-final { scan-assembler-not "mullw " } } */ +/* { dg-final { scan-assembler-not "mulld " } } */ +/* { dg-final { scan-assembler-not "divw " } } */ +/* { dg-final { scan-assembler-not "divd " } } */ +/* { dg-final { scan-assembler-not "divwu " } } */ +/* { dg-final { scan-assembler-not "divdu " } } */ Index: gcc/testsuite/gcc.target/powerpc/mod-2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/mod-2.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/mod-2.c (revision 0) @@ -0,0 +1,14 @@ +/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O3" } */ + +int ismod (int a, int b) { return a%b; } +unsigned int iumod (unsigned int a, unsigned int b) { return a%b; } + +/* { dg-final { scan-assembler-times "modsw " 1 } } */ +/* { dg-final { scan-assembler-times "moduw " 1 } } */ +/* { dg-final { scan-assembler-not "mullw " } } */ +/* { dg-final { scan-assembler-not "divw " } } */ +/* { dg-final { scan-assembler-not "divwu " } } */ Index: gcc/testsuite/lib/target-supports.exp =================================================================== --- gcc/testsuite/lib/target-supports.exp (revision 229970) +++ gcc/testsuite/lib/target-supports.exp (working copy) @@ -1635,6 +1635,30 @@ proc check_p8vector_hw_available { } { }] } +# Return 1 if the target supports executing power9 vector instructions, 0 +# otherwise. Cache the result. + +proc check_p9vector_hw_available { } { + return [check_cached_effective_target p9vector_hw_available { + # Some simulators are known to not support VSX/power8 instructions. + # For now, disable on Darwin + if { [istarget powerpc-*-eabi] || [istarget powerpc*-*-eabispe] || [istarget *-*-darwin*]} { + expr 0 + } else { + set options "-mpower9-vector" + check_runtime_nocache p9vector_hw_available { + int main() + { + long e = -1; + vector double v = (vector double) { 0.0, 0.0 }; + asm ("xsxexpdp %0,%1" : "+r" (e) : "wa" (v)); + return e; + } + } $options + } + }] +} + # Return 1 if the target supports executing VSX instructions, 0 # otherwise. Cache the result. @@ -3358,6 +3382,31 @@ proc check_effective_target_powerpc_p8ve } } +# Return 1 if this is a PowerPC target supporting -mpower9-vector + +proc check_effective_target_powerpc_p9vector_ok { } { + if { ([istarget powerpc*-*-*] + && ![istarget powerpc-*-linux*paired*]) + || [istarget rs6000-*-*] } { + # AltiVec is not supported on AIX before 5.3. + if { [istarget powerpc*-*-aix4*] + || [istarget powerpc*-*-aix5.1*] + || [istarget powerpc*-*-aix5.2*] } { + return 0 + } + return [check_no_compiler_messages powerpc_p9vector_ok object { + int main (void) { + long e = -1; + vector double v = (vector double) { 0.0, 0.0 }; + asm ("xsxexpdp %0,%1" : "+r" (e) : "wa" (v)); + return e; + } + } "-mpower9-vector"] + } else { + return 0 + } +} + # Return 1 if this is a PowerPC target supporting -mvsx proc check_effective_target_powerpc_vsx_ok { } { @@ -5459,6 +5508,7 @@ proc is-effective-target { arg } { "vmx_hw" { set selected [check_vmx_hw_available] } "vsx_hw" { set selected [check_vsx_hw_available] } "p8vector_hw" { set selected [check_p8vector_hw_available] } + "p9vector_hw" { set selected [check_p9vector_hw_available] } "ppc_recip_hw" { set selected [check_ppc_recip_hw_available] } "dfp_hw" { set selected [check_dfp_hw_available] } "htm_hw" { set selected [check_htm_hw_available] } @@ -5483,6 +5533,7 @@ proc is-effective-target-keyword { arg } "vmx_hw" { return 1 } "vsx_hw" { return 1 } "p8vector_hw" { return 1 } + "p9vector_hw" { return 1 } "ppc_recip_hw" { return 1 } "dfp_hw" { return 1 } "htm_hw" { return 1 } @@ -6186,7 +6237,9 @@ proc check_vect_support_and_set_flags { } lappend DEFAULT_VECTCFLAGS "-maltivec" - if [check_p8vector_hw_available] { + if [check_p9vector_hw_available] { + lappend DEFAULT_VECTCFLAGS "-mpower9-vector" + } elseif [check_p8vector_hw_available] { lappend DEFAULT_VECTCFLAGS "-mpower8-vector" } elseif [check_vsx_hw_available] { lappend DEFAULT_VECTCFLAGS "-mvsx" "-mno-allow-movmisalign" Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 229972) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -31844,8 +31844,8 @@ rs6000_rtx_costs (rtx x, machine_mode mo else *total = rs6000_cost->divsi; } - /* Add in shift and subtract for MOD. */ - if (code == MOD || code == UMOD) + /* Add in shift and subtract for MOD unless we have a mod instruction. */ + if (!TARGET_MODULO && (code == MOD || code == UMOD)) *total += COSTS_N_INSNS (2); return false; Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (revision 229972) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -2885,9 +2885,9 @@ (define_insn_and_split "*div<mode>3_sra_ (set_attr "cell_micro" "not")]) (define_expand "mod<mode>3" - [(use (match_operand:GPR 0 "gpc_reg_operand" "")) - (use (match_operand:GPR 1 "gpc_reg_operand" "")) - (use (match_operand:GPR 2 "reg_or_cint_operand" ""))] + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "") + (match_operand:GPR 2 "reg_or_cint_operand" "")))] "" { int i; @@ -2897,16 +2897,93 @@ (define_expand "mod<mode>3" if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) <= 0 || (i = exact_log2 (INTVAL (operands[2]))) < 0) - FAIL; + { + if (!TARGET_MODULO) + FAIL; - temp1 = gen_reg_rtx (<MODE>mode); - temp2 = gen_reg_rtx (<MODE>mode); + operands[2] = force_reg (<MODE>mode, operands[2]); + } + else + { + temp1 = gen_reg_rtx (<MODE>mode); + temp2 = gen_reg_rtx (<MODE>mode); - emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2])); - emit_insn (gen_ashl<mode>3 (temp2, temp1, GEN_INT (i))); - emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2)); - DONE; + emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2])); + emit_insn (gen_ashl<mode>3 (temp2, temp1, GEN_INT (i))); + emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2)); + DONE; + } }) + +;; In order to enable using a peephole2 for combining div/mod to eliminate the +;; mod, prefer putting the result of mod into a different register +(define_insn "*mod<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r") + (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")))] + "TARGET_MODULO" + "mods<wd> %0,%1,%2" + [(set_attr "type" "div") + (set_attr "size" "<bits>")]) + + +(define_insn "umod<mode>3" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r") + (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")))] + "TARGET_MODULO" + "modu<wd> %0,%1,%2" + [(set_attr "type" "div") + (set_attr "size" "<bits>")]) + +;; On machines with modulo support, do a combined div/mod the old fashioned +;; method, since the multiply/subtract is faster than doing the mod instruction +;; after a divide. + +(define_peephole2 + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (div:GPR (match_operand:GPR 1 "gpc_reg_operand" "") + (match_operand:GPR 2 "gpc_reg_operand" ""))) + (set (match_operand:GPR 3 "gpc_reg_operand" "") + (mod:GPR (match_dup 1) + (match_dup 2)))] + "TARGET_MODULO + && ! reg_mentioned_p (operands[0], operands[1]) + && ! reg_mentioned_p (operands[0], operands[2]) + && ! reg_mentioned_p (operands[3], operands[1]) + && ! reg_mentioned_p (operands[3], operands[2])" + [(set (match_dup 0) + (div:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (mult:GPR (match_dup 0) + (match_dup 2))) + (set (match_dup 3) + (minus:GPR (match_dup 1) + (match_dup 3)))]) + +(define_peephole2 + [(set (match_operand:GPR 0 "gpc_reg_operand" "") + (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "") + (match_operand:GPR 2 "gpc_reg_operand" ""))) + (set (match_operand:GPR 3 "gpc_reg_operand" "") + (umod:GPR (match_dup 1) + (match_dup 2)))] + "TARGET_MODULO + && ! reg_mentioned_p (operands[0], operands[1]) + && ! reg_mentioned_p (operands[0], operands[2]) + && ! reg_mentioned_p (operands[3], operands[1]) + && ! reg_mentioned_p (operands[3], operands[2])" + [(set (match_dup 0) + (div:GPR (match_dup 1) + (match_dup 2))) + (set (match_dup 3) + (mult:GPR (match_dup 0) + (match_dup 2))) + (set (match_dup 3) + (minus:GPR (match_dup 1) + (match_dup 3)))]) + ;; Logical instructions ;; The logical instructions are mostly combined by using match_operator,