This patch adds support for the EXTSWSLI instruction that is being added to PowerPC ISA 3.0 (power9).
I have built this patch (along with patches #2 and #3) with a bootstrap build on a power8 little endian system. There were no regressions in the test suite. Is this patch ok to install in the trunk once patch #1 has been installed. [gcc] 2015-11-08 Michael Meissner <meiss...@linux.vnet.ibm.com> * config/rs6000/predicates.md (u6bit_cint_operand): New predicate, recognize 0..63. * config/rs6000/rs6000.c (rs6000_rtx_costs): Adjust the costs if the EXTSWSLI instruction is generated. * config/rs6000/rs6000.h (TARGET_EXTSWSLI): Add support for ISA 3.0 EXTSWSLI instruction. * config/rs6000/rs6000.md (ashdi3_extswsli): Likewise. (ashdi3_extswsli_dot): Likewise. (ashdi3_extswsli_dot2): Likewise. [gcc/testsuite] 2015-11-08 Michael Meissner <meiss...@linux.vnet.ibm.com> * gcc.target/powerpc/extswsli-1.c: New file to test EXTSWSLI instruction generation. * gcc.target/powerpc/extswsli-2.c: Likewise. * gcc.target/powerpc/extswsli-3.c: Likewise. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/predicates.md =================================================================== --- gcc/config/rs6000/predicates.md (revision 229970) +++ gcc/config/rs6000/predicates.md (working copy) @@ -142,6 +142,11 @@ (define_predicate "u5bit_cint_operand" (and (match_code "const_int") (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 31"))) +;; Return 1 if op is a unsigned 6-bit constant integer. +(define_predicate "u6bit_cint_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 63"))) + ;; Return 1 if op is a signed 8-bit constant integer. ;; Integer multiplication complete more quickly (define_predicate "s8bit_cint_operand" Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 229974) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -31927,6 +31927,17 @@ rs6000_rtx_costs (rtx x, machine_mode mo return false; case ASHIFT: + /* The EXTSWSLI instruction is a combined instruction. Don't count both + the sign extend and shift separately within the insn. */ + if (TARGET_EXTSWSLI && mode == DImode + && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode) + { + *total = 0; + return false; + } + /* fall through */ + case ASHIFTRT: case LSHIFTRT: case ROTATE: Index: gcc/config/rs6000/rs6000.h =================================================================== --- gcc/config/rs6000/rs6000.h (revision 229974) +++ gcc/config/rs6000/rs6000.h (working copy) @@ -566,6 +566,7 @@ extern int rs6000_vector_align[]; #define TARGET_FCTIDUZ TARGET_POPCNTD #define TARGET_FCTIWUZ TARGET_POPCNTD #define TARGET_CTZ TARGET_MODULO +#define TARGET_EXTSWSLI (TARGET_MODULO && TARGET_POWERPC64) #define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) #define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (revision 229974) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -3933,6 +3933,127 @@ (define_insn_and_split "*ashl<mode>3_dot (set_attr "dot" "yes") (set_attr "length" "4,8")]) +;; Pretend we have a memory form of extswsli until register allocation is done +;; so that we use LWZ to load the value from memory, instead of LWA. +(define_insn_and_split "ashdi3_extswsli" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (ashift:DI + (sign_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "r,m")) + (match_operand:DI 2 "u6bit_cint_operand" "n,n")))] + "TARGET_EXTSWSLI" + "@ + extswsli %0,%1,%2 + #" + "&& reload_completed && MEM_P (operands[1])" + [(set (match_dup 3) + (match_dup 1)) + (set (match_dup 0) + (ashift:DI (sign_extend:DI (match_dup 3)) + (match_dup 2)))] +{ + operands[3] = gen_lowpart (SImode, operands[0]); +} + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "no")]) + + +(define_insn_and_split "*ashdi3_extswsli_dot" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y,?x,??y") + (compare:CC + (ashift:DI + (sign_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "r,r,m,m")) + (match_operand:DI 2 "u6bit_cint_operand" "n,n,n,n")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r,r,r,r"))] + "TARGET_EXTSWSLI" + "@ + extswsli. %0,%1,%2 + # + # + #" + "&& reload_completed + && (cc_reg_not_cr0_operand (operands[3], CCmode) + || memory_operand (operands[1], SImode))" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx shift = operands[2]; + rtx cr = operands[3]; + rtx src2; + + if (!MEM_P (src)) + src2 = src; + else + { + src2 = gen_lowpart (SImode, dest); + emit_move_insn (src2, src); + } + + if (REGNO (cr) == CR0_REGNO) + { + emit_insn (gen_ashdi3_extswsli_dot2 (dest, src2, shift, cr)); + DONE; + } + + emit_insn (gen_ashdi3_extswsli (dest, src2, shift)); + emit_insn (gen_rtx_SET (cr, gen_rtx_COMPARE (CCmode, dest, const0_rtx))); + DONE; +} + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "no") + (set_attr "dot" "yes") + (set_attr "length" "4,8,8,12")]) + +(define_insn_and_split "ashdi3_extswsli_dot2" + [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y,?x,??y") + (compare:CC + (ashift:DI + (sign_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "r,r,m,m")) + (match_operand:DI 2 "u6bit_cint_operand" "n,n,n,n")) + (const_int 0))) + (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r") + (ashift:DI (sign_extend:DI (match_dup 1)) + (match_dup 2)))] + "TARGET_EXTSWSLI" + "@ + extswsli. %0,%1,%2 + # + # + #" + "&& reload_completed + && (cc_reg_not_cr0_operand (operands[3], CCmode) + || memory_operand (operands[1], SImode))" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx shift = operands[2]; + rtx cr = operands[3]; + rtx src2; + + if (!MEM_P (src)) + src2 = src; + else + { + src2 = gen_lowpart (SImode, dest); + emit_move_insn (src2, src); + } + + if (REGNO (cr) == CR0_REGNO) + { + emit_insn (gen_ashdi3_extswsli_dot2 (dest, src2, shift, cr)); + DONE; + } + + emit_insn (gen_ashdi3_extswsli (dest, src2, shift)); + emit_insn (gen_rtx_SET (cr, gen_rtx_COMPARE (CCmode, dest, const0_rtx))); + DONE; +} + [(set_attr "type" "shift") + (set_attr "maybe_var_shift" "no") + (set_attr "dot" "yes") + (set_attr "length" "4,8,8,12")]) (define_insn "lshr<mode>3" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") Index: gcc/testsuite/gcc.target/powerpc/extswsli-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/extswsli-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/extswsli-1.c (revision 0) @@ -0,0 +1,21 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O3" } */ + +static int mem; +int *ptr = &mem; + +long +add (long *p, int reg) +{ + __asm__ (" #foo %0" : "+r" (reg)); + return p[reg] + p[mem]; +} + +/* { dg-final { scan-assembler-times "extswsli " 2 } } */ +/* { dg-final { scan-assembler-times "lwz " 1 } } */ +/* { dg-final { scan-assembler-not "lwa " } } */ +/* { dg-final { scan-assembler-not "sldi " } } */ +/* { dg-final { scan-assembler-not "extsw " } } */ Index: gcc/testsuite/gcc.target/powerpc/extswsli-2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/extswsli-2.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/extswsli-2.c (revision 0) @@ -0,0 +1,38 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O3" } */ + +long +func1 (int reg, int *is_zero) +{ + long value; + + __asm__ (" #foo %0" : "+r" (reg)); + value = ((long)reg) << 4; + + if (!value) + *is_zero = 1; + + return value; +} + +long +func2 (int *ptr, int *is_zero) +{ + int reg = *ptr; + long value = ((long)reg) << 4; + + if (!value) + *is_zero = 1; + + return value; +} + +/* { dg-final { scan-assembler "extswsli\\. " } } */ +/* { dg-final { scan-assembler "lwz " } } */ +/* { dg-final { scan-assembler-not "lwa " } } */ +/* { dg-final { scan-assembler-not "sldi " } } */ +/* { dg-final { scan-assembler-not "sldi\\. " } } */ +/* { dg-final { scan-assembler-not "extsw " } } */ Index: gcc/testsuite/gcc.target/powerpc/extswsli-3.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/extswsli-3.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/extswsli-3.c (revision 0) @@ -0,0 +1,23 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O3" } */ + +long +do_ext_add (int *p, long a, long b) +{ + long l = *p; + long l2 = l << 4; + return l2 + ((l2 == 0) ? a : b); +} + +long +do_ext (int *p, long a, long b) +{ + long l = *p; + long l2 = l << 4; + return ((l2 == 0) ? a : b); +} + +/* { dg-final { scan-assembler "extswsli\\. "} } */