Of course, it would help if I actually attached the patches: -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 241924) +++ gcc/config/rs6000/rs6000.md (.../gcc/config/rs6000) (working copy) @@ -325,6 +325,9 @@ (define_mode_iterator INT [QI HI SI DI T ; Any supported integer mode that fits in one register. (define_mode_iterator INT1 [QI HI SI (DI "TARGET_POWERPC64")]) +; Integer modes supported in VSX registers with ISA 3.0 instructions +(define_mode_iterator INT_ISA3 [QI HI SI DI]) + ; Everything we can extend QImode to. (define_mode_iterator EXTQI [SI (DI "TARGET_POWERPC64")]) @@ -334,7 +337,7 @@ (define_mode_iterator EXTHI [SI (DI "TAR ; Everything we can extend SImode to. (define_mode_iterator EXTSI [(DI "TARGET_POWERPC64")]) -; QImode or HImode for small atomic ops +; QImode or HImode for small integer moves and small atomic ops (define_mode_iterator QHI [QI HI]) ; QImode, HImode, SImode for fused ops only for GPR loads @@ -735,13 +738,15 @@ (define_code_attr SMINMAX [(smin "SM ;; complex forms. Basic data transfer is done later. 
(define_insn "zero_extendqi<mode>2" - [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,r") - (zero_extend:EXTQI (match_operand:QI 1 "reg_or_mem_operand" "m,r")))] + [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,r,?*wJwK,?*wK") + (zero_extend:EXTQI (match_operand:QI 1 "reg_or_mem_operand" "m,r,Z,*wK")))] "" "@ lbz%U1%X1 %0,%1 - rlwinm %0,%1,0,0xff" - [(set_attr "type" "load,shift")]) + rlwinm %0,%1,0,0xff + lxsibzx %x0,%y1 + vextractub %0,%1,7" + [(set_attr "type" "load,shift,fpload,vecperm")]) (define_insn_and_split "*zero_extendqi<mode>2_dot" [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") @@ -786,13 +791,15 @@ (define_insn_and_split "*zero_extendqi<m (define_insn "zero_extendhi<mode>2" - [(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r,r") - (zero_extend:EXTHI (match_operand:HI 1 "reg_or_mem_operand" "m,r")))] + [(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r,r,?*wJwK,?*wK") + (zero_extend:EXTHI (match_operand:HI 1 "reg_or_mem_operand" "m,r,Z,wK")))] "" "@ lhz%U1%X1 %0,%1 - rlwinm %0,%1,0,0xffff" - [(set_attr "type" "load,shift")]) + rlwinm %0,%1,0,0xffff + lxsihzx %x0,%y1 + vextractuh %0,%1,6" + [(set_attr "type" "load,shift,fpload,vecperm")]) (define_insn_and_split "*zero_extendhi<mode>2_dot" [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") @@ -893,11 +900,13 @@ (define_insn_and_split "*zero_extendsi<m (define_insn "extendqi<mode>2" - [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r") - (sign_extend:EXTQI (match_operand:QI 1 "gpc_reg_operand" "r")))] + [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*wK") + (sign_extend:EXTQI (match_operand:QI 1 "gpc_reg_operand" "r,?*wK")))] "" - "extsb %0,%1" - [(set_attr "type" "exts")]) + "@ + extsb %0,%1 + vextsb2d %0,%1" + [(set_attr "type" "exts,vecperm")]) (define_insn_and_split "*extendqi<mode>2_dot" [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y") @@ -948,14 +957,30 @@ (define_expand "extendhi<mode>2" "") (define_insn "*extendhi<mode>2" - [(set (match_operand:EXTHI 0 "gpc_reg_operand" 
"=r,r") - (sign_extend:EXTHI (match_operand:HI 1 "reg_or_mem_operand" "m,r")))] + [(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r,r,?*wK,?*wK") + (sign_extend:EXTHI (match_operand:HI 1 "reg_or_mem_operand" "m,r,Z,wK")))] "rs6000_gen_cell_microcode" "@ lha%U1%X1 %0,%1 - extsh %0,%1" - [(set_attr "type" "load,exts") - (set_attr "sign_extend" "yes")]) + extsh %0,%1 + # + vextsh2d %0,%1" + [(set_attr "type" "load,exts,fpload,vecperm") + (set_attr "sign_extend" "yes") + (set_attr "length" "4,4,8,4")]) + +(define_split + [(set (match_operand:EXTHI 0 "altivec_register_operand" "") + (sign_extend:EXTHI + (match_operand:HI 1 "indexed_or_indirect_operand" "")))] + "TARGET_P9_VECTOR && reload_completed" + [(set (match_dup 2) + (match_dup 1)) + (set (match_dup 0) + (sign_extend:EXTHI (match_dup 2)))] +{ + operands[2] = gen_rtx_REG (HImode, REGNO (operands[0])); /* Operand 1 is a MEM; load into the destination register, then extend it in place. */ +}) (define_insn "*extendhi<mode>2_noload" [(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r") @@ -5299,30 +5324,33 @@ (define_insn_and_split "*floatunssidf2_i (set_attr "type" "fp")]) ;; ISA 3.0 adds instructions lxsi[bh]zx to directly load QImode and HImode to -;; vector registers. At the moment, QI/HImode are not allowed in floating -;; point or vector registers, so we use UNSPEC's to use the load byte and -;; half-word instructions. +;; vector registers. These insns favor doing the sign/zero extension in +;; the vector registers, rather than loading up a GPR, doing a sign/zero +;; extension and then a direct move. 
(define_expand "float<QHI:mode><FP_ISA3:mode>2" [(parallel [(set (match_operand:FP_ISA3 0 "vsx_register_operand") (float:FP_ISA3 (match_operand:QHI 1 "input_operand"))) (clobber (match_scratch:DI 2)) - (clobber (match_scratch:DI 3))])] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" + (clobber (match_scratch:DI 3)) + (clobber (match_scratch:<QHI:MODE> 4))])] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 + && TARGET_VSX_SMALL_INTEGER" { if (MEM_P (operands[1])) operands[1] = rs6000_address_for_fpconvert (operands[1]); }) (define_insn_and_split "*float<QHI:mode><FP_ISA3:mode>2_internal" - [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>") + [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>,<Fv>") (float:FP_ISA3 - (match_operand:QHI 1 "reg_or_indexed_operand" "r,Z"))) - (clobber (match_scratch:DI 2 "=wi,v")) - (clobber (match_scratch:DI 3 "=r,X"))] + (match_operand:QHI 1 "reg_or_indexed_operand" "wK,r,Z"))) + (clobber (match_scratch:DI 2 "=wK,wi,wK")) + (clobber (match_scratch:DI 3 "=X,r,X")) + (clobber (match_scratch:<QHI:MODE> 4 "=X,X,wK"))] "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 - && TARGET_UPPER_REGS_DI" + && TARGET_UPPER_REGS_DI && TARGET_VSX_SMALL_INTEGER" "#" "&& reload_completed" [(const_int 0)] @@ -5333,26 +5361,20 @@ (define_insn_and_split "*float<QHI:mode> if (!MEM_P (input)) { - rtx tmp = operands[3]; - emit_insn (gen_extend<QHI:mode>di2 (tmp, input)); - emit_move_insn (di, tmp); + if (altivec_register_operand (input, <QHI:MODE>mode)) + emit_insn (gen_extend<QHI:mode>di2 (di, input)); + else + { + rtx tmp = operands[3]; + emit_insn (gen_extend<QHI:mode>di2 (tmp, input)); + emit_move_insn (di, tmp); + } } else { - machine_mode vmode; - rtx di_vector; - - emit_insn (gen_p9_lxsi<QHI:wd>zx (di, input)); - - if (<MODE>mode == QImode) - vmode = V16QImode; - else if (<MODE>mode == HImode) - vmode = V8HImode; - else - gcc_unreachable (); - - di_vector = gen_rtx_REG (vmode, REGNO 
(di)); - emit_insn (gen_vsx_sign_extend_<QHI:mode>_di (di, di_vector)); + rtx tmp = operands[4]; + emit_move_insn (tmp, input); + emit_insn (gen_extend<QHI:mode>di2 (di, tmp)); } emit_insn (gen_floatdi<FP_ISA3:mode>2 (result, di)); @@ -5365,19 +5387,21 @@ (define_expand "floatuns<QHI:mode><FP_IS (match_operand:QHI 1 "input_operand" ""))) (clobber (match_scratch:DI 2 "")) (clobber (match_scratch:DI 3 ""))])] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 + && TARGET_VSX_SMALL_INTEGER" { if (MEM_P (operands[1])) operands[1] = rs6000_address_for_fpconvert (operands[1]); }) (define_insn_and_split "*floatuns<QHI:mode><FP_ISA3:mode>2_internal" - [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>") + [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>,<Fv>") (unsigned_float:FP_ISA3 - (match_operand:QHI 1 "reg_or_indexed_operand" "r,Z"))) - (clobber (match_scratch:DI 2 "=wi,wi")) - (clobber (match_scratch:DI 3 "=r,X"))] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" + (match_operand:QHI 1 "reg_or_indexed_operand" "wJwK,r,Z"))) + (clobber (match_scratch:DI 2 "=wK,wi,wJwK")) + (clobber (match_scratch:DI 3 "=X,r,X"))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 + && TARGET_VSX_SMALL_INTEGER" "#" "&& reload_completed" [(const_int 0)] @@ -5385,15 +5409,15 @@ (define_insn_and_split "*floatuns<QHI:mo rtx result = operands[0]; rtx input = operands[1]; rtx di = operands[2]; - rtx tmp = operands[3]; - if (!MEM_P (input)) + if (MEM_P (input) || altivec_register_operand (input, <QHI:MODE>mode)) + emit_insn (gen_zero_extend<QHI:mode>di2 (di, input)); + else { + rtx tmp = operands[3]; emit_insn (gen_zero_extend<QHI:mode>di2 (tmp, input)); emit_move_insn (di, tmp); } - else - emit_insn (gen_p9_lxsi<QHI:wd>zx (di, input)); emit_insn (gen_floatdi<FP_ISA3:mode>2 (result, di)); DONE; @@ -5508,19 +5532,43 @@ (define_insn "*fix_trunc<mode>di2_fctidz 
[(set_attr "type" "fp")]) (define_expand "fix_trunc<SFDF:mode><QHI:mode>2" - [(use (match_operand:QHI 0 "rs6000_nonimmediate_operand" "")) - (use (match_operand:SFDF 1 "vsx_register_operand" ""))] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" + [(parallel [(set (match_operand:<QHI:MODE> 0 "nonimmediate_operand") + (fix:QHI (match_operand:SFDF 1 "gpc_reg_operand"))) + (clobber (match_scratch:DI 2))])] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT + && TARGET_VSX_SMALL_INTEGER" +{ + if (MEM_P (operands[0])) + operands[0] = rs6000_address_for_fpconvert (operands[0]); +}) + +(define_insn_and_split "*fix_trunc<SFDF:mode><QHI:mode>2_internal" + [(set (match_operand:<QHI:MODE> 0 "reg_or_indexed_operand" "=wIwJ,rZ") + (fix:QHI + (match_operand:SFDF 1 "gpc_reg_operand" "<SFDF:Fv>,<SFDF:Fv>"))) + (clobber (match_scratch:DI 2 "=X,wi"))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT + && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(const_int 0)] { - rtx op0 = operands[0]; - rtx op1 = operands[1]; - rtx di_tmp = gen_reg_rtx (DImode); + rtx dest = operands[0]; + rtx src = operands[1]; - if (MEM_P (op0)) - op0 = rs6000_address_for_fpconvert (op0); + if (vsx_register_operand (dest, <QHI:MODE>mode)) + { + rtx di_dest = gen_rtx_REG (DImode, REGNO (dest)); + emit_insn (gen_fix_trunc<SFDF:mode>di2 (di_dest, src)); + } + else + { + rtx tmp = operands[2]; + rtx tmp2 = gen_rtx_REG (<QHI:MODE>mode, REGNO (tmp)); - emit_insn (gen_fctiwz_<SFDF:mode> (di_tmp, op1)); - emit_insn (gen_p9_stxsi<QHI:wd>x (op0, di_tmp)); + emit_insn (gen_fix_trunc<SFDF:mode>di2 (tmp, src)); + emit_move_insn (dest, tmp2); + } DONE; }) @@ -5597,22 +5645,45 @@ (define_insn "*fixuns_trunc<mode>di2_fct [(set_attr "type" "fp")]) (define_expand "fixuns_trunc<SFDF:mode><QHI:mode>2" - [(use (match_operand:QHI 0 "rs6000_nonimmediate_operand" "")) - (use (match_operand:SFDF 1 "vsx_register_operand" ""))] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" + [(parallel [(set 
(match_operand:<QHI:MODE> 0 "nonimmediate_operand") + (unsigned_fix:QHI (match_operand:SFDF 1 "gpc_reg_operand"))) + (clobber (match_scratch:DI 2))])] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT + && TARGET_VSX_SMALL_INTEGER" +{ + if (MEM_P (operands[0])) + operands[0] = rs6000_address_for_fpconvert (operands[0]); +}) + +(define_insn_and_split "*fixuns_trunc<SFDF:mode><QHI:mode>2_internal" + [(set (match_operand:<QHI:MODE> 0 "reg_or_indexed_operand" "=wIwJ,rZ") + (unsigned_fix:QHI + (match_operand:SFDF 1 "gpc_reg_operand" "<SFDF:Fv>,<SFDF:Fv>"))) + (clobber (match_scratch:DI 2 "=X,wi"))] + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT + && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(const_int 0)] { - rtx op0 = operands[0]; - rtx op1 = operands[1]; - rtx di_tmp = gen_reg_rtx (DImode); + rtx dest = operands[0]; + rtx src = operands[1]; - if (MEM_P (op0)) - op0 = rs6000_address_for_fpconvert (op0); + if (vsx_register_operand (dest, <QHI:MODE>mode)) + { + rtx di_dest = gen_rtx_REG (DImode, REGNO (dest)); + emit_insn (gen_fixuns_trunc<SFDF:mode>di2 (di_dest, src)); + } + else + { + rtx tmp = operands[2]; + rtx tmp2 = gen_rtx_REG (<QHI:MODE>mode, REGNO (tmp)); - emit_insn (gen_fctiwuz_<SFDF:mode> (di_tmp, op1)); - emit_insn (gen_p9_stxsi<QHI:wd>x (op0, di_tmp)); + emit_insn (gen_fixuns_trunc<SFDF:mode>di2 (tmp, src)); + emit_move_insn (dest, tmp2); + } DONE; }) - ; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ)) ; rather than (set (subreg:SI (reg)) (fix:SI ...)) ; because the first makes it clear that operand 0 is not live @@ -6676,41 +6747,55 @@ (define_split (const_int 0)))] "") -(define_insn "*movhi_internal" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r,r,*c*l,*h") - (match_operand:HI 1 "input_operand" "r,m,r,i,*h,r,0"))] - "gpc_reg_operand (operands[0], HImode) - || gpc_reg_operand (operands[1], HImode)" - "@ - mr %0,%1 - lhz%U1%X1 %0,%1 - sth%U0%X0 %1,%0 - li %0,%w1 - mf%1 %0 - mt%0 %1 - nop" - [(set_attr "type" 
"*,load,store,*,mfjmpr,mtjmpr,*")]) - (define_expand "mov<mode>" [(set (match_operand:INT 0 "general_operand" "") (match_operand:INT 1 "any_operand" ""))] "" "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }") -(define_insn "*movqi_internal" - [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,m,r,r,*c*l,*h") - (match_operand:QI 1 "input_operand" "r,m,r,i,*h,r,0"))] - "gpc_reg_operand (operands[0], QImode) - || gpc_reg_operand (operands[1], QImode)" +;; MR LHZ/LBZ LXSI*ZX STH/STB STXSI*X LI +;; XXLOR load 0 load -1 VSPLTI* # MFVSRWZ +;; MTVSRWZ MF%1 MT%0 NOP +(define_insn "*mov<mode>_internal" + [(set (match_operand:QHI 0 "nonimmediate_operand" + "=r, r, ?*wJwK, m, Z, r, + ?*wJwK, ?*wJwK, ?*wJwK, ?*wK, ?*wK, r, + ?*wJwK, r, *c*l, *h") + + (match_operand:QHI 1 "input_operand" + "r, m, Z, r, wJwK, i, + wJwK, O, wM, wB, wS, ?*wJwK, + r, *h, r, 0"))] + + "gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode)" "@ mr %0,%1 - lbz%U1%X1 %0,%1 - stb%U0%X0 %1,%0 + l<wd>z%U1%X1 %0,%1 + lxsi<wd>zx %x0,%y1 + st<wd>%U0%X0 %1,%0 + stxsi<wd>x %x1,%y0 li %0,%1 + xxlor %x0,%x1,%x1 + xxspltib %x0,0 + xxspltib %x0,255 + vspltis<wd> %0,%1 + # + mfvsrwz %0,%x1 + mtvsrwz %x0,%1 mf%1 %0 mt%0 %1 nop" - [(set_attr "type" "*,load,store,*,mfjmpr,mtjmpr,*")]) + [(set_attr "type" + "*, load, fpload, store, fpstore, *, + vecsimple, vecperm, vecperm, vecperm, vecperm, mftgpr, + mffgpr, mfjmpr, mtjmpr, *") + + (set_attr "length" + "4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 8, 4, + 4, 4, 4, 4")]) + ;; Here is how to move condition codes around. When we store CC data in ;; an integer register or memory, we store just the high-order 4 bits. @@ -8134,7 +8219,7 @@ (define_insn "*movdi_internal64" xxlor %x0,%x1,%x1 xxspltib %x0,0 xxspltib %x0,255 - vspltisw %0,%1 + # xxlxor %x0,%x0,%x0 xxlorc %x0,%x0,%x0 # @@ -8228,9 +8313,11 @@ (define_split DONE; }) +;; Split integer constants that can be loaded with XXSPLTIB and a +;; sign extend operation. 
(define_split - [(set (match_operand:DI 0 "altivec_register_operand" "") - (match_operand:DI 1 "xxspltib_constant_split" ""))] + [(set (match_operand:INT_ISA3 0 "altivec_register_operand" "") + (match_operand:INT_ISA3 1 "xxspltib_constant_split" ""))] "TARGET_UPPER_REGS_DI && TARGET_P9_VECTOR && reload_completed" [(const_int 0)] { @@ -8240,7 +8327,15 @@ (define_split rtx op0_v16qi = gen_rtx_REG (V16QImode, r); emit_insn (gen_xxspltib_v16qi (op0_v16qi, op1)); - emit_insn (gen_vsx_sign_extend_qi_di (operands[0], op0_v16qi)); + if (<MODE>mode == DImode) + emit_insn (gen_vsx_sign_extend_qi_di (operands[0], op0_v16qi)); + else if (<MODE>mode == SImode) + emit_insn (gen_vsx_sign_extend_qi_si (operands[0], op0_v16qi)); + else if (<MODE>mode == HImode) + { + rtx op0_v8hi = gen_rtx_REG (V8HImode, r); + emit_insn (gen_altivec_vupkhsb (op0_v8hi, op0_v16qi)); + } DONE; }) Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 241924) +++ gcc/config/rs6000/rs6000.c (.../gcc/config/rs6000) (working copy) @@ -2019,8 +2019,14 @@ rs6000_hard_regno_mode_ok (int regno, ma if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD) return 1; - if (TARGET_VSX_SMALL_INTEGER && mode == SImode) - return 1; + if (TARGET_VSX_SMALL_INTEGER) + { + if (mode == SImode) + return 1; + + if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) + return 1; + } } if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT @@ -3403,7 +3409,14 @@ rs6000_init_hard_regno_mode_ok (bool glo reg_addr[SFmode].scalar_in_vmx_p = true; if (TARGET_VSX_SMALL_INTEGER) - reg_addr[SImode].scalar_in_vmx_p = true; + { + reg_addr[SImode].scalar_in_vmx_p = true; + if (TARGET_P9_VECTOR) + { + reg_addr[HImode].scalar_in_vmx_p = true; + reg_addr[QImode].scalar_in_vmx_p = true; + } + } } /* Setup the fusion operations. 
*/ @@ -20607,8 +20620,14 @@ rs6000_secondary_reload_simple_move (enu } /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ - if (TARGET_VSX_SMALL_INTEGER && mode == SImode) - return true; + if (TARGET_VSX_SMALL_INTEGER) + { + if (mode == SImode) + return true; + + if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) + return true; + } /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ if (mode == SDmode) @@ -21413,6 +21432,33 @@ rs6000_preferred_reload_class (rtx x, en if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode)) return ALTIVEC_REGS; + /* If this is an integer constant that can easily be loaded into + vector registers, allow it. */ + if (CONST_INT_P (x)) + { + HOST_WIDE_INT value = INTVAL (x); + + /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA + 2.06 can generate it in the Altivec registers with + VSPLTI<x>. */ + if (value == -1) + { + if (TARGET_P8_VECTOR) + return rclass; + else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS) + return ALTIVEC_REGS; + else + return NO_REGS; + } + + /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and + a sign extend in the Altivec registers. */ + if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR + && TARGET_VSX_SMALL_INTEGER + && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)) + return ALTIVEC_REGS; + } + /* Force constant to memory. */ return NO_REGS; } Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 241924) +++ gcc/config/rs6000/vsx.md (.../gcc/config/rs6000) (working copy) @@ -338,7 +338,6 @@ (define_c_enum "unspec" UNSPEC_VSX_XVCVDPSXDS UNSPEC_VSX_XVCVDPUXDS UNSPEC_VSX_SIGN_EXTEND - UNSPEC_P9_MEMORY UNSPEC_VSX_VSLO UNSPEC_VSX_EXTRACT UNSPEC_VSX_SXEXPDP @@ -2519,72 +2518,29 @@ (define_expand "vec_perm_const<mode>" ;; types are currently allowed in a vector register, so we extract to a DImode ;; and either do a direct move or store. 
(define_expand "vsx_extract_<mode>" - [(parallel [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand") + [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand") (vec_select:<VS_scalar> (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand") (parallel [(match_operand:QI 2 "const_int_operand")]))) - (clobber (match_dup 3))])] + (clobber (match_scratch:VSX_EXTRACT_I 3))])] "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" { - machine_mode smode = ((<MODE>mode != V4SImode && TARGET_VEXTRACTUB) - ? DImode : <MODE>mode); - operands[3] = gen_rtx_SCRATCH (smode); -}) - -;; Under ISA 3.0, we can use the byte/half-word/word integer stores if we are -;; extracting a vector element and storing it to memory, rather than using -;; direct move to a GPR and a GPR store. -(define_insn_and_split "*vsx_extract_<mode>_p9" - [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r,Z") - (vec_select:<VS_scalar> - (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v,v") - (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")]))) - (clobber (match_scratch:DI 3 "=v,v"))] - "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" - "#" - "&& (reload_completed || MEM_P (operands[0]))" - [(const_int 0)] -{ - rtx dest = operands[0]; - rtx src = operands[1]; - rtx element = operands[2]; - rtx di_tmp = operands[3]; - - if (GET_CODE (di_tmp) == SCRATCH) - di_tmp = gen_reg_rtx (DImode); - - emit_insn (gen_vsx_extract_<mode>_di (di_tmp, src, element)); - - if (REG_P (dest)) - emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), di_tmp); - else if (SUBREG_P (dest)) - emit_move_insn (gen_rtx_REG (DImode, subreg_regno (dest)), di_tmp); - else if (MEM_P (operands[0])) + /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. 
*/ + if (TARGET_VSX_SMALL_INTEGER && TARGET_P9_VECTOR) { - if (can_create_pseudo_p ()) - dest = rs6000_address_for_fpconvert (dest); - - if (<MODE>mode == V16QImode) - emit_insn (gen_p9_stxsibx (dest, di_tmp)); - else if (<MODE>mode == V8HImode) - emit_insn (gen_p9_stxsihx (dest, di_tmp)); - else - gcc_unreachable (); + emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1], + operands[2])); + DONE; } - else - gcc_unreachable (); - - DONE; -} - [(set_attr "type" "vecsimple,fpstore")]) +}) -(define_insn "vsx_extract_<mode>_di" - [(set (match_operand:DI 0 "gpc_reg_operand" "=<VSX_EX>") - (zero_extend:DI - (vec_select:<VS_scalar> - (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>") - (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))))] - "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" +(define_insn "vsx_extract_<mode>_p9" + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=<VSX_EX>") + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>") + (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB + && TARGET_VSX_SMALL_INTEGER" { /* Note, the element number has already been adjusted for endianness, so we don't have to adjust it here. */ @@ -2599,13 +2555,51 @@ (define_insn "vsx_extract_<mode>_di" } [(set_attr "type" "vecsimple")]) +;; Optimize zero extracts to eliminate the AND after the extract. 
+(define_insn_and_split "*vsx_extract_<mode>_di_p9" + [(set (match_operand:DI 0 "gpc_reg_operand" "=<VSX_EX>") + (zero_extend:DI + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>") + (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB + && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(set (match_dup 3) + (vec_select:<VS_scalar> + (match_dup 1) + (parallel [(match_dup 2)])))] +{ + operands[3] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0])); +}) + +;; Optimize stores to use the ISA 3.0 scalar store instructions +(define_insn_and_split "*vsx_extract_<mode>_store_p9" + [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z") + (vec_select:<VS_scalar> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>") + (parallel [(match_operand:QI 2 "const_int_operand" "n")]))) + (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>"))] + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB + && TARGET_VSX_SMALL_INTEGER" + "#" + "&& reload_completed" + [(set (match_dup 3) + (vec_select:<VS_scalar> + (match_dup 1) + (parallel [(match_dup 2)]))) + (set (match_dup 0) + (match_dup 3))]) + (define_insn_and_split "*vsx_extract_si" [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z") (vec_select:SI (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv") (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))) (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))] - "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT + && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)" "#" "&& reload_completed" [(const_int 0)] @@ -2624,10 +2618,10 @@ (define_insn_and_split "*vsx_extract_si value = INTVAL (element); if (value != 1) { - if (TARGET_VEXTRACTUB) + if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER) { - rtx di_tmp = gen_rtx_REG (DImode, REGNO (vec_tmp)); - emit_insn (gen_vsx_extract_v4si_di 
(di_tmp,src, element)); + rtx si_tmp = gen_rtx_REG (SImode, REGNO (vec_tmp)); + emit_insn (gen_vsx_extract_v4si_p9 (si_tmp,src, element)); } else emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element)); @@ -2663,7 +2657,8 @@ (define_insn_and_split "*vsx_extract_<m (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v") (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))] - "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT + && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)" "#" "&& reload_completed" [(const_int 0)] @@ -3253,26 +3248,6 @@ (define_insn "*vsx_sign_extend_si_v2di" [(set_attr "type" "vecexts")]) -;; ISA 3.0 memory operations -(define_insn "p9_lxsi<wd>zx" - [(set (match_operand:DI 0 "vsx_register_operand" "=wi") - (unspec:DI [(zero_extend:DI - (match_operand:QHI 1 "indexed_or_indirect_operand" "Z"))] - UNSPEC_P9_MEMORY))] - "TARGET_P9_VECTOR" - "lxsi<wd>zx %x0,%y1" - [(set_attr "type" "fpload")]) - -(define_insn "p9_stxsi<wd>x" - [(set (match_operand:QHI 0 "reg_or_indexed_operand" "=r,Z") - (unspec:QHI [(match_operand:DI 1 "vsx_register_operand" "wi,wi")] - UNSPEC_P9_MEMORY))] - "TARGET_P9_VECTOR" - "@ - mfvsrd %0,%x1 - stxsi<wd>x %x1,%y0" - [(set_attr "type" "mffgpr,fpstore")]) - ;; ISA 3.0 Binary Floating-Point Support ;; VSX Scalar Extract Exponent Double-Precision Index: gcc/testsuite/gcc.target/powerpc/vsx-qimode.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vsx-qimode.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vsx-qimode.c (.../gcc/testsuite/gcc.target/powerpc) (revision 242012) @@ -0,0 +1,22 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target 
powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2 -mvsx-small-integer" } */ + +double load_asm_d_constraint (signed char *p) +{ + double ret; + __asm__ ("xxlor %x0,%x1,%x1\t# load d constraint" : "=d" (ret) : "d" (*p)); + return ret; +} + +void store_asm_d_constraint (signed char *p, double x) +{ + signed char i; + __asm__ ("xxlor %x0,%x1,%x1\t# store d constraint" : "=d" (i) : "d" (x)); + *p = i; +} + +/* { dg-final { scan-assembler "lxsibzx" } } */ +/* { dg-final { scan-assembler "stxsibx" } } */ Index: gcc/testsuite/gcc.target/powerpc/vsx-himode.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vsx-himode.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vsx-himode.c (.../gcc/testsuite/gcc.target/powerpc) (revision 242012) @@ -0,0 +1,22 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2 -mvsx-small-integer" } */ + +double load_asm_d_constraint (short *p) +{ + double ret; + __asm__ ("xxlor %x0,%x1,%x1\t# load d constraint" : "=d" (ret) : "d" (*p)); + return ret; +} + +void store_asm_d_constraint (short *p, double x) +{ + short i; + __asm__ ("xxlor %x0,%x1,%x1\t# store d constraint" : "=d" (i) : "d" (x)); + *p = i; +} + +/* { dg-final { scan-assembler "lxsihzx" } } */ +/* { dg-final { scan-assembler "stxsihx" } } */ Index: gcc/testsuite/gcc.target/powerpc/vsx-himode2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vsx-himode2.c 
(.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vsx-himode2.c (.../gcc/testsuite/gcc.target/powerpc) (revision 242012) @@ -0,0 +1,15 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2 -mvsx-small-integer" } */ + +unsigned int foo (unsigned short u) +{ + unsigned int ret; + __asm__ ("xxlor %x0,%x1,%x1\t# v, v constraints" : "=v" (ret) : "v" (u)); + return ret; +} + +/* { dg-final { scan-assembler "mtvsrwz" } } */ +/* { dg-final { scan-assembler "mfvsrwz" } } */ Index: gcc/testsuite/gcc.target/powerpc/vsx-himode3.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vsx-himode3.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vsx-himode3.c (.../gcc/testsuite/gcc.target/powerpc) (revision 242012) @@ -0,0 +1,22 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2 -mvsx-small-integer" } */ + +double load_asm_v_constraint (short *p) +{ + double ret; + __asm__ ("xxlor %x0,%x1,%x1\t# load v constraint" : "=d" (ret) : "v" (*p)); + return ret; +} + +void store_asm_v_constraint (short *p, double x) +{ + short i; + __asm__ ("xxlor %x0,%x1,%x1\t# store v constraint" : "=v" (i) : "d" (x)); + *p = i; +} + +/* { dg-final { scan-assembler "lxsihzx" } } */ +/* { dg-final { scan-assembler "stxsihx" } } */ Index: 
gcc/testsuite/gcc.target/powerpc/vsx-qimode2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vsx-qimode2.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vsx-qimode2.c (.../gcc/testsuite/gcc.target/powerpc) (revision 242012) @@ -0,0 +1,15 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2 -mvsx-small-integer" } */ + +unsigned int foo (unsigned char u) +{ + unsigned int ret; + __asm__ ("xxlor %x0,%x1,%x1\t# v, v constraints" : "=v" (ret) : "v" (u)); + return ret; +} + +/* { dg-final { scan-assembler "mtvsrwz" } } */ +/* { dg-final { scan-assembler "mfvsrwz" } } */ Index: gcc/testsuite/gcc.target/powerpc/vsx-qimode3.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/vsx-qimode3.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/vsx-qimode3.c (.../gcc/testsuite/gcc.target/powerpc) (revision 242012) @@ -0,0 +1,22 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2 -mvsx-small-integer" } */ + +double load_asm_v_constraint (signed char *p) +{ + double ret; + __asm__ ("xxlor %x0,%x1,%x1\t# load v constraint" : "=d" (ret) : "v" (*p)); + return ret; +} + +void store_asm_v_constraint (signed char *p, double x) +{ + signed char i; + __asm__ ("xxlor %x0,%x1,%x1\t# store v 
constraint" : "=v" (i) : "d" (x)); + *p = i; +} + +/* { dg-final { scan-assembler "lxsibzx" } } */ +/* { dg-final { scan-assembler "stxsibx" } } */
Property changes on: gcc/testsuite/gcc.target/powerpc/p9-minmax-1.c ___________________________________________________________________ Modified: svn:mergeinfo Merged /trunk/gcc/testsuite/gcc.target/powerpc/p9-minmax-1.c:r241733-241924 Property changes on: gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c ___________________________________________________________________ Modified: svn:mergeinfo Merged /trunk/gcc/testsuite/gcc.target/powerpc/p9-minmax-2.c:r241733-241924 Index: gcc/testsuite/gcc.target/powerpc/p9-extract-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-extract-1.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 241924) +++ gcc/testsuite/gcc.target/powerpc/p9-extract-1.c (.../gcc/testsuite/gcc.target/powerpc) (working copy) @@ -17,7 +17,7 @@ int extract_schar_3 (vector signed char /* { dg-final { scan-assembler "vextractub" } } */ /* { dg-final { scan-assembler "vextractuh" } } */ /* { dg-final { scan-assembler "xxextractuw" } } */ -/* { dg-final { scan-assembler "mfvsrd" } } */ +/* { dg-final { scan-assembler "mfvsr" } } */ /* { dg-final { scan-assembler-not "stxvd2x" } } */ /* { dg-final { scan-assembler-not "stxv" } } */ /* { dg-final { scan-assembler-not "lwa" } } */