This patch adds the initial support for putting DI, DF, and SF values in the upper registers (traditional Altivec registers) using the -mupper-regs-df and -mupper-regs-sf switches. Those switches will not be enabled by default until the rest of the changes are made. This patch passes the bootstrap test and make check test. I tested all of the targets I tested previously (power4-8, G4/G5, SPE, cell, e5500/e5600, and paired floating point), and all machines generate the same code. Is it ok to install this patch?
[gcc] 2013-09-24 Michael Meissner <meiss...@linux.vnet.ibm.com> * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Allow DFmode, DImode, and SFmode in the upper VSX registers based on the -mupper-regs-{df,sf} flags. Fix wu constraint to be ALTIVEC_REGS if -mpower8-vector. Combine -mvsx-timode handling with the rest of the VSX register handling. * config/rs6000/rs6000.md (f32_lv): Use %x0 for VSX registers. (f32_sv): Likewise. (zero_extendsidi2_lfiwzx): Add support for loading into the Altivec registers with -mpower8-vector. Use wu/wv constraints to only do VSX memory operations on Altivec registers. (extendsidi2_lfiwax): Likewise. (extendsfdf2_fpr): Likewise. (mov<mode>_hardfloat, SF/SD modes): Likewise. (mov<mode>_hardfloat32, DF/DD modes): Likewise. (mov<mode>_hardfloat64, DF/DD modes): Likewise. (movdi_internal64): Likewise. [gcc/testsuite] 2013-09-24 Michael Meissner <meiss...@linux.vnet.ibm.com> * gcc.target/powerpc/p8vector-ldst.c: New test for -mupper-regs-sf and -mupper-regs-df. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 202855) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -1626,19 +1626,28 @@ rs6000_hard_regno_mode_ok (int regno, en /* VSX registers that overlap the FPR registers are larger than for non-VSX implementations. Don't allow an item to be split between a FP register - and an Altivec register. */ - if (VECTOR_MEM_VSX_P (mode)) + and an Altivec register. Allow TImode in all VSX registers if the user + asked for it. */ + if (TARGET_VSX && VSX_REGNO_P (regno) + && (VECTOR_MEM_VSX_P (mode) + || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode) + || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode)) + || (TARGET_VSX_TIMODE && mode == TImode))) { if (FP_REGNO_P (regno)) return FP_REGNO_P (last_regno); if (ALTIVEC_REGNO_P (regno)) - return ALTIVEC_REGNO_P (last_regno); - } + { + if (mode == SFmode && !TARGET_UPPER_REGS_SF) + return 0; - /* Allow TImode in all VSX registers if the user asked for it. */ - if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno)) - return 1; + if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF) + return 0; + + return ALTIVEC_REGNO_P (last_regno); + } + } /* The GPRs can hold any mode, but values bigger than one register cannot go past R31. */ @@ -2413,7 +2422,7 @@ rs6000_init_hard_regno_mode_ok (bool glo if (TARGET_P8_VECTOR) { - rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS; rs6000_constraints[RS6000_CONSTRAINT_wy] = rs6000_constraints[RS6000_CONSTRAINT_ww] = (TARGET_UPPER_REGS_SF) ? 
VSX_REGS : FLOAT_REGS; Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (revision 202846) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -314,13 +314,13 @@ (define_mode_attr real_value_to_target [ (define_mode_attr f32_lr [(SF "f") (SD "wz")]) (define_mode_attr f32_lm [(SF "m") (SD "Z")]) (define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")]) -(define_mode_attr f32_lv [(SF "lxsspx %0,%y1") (SD "lxsiwzx %0,%y1")]) +(define_mode_attr f32_lv [(SF "lxsspx %x0,%y1") (SD "lxsiwzx %x0,%y1")]) ; Definitions for store from 32-bit fpr register (define_mode_attr f32_sr [(SF "f") (SD "wx")]) (define_mode_attr f32_sm [(SF "m") (SD "Z")]) (define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")]) -(define_mode_attr f32_sv [(SF "stxsspx %1,%y0") (SD "stxsiwzx %1,%y0")]) +(define_mode_attr f32_sv [(SF "stxsspx %x1,%y0") (SD "stxsiwzx %x1,%y0")]) ; Definitions for 32-bit fpr direct move (define_mode_attr f32_dm [(SF "wn") (SD "wm")]) @@ -541,7 +541,7 @@ (define_split "") (define_insn "*zero_extendsidi2_lfiwzx" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm") + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wu") (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))] "TARGET_POWERPC64 && TARGET_LFIWZX" "@ @@ -711,7 +711,7 @@ (define_expand "extendsidi2" "") (define_insn "*extendsidi2_lfiwax" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm") + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wu") (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))] "TARGET_POWERPC64 && TARGET_LFIWAX" "@ @@ -5066,13 +5066,16 @@ (define_expand "extendsfdf2" "") (define_insn_and_split "*extendsfdf2_fpr" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d") - (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m")))] + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,wy,?wy,wv") + 
(float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wz,Z")))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" "@ # fmr %0,%1 - lfs%U1%X1 %0,%1" + lfs%U1%X1 %0,%1 + # + xxlor %x0,%x1,%x1 + lxsspx %x0,%y1" "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])" [(const_int 0)] { @@ -5088,7 +5091,16 @@ (define_insn_and_split "*extendsfdf2_fpr (if_then_else (match_test "update_address_mem (operands[1], VOIDmode)") (const_string "fpload_u") - (const_string "fpload")))])]) + (const_string "fpload"))) + (const_string "fp") + (const_string "vecsimple") + (if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "fpload_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "fpload_u") + (const_string "fpload")))])]) (define_expand "truncdfsf2" [(set (match_operand:SF 0 "gpc_reg_operand" "") @@ -9290,8 +9302,8 @@ (define_split }") (define_insn "mov<mode>_hardfloat" - [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wm,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r") - (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wm,r,<f32_dm>,r,h,0,G,Fn"))] + [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wu,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r") + (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wu,r,<f32_dm>,r,h,0,G,Fn"))] "(gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)) && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)" @@ -9492,8 +9504,8 @@ (define_split ;; reloading. 
(define_insn "*mov<mode>_hardfloat32" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,!r,!r,!r") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,G,H,F"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,!r,!r,!r") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,G,H,F"))] "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" @@ -9502,11 +9514,8 @@ (define_insn "*mov<mode>_hardfloat32" lfd%U1%X1 %0,%1 fmr %0,%1 lxsd%U1x %x0,%y1 - lxsd%U1x %x0,%y1 - stxsd%U0x %x1,%y0 stxsd%U0x %x1,%y0 xxlor %x0,%x1,%x1 - xxlor %x0,%x1,%x1 xxlxor %x0,%x0,%x0 # # @@ -9535,27 +9544,18 @@ (define_insn "*mov<mode>_hardfloat32" (const_string "fpload_ux") (const_string "fpload")) (if_then_else - (match_test "update_indexed_address_mem (operands[1], VOIDmode)") - (const_string "fpload_ux") - (const_string "fpload")) - (if_then_else - (match_test "update_indexed_address_mem (operands[0], VOIDmode)") - (const_string "fpstore_ux") - (const_string "fpstore")) - (if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") (const_string "fpstore_ux") (const_string "fpstore")) (const_string "vecsimple") (const_string "vecsimple") - (const_string "vecsimple") (const_string "store") (const_string "load") (const_string "two") (const_string "fp") (const_string "fp") (const_string "*")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8,8,8,8,12,16")]) + (set_attr "length" "4,4,4,4,4,4,4,8,8,8,8,12,16")]) (define_insn "*mov<mode>_softfloat32" [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,r,r,r") @@ -9572,8 +9572,8 @@ (define_insn "*mov<mode>_softfloat32" ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. 
(define_insn "*mov<mode>_hardfloat64" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))] "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" @@ -9582,11 +9582,8 @@ (define_insn "*mov<mode>_hardfloat64" lfd%U1%X1 %0,%1 fmr %0,%1 lxsd%U1x %x0,%y1 - lxsd%U1x %x0,%y1 - stxsd%U0x %x1,%y0 stxsd%U0x %x1,%y0 xxlor %x0,%x1,%x1 - xxlor %x0,%x1,%x1 xxlxor %x0,%x0,%x0 std%U0%X0 %1,%0 ld%U1%X1 %0,%1 @@ -9622,20 +9619,11 @@ (define_insn "*mov<mode>_hardfloat64" (const_string "fpload_ux") (const_string "fpload")) (if_then_else - (match_test "update_indexed_address_mem (operands[1], VOIDmode)") - (const_string "fpload_ux") - (const_string "fpload")) - (if_then_else - (match_test "update_indexed_address_mem (operands[0], VOIDmode)") - (const_string "fpstore_ux") - (const_string "fpstore")) - (if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") (const_string "fpstore_ux") (const_string "fpstore")) (const_string "vecsimple") (const_string "vecsimple") - (const_string "vecsimple") (if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") (const_string "store_ux") @@ -9661,7 +9649,7 @@ (define_insn "*mov<mode>_hardfloat64" (const_string "mffgpr") (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")]) (define_insn "*mov<mode>_softfloat64" [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h") 
@@ -10324,8 +10312,8 @@ (define_split { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) (define_insn "*movdi_internal64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm") - (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wv,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm") + (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wv,Z,wa,*h,r,0,O,*wg,r,*wm,r"))] "TARGET_POWERPC64 && (gpc_reg_operand (operands[0], DImode) || gpc_reg_operand (operands[1], DImode))" Index: gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c (revision 0) @@ -0,0 +1,42 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-sf" } */ + +float load_sf (float *p) +{ + float f = *p; + __asm__ ("# reg %x0" : "+v" (f)); + return f; +} + +double load_df (double *p) +{ + double d = *p; + __asm__ ("# reg %x0" : "+v" (d)); + return d; +} + +double load_dfsf (float *p) +{ + double d = (double) *p; + __asm__ ("# reg %x0" : "+v" (d)); + return d; +} + +void store_sf (float *p, float f) +{ + __asm__ ("# reg %x0" : "+v" (f)); + *p = f; +} + +void store_df (double *p, double d) +{ + __asm__ ("# reg %x0" : "+v" (d)); + *p = d; +} + +/* { dg-final { scan-assembler-times "lxsspx" 2 } } */ +/* { dg-final { scan-assembler-times "lxsdx" 1 } } */ +/* { dg-final { scan-assembler-times "stxsspx" 1 } } */ +/* { dg-final { scan-assembler-times "stxsdx" 1 } } */