This patch fixes a bug with vector extracts using a PC-relative address and a variable offset with using -mcpu=future.
Consider the code: #include <altivec.h> static vector double vd; vector double *p = &vd; double get (unsigned int n) { return vec_extract (vd, n); } If you compile this code with -O2 -mcpu=future -mpcrel you get: get: pla 9,.LANCHOR0@pcrel lfdx 1,9,9 blr This is because there is only one base register temporary, and the current code tries to first create the offset and then use the same temporary to hold the address of the PC-relative value. After combine the insn is: (insn 14 9 15 2 (parallel [ (set (reg/i:DF 33 1) (unspec:DF [ (mem/c:V2DF (symbol_ref:DI ("*.LANCHOR0") [flags 0x182]) [1 vd+0 S16 A128]) (reg:DI 123 [ n ]) ] UNSPEC_VSX_EXTRACT)) (clobber (scratch:DI)) (clobber (scratch:V2DI)) ]) "foo.c":9:1 1314 {vsx_extract_v2df_var} Split2 changes this to: (insn 20 8 21 2 (set (reg:DI 3 3 [orig:123 n ] [123]) (and:DI (reg:DI 3 3 [orig:123 n ] [123]) (const_int 1 [0x1]))) "foo.c":9:1 193 {anddi3_mask} (nil)) (insn 21 20 22 2 (set (reg:DI 9 9 [126]) (ashift:DI (reg:DI 3 3 [orig:123 n ] [123]) (const_int 3 [0x3]))) "foo.c":9:1 256 {ashldi3} (nil)) (insn 22 21 23 2 (set (reg:DI 9 9 [126]) (symbol_ref:DI ("*.LANCHOR0") [flags 0x182])) "foo.c":9:1 680 {*pcrel_local_addr} (nil)) (insn 23 22 15 2 (set (reg/i:DF 33 1) (mem/c:DF (plus:DI (reg:DI 9 9 [126]) (reg:DI 9 9 [126])) [1 S8 A8])) "foo.c":9:1 512 {*movdf_hardfloat64} (nil)) I.e. setting GPR r9 first to the offset << 3, and then wiping out the offset and setting in the address of the PC-relative structure. This patch changes all of the variable extract insns and the function in rs6000.c that processes them to have a second base register temporary only if we have prefixed addresses. The code generated then becomes: get: extsw 3,3 pla 10,.LANCHOR0@pcrel rldicl 3,3,0,63 sldi 9,3,3 lfdx 1,10,9 I use the em and ep constraints to keep the alternatives separate. Using em prevents the register allocator from skipping the alternative with ep in it because it has an extra scratch register. I have bootstrapped the compiler on a little endian power8 system and ran make check without regression. Can I check this in once patch V10 #4 is checked in? 2019-12-10 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/rs6000-protos.h (rs6000_split_vec_extract_var): Update calling signature. * config/rs6000/rs6000.c (rs6000_split_vec_extract_var): Add additional tmp base register argument. If the memory is prefixed, put the address into the new tmp base register. * config/rs6000/vsx.md (vsx_extract_<mode>_var, VSX_D iterator): Add new temporary for loading up the address of prefixed memory operands. (vsx_extract_v4sf_var): Add new temporary for loading up the address of prefixed memory operands. (vsx_extract_<mode>_var, VSX_EXTRACT_I iterator): Add new temporary for loading up the address of prefixed memory operands. (vsx_extract_<mode>_<VS_scalar>mode_var): Add new temporary for loading up the address of prefixed memory operands. Index: gcc/config/rs6000/rs6000-protos.h =================================================================== --- gcc/config/rs6000/rs6000-protos.h (revision 279182) +++ gcc/config/rs6000/rs6000-protos.h (working copy) @@ -59,7 +59,7 @@ extern void rs6000_expand_float128_conve extern void rs6000_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); extern void rs6000_expand_vector_extract (rtx, rtx, rtx); -extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx); +extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx, rtx); extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode); extern void altivec_expand_vec_perm_le (rtx op[4]); extern void rs6000_expand_extract_even (rtx, rtx, rtx); Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 279182) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -6861,7 +6861,7 @@ rs6000_adjust_vec_address (rtx scalar_re void rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr, - rtx tmp_altivec) + rtx tmp_altivec, rtx tmp_prefixed) { machine_mode mode = GET_MODE (src); machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src)); @@ -6878,6 +6878,16 @@ rs6000_split_vec_extract_var (rtx dest, int num_elements = GET_MODE_NUNITS (mode); rtx num_ele_m1 = GEN_INT (num_elements - 1); + /* If we have a prefixed address, we need to load the address into a + separate register and then add the variable offset to that + address. */ + if (prefixed_memory (src, mode)) + { + gcc_assert (REG_P (tmp_prefixed)); + rs6000_emit_move (tmp_prefixed, XEXP (src, 0), Pmode); + src = change_address (src, mode, tmp_prefixed); + } + emit_insn (gen_anddi3 (element, element, num_ele_m1)); gcc_assert (REG_P (tmp_gpr)); emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element, Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (revision 279182) +++ gcc/config/rs6000/vsx.md (working copy) @@ -3247,21 +3247,24 @@ (define_insn "vsx_vslo_<mode>" ;; Variable V2DI/V2DF extract (define_insn_and_split "vsx_extract_<mode>_var" - [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,wa,r") - (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m") - (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] - UNSPEC_VSX_EXTRACT)) - (clobber (match_scratch:DI 3 "=r,&b,&b")) - (clobber (match_scratch:V2DI 4 "=&v,X,X"))] + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,wa,r,wa,r") + (unspec:<VS_scalar> + [(match_operand:VSX_D 1 "input_operand" "v,em,em,ep,ep") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r,r,r")] + UNSPEC_VSX_EXTRACT)) + (clobber (match_scratch:DI 3 "=r,&b,&b,&b,&b")) + (clobber (match_scratch:V2DI 4 "=&v,X,X,X,X")) + (clobber (match_scratch:DI 5 "=X,X,X,&b,&b"))] "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" "#" "&& reload_completed" [(const_int 0)] { rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], - operands[3], operands[4]); + operands[3], operands[4], operands[5]); DONE; -}) +} + [(set_attr "isa" "*,*,*,fut,fut")]) ;; Extract a SF element from V4SF (define_insn_and_split "vsx_extract_v4sf" @@ -3317,21 +3320,23 @@ (define_insn_and_split "*vsx_extract_v4s ;; Variable V4SF extract (define_insn_and_split "vsx_extract_v4sf_var" - [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,wa,?r") - (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m") - (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] + [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,wa,?r,wa,?r") + (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,em,em,ep,ep") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r,r,r")] UNSPEC_VSX_EXTRACT)) - (clobber (match_scratch:DI 3 "=r,&b,&b")) - (clobber (match_scratch:V2DI 4 "=&v,X,X"))] + (clobber (match_scratch:DI 3 "=r,&b,&b,&b,&b")) + (clobber (match_scratch:V2DI 4 "=&v,X,X,X,X")) + (clobber (match_scratch:DI 5 "=X,X,X,&b,&b"))] "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT" "#" "&& reload_completed" [(const_int 0)] { rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], - operands[3], operands[4]); + operands[3], operands[4], operands[5]); DONE; -}) +} + [(set_attr "isa" "*,*,*,fut,fut")]) ;; Expand the builtin form of xxpermdi to canonical rtl. (define_expand "vsx_xxpermdi_<mode>" @@ -3679,33 +3684,35 @@ (define_insn_and_split "*vsx_extract_<mo ;; Variable V16QI/V8HI/V4SI extract (define_insn_and_split "vsx_extract_<mode>_var" - [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r") + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r,r") (unspec:<VS_scalar> - [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,v,m") - (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] + [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,v,em,ep") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r,r")] UNSPEC_VSX_EXTRACT)) - (clobber (match_scratch:DI 3 "=r,r,&b")) - (clobber (match_scratch:V2DI 4 "=X,&v,X"))] + (clobber (match_scratch:DI 3 "=r,r,&b,&b")) + (clobber (match_scratch:V2DI 4 "=X,&v,X,X")) + (clobber (match_scratch:DI 5 "=X,X,X,&b"))] "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" "#" "&& reload_completed" [(const_int 0)] { rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], - operands[3], operands[4]); + operands[3], operands[4], operands[5]); DONE; } - [(set_attr "isa" "p9v,*,*")]) + [(set_attr "isa" "p9v,*,*,fut")]) (define_insn_and_split "*vsx_extract_<mode>_<VS_scalar>mode_var" - [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r") + [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r,r") (zero_extend:<VS_scalar> (unspec:<VSX_EXTRACT_I:VS_scalar> - [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,v,m") - (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] + [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,v,em,ep") + (match_operand:DI 2 "gpc_reg_operand" "r,r,r,r")] UNSPEC_VSX_EXTRACT))) - (clobber (match_scratch:DI 3 "=r,r,&b")) - (clobber (match_scratch:V2DI 4 "=X,&v,X"))] + (clobber (match_scratch:DI 3 "=r,r,&b,&b")) + (clobber (match_scratch:V2DI 4 "=X,&v,X,X")) + (clobber (match_scratch:DI 5 "=X,X,X,&b"))] "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT" "#" "&& reload_completed" @@ -3714,10 +3721,11 @@ (define_insn_and_split "*vsx_extract_<mo machine_mode smode = <VS_scalar>mode; rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])), operands[1], operands[2], - operands[3], operands[4]); + operands[3], operands[4], + operands[5]); DONE; } - [(set_attr "isa" "p9v,*,*")]) + [(set_attr "isa" "p9v,*,*,fut")]) ;; VSX_EXTRACT optimizations ;; Optimize double d = (double) vec_extract (vi, <n>) -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797