This patch optimizes vector extracts from memory where the vector's address includes an offset that does not fit in 16 bits: when prefixed addressing is available, the add is folded into the final address instead of being done separately.
For example: #include <altivec.h> double get (vector double *p, unsigned int h) { return vec_extract (p[50000], 1); } I have bootstrapped this patch on a little-endian power8 system and run make check with no regressions. Can I check this patch in? 2019-12-10 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/rs6000.c (rs6000_adjust_vec_address): Add support for the offset being 34-bits when -mcpu=future is used. Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 279199) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -6766,9 +6766,17 @@ rs6000_adjust_vec_address (rtx scalar_re HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset); rtx offset_rtx = GEN_INT (offset); - if (IN_RANGE (offset, -32768, 32767) + /* 16-bit offset. */ + if (SIGNED_16BIT_OFFSET_P (offset) && (scalar_size < 8 || (offset & 0x3) == 0)) new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx); + + /* 34-bit offset if we have prefixed addresses. */ + else if (TARGET_PREFIXED_ADDR && SIGNED_34BIT_OFFSET_P (offset)) + new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx); + + /* Offset overflowed, move offset to the temporary (which will likely + be split), and do X-FORM addressing. */ else { emit_move_insn (base_tmp, offset_rtx); @@ -6799,6 +6807,12 @@ rs6000_adjust_vec_address (rtx scalar_re emit_insn (insn); } + /* Make sure we don't overwrite the temporary if the element being + extracted is variable, and we've put the offset into base_tmp + previously. */ + else if (rtx_equal_p (base_tmp, element_offset)) + emit_insn (gen_add2_insn (base_tmp, op1)); + else { emit_move_insn (base_tmp, op1); -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797