This patch optimizes the following RTL sequence (it works for char/short/int/void*): 6: r119:TI=[r118:DI+0x10] 7: [r118:DI]=r119:TI 8: r121:DI=[r118:DI+0x8]
=> 6: r119:TI=[r118:DI+0x10] 16: r122:DI=r119:TI#8 The final ASM will be as below, without a partial load after a full store (stxv+ld): ld 10,16(3) mr 9,3 ld 3,24(3) std 10,0(9) std 3,8(9) blr It could achieve a ~25% performance improvement for typical cases on Power9. Bootstrap and regression tested on Power9-LE. BTW, for AArch64, one ldr is replaced by mov with this patch, though no performance change was observed: ldp x2, x3, [x0, 16] stp x2, x3, [x0] ldr x0, [x0, 8] => mov x1, x0 ldp x2, x0, [x0, 16] stp x2, x0, [x1] gcc/ChangeLog: 2020-07-21 Xionghu Luo <luo...@linux.ibm.com> PR rtl-optimization/71309 * dse.c (get_stored_val): Use subreg before extract if shifting from high part. gcc/testsuite/ChangeLog: 2020-07-21 Xionghu Luo <luo...@linux.ibm.com> PR rtl-optimization/71309 * gcc.target/powerpc/pr71309.c: New test. * gcc.target/powerpc/fold-vec-extract-short.p7.c: Add -mbig. --- gcc/dse.c | 26 ++++++++++++--- .../powerpc/fold-vec-extract-short.p7.c | 2 +- gcc/testsuite/gcc.target/powerpc/pr71309.c | 33 +++++++++++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr71309.c diff --git a/gcc/dse.c b/gcc/dse.c index bbe792e48e8..13f952ee5ff 100644 --- a/gcc/dse.c +++ b/gcc/dse.c @@ -1855,7 +1855,7 @@ get_stored_val (store_info *store_info, machine_mode read_mode, { machine_mode store_mode = GET_MODE (store_info->mem); poly_int64 gap; - rtx read_reg; + rtx read_reg = NULL; /* To get here the read is within the boundaries of the write so shift will never be negative. 
Start out with the shift being in @@ -1872,9 +1872,27 @@ get_stored_val (store_info *store_info, machine_mode read_mode, { poly_int64 shift = gap * BITS_PER_UNIT; poly_int64 access_size = GET_MODE_SIZE (read_mode) + gap; - read_reg = find_shift_sequence (access_size, store_info, read_mode, - shift, optimize_bb_for_speed_p (bb), - require_cst); + rtx rhs_subreg = NULL; + + if (known_eq (GET_MODE_BITSIZE (store_mode), shift * 2)) + { + scalar_int_mode inner_mode = smallest_int_mode_for_size (shift); + poly_uint64 sub_off + = ((!BYTES_BIG_ENDIAN) + ? GET_MODE_SIZE (store_mode) - GET_MODE_SIZE (inner_mode) + : 0); + + rhs_subreg = simplify_gen_subreg (inner_mode, store_info->rhs, + store_mode, sub_off); + if (rhs_subreg) + read_reg + = extract_low_bits (read_mode, inner_mode, copy_rtx (rhs_subreg)); + } + + if (read_reg == NULL) + read_reg + = find_shift_sequence (access_size, store_info, read_mode, shift, + optimize_bb_for_speed_p (bb), require_cst); } else if (store_mode == BLKmode) { diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c index 8616e7b11ad..b5cefe7dc12 100644 --- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-short.p7.c @@ -3,7 +3,7 @@ /* { dg-do compile { target { powerpc*-*-linux* } } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-mdejagnu-cpu=power7 -O2" } */ +/* { dg-options "-mdejagnu-cpu=power7 -O2 -mbig" } */ // six tests total. Targeting P7 BE. 
// p7 (be) vars: li, addi, stxvw4x, rldic, addi, lhax/lhzx diff --git a/gcc/testsuite/gcc.target/powerpc/pr71309.c b/gcc/testsuite/gcc.target/powerpc/pr71309.c new file mode 100644 index 00000000000..94d727a8ed9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr71309.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power9" } */ + +#define TYPE void* +#define TYPE2 void* + +struct path { + TYPE2 mnt; + TYPE dentry; +}; + +struct nameidata { + struct path path; + struct path root; +}; + +__attribute__ ((noinline)) +TYPE foo(struct nameidata *nd) +{ + TYPE d; + TYPE2 d2; + + nd->path = nd->root; + d = nd->path.dentry; + d2 = nd->path.mnt; + return d; +} + +/* { dg-final { scan-assembler-not {\mlxv\M} } } */ +/* { dg-final { scan-assembler-not {\mstxv\M} } } */ +/* { dg-final { scan-assembler-times {\mld\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */ -- 2.27.0.90.geebb51ba8c