ping
From: Wilco Dijkstra
Sent: 11 November 2016 13:14
To: Richard Earnshaw; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64] Improve TI mode address offsets

Richard Earnshaw wrote:
> Has this patch been truncated?  The last line above looks to be part-way
> through a hunk.

Oops sorry, it seems the last few lines are missing. Here is the full version:

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3045e6d6447d5c1860feb51708eeb2a21d2caca9..45f44e96ba9e9d3c8c41d977aa509fa13398a8fd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4066,7 +4066,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	 instruction memory accesses.  */
       if (mode == TImode || mode == TFmode)
 	return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
-		&& offset_9bit_signed_unscaled_p (mode, offset));
+		&& (offset_9bit_signed_unscaled_p (mode, offset)
+		    || offset_12bit_unsigned_scaled_p (mode, offset)));
 
       /* A 7bit offset check because OImode will emit a ldp/stp
 	 instruction (only big endian will get here).
@@ -4270,18 +4271,19 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
 /* Split an out-of-range address displacement into a base and offset.
    Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
    to increase opportunities for sharing the base address of different sizes.
-   For TI/TFmode and unaligned accesses use a 256-byte range.  */
+   For unaligned accesses and TI/TF mode use the signed 9-bit range.  */
 static bool
 aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
 {
-  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
+  HOST_WIDE_INT offset = INTVAL (*disp);
+  HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
 
-  if (mode == TImode || mode == TFmode ||
-      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
-    mask = 0xff;
+  if (mode == TImode || mode == TFmode
+      || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
+    base = (offset + 0x100) & ~0x1ff;
 
-  *off = GEN_INT (INTVAL (*disp) & ~mask);
-  *disp = GEN_INT (INTVAL (*disp) & mask);
+  *off = GEN_INT (base);
+  *disp = GEN_INT (offset - base);
   return true;
 }
 
@@ -5148,12 +5150,10 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
 	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
 	}
 
-      /* Does it look like we'll need a load/store-pair operation?  */
+      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
       HOST_WIDE_INT base_offset;
-      if (GET_MODE_SIZE (mode) > 16
-	  || mode == TImode)
-	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
-		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
+      if (GET_MODE_SIZE (mode) > 16)
+	base_offset = (offset + 0x400) & ~0x7f0;
       /* For offsets aren't a multiple of the access size, the limit is
 	 -256...255.  */
       else if (offset & (GET_MODE_SIZE (mode) - 1))
@@ -5167,6 +5167,8 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
       /* Small negative offsets are supported.  */
       else if (IN_RANGE (offset, -256, 0))
 	base_offset = 0;
+      else if (mode == TImode || mode == TFmode)
+	base_offset = (offset + 0x100) & ~0x1ff;
       /* Use 12-bit offset by access size.  */
       else
 	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 24b7288976dd0452f41475e40f02750fc56a2a20..62eda569f9b642ac569a61718d7debf7eae1b59e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1094,9 +1094,9 @@
 
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-	 "nonimmediate_operand" "=r, *w,r ,*w,r ,Ump,Ump,*w,m")
+	 "nonimmediate_operand" "=r, *w,r ,*w,r,m,m,*w,m")
 	(match_operand:TI 1
-	 "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r ,Z , m,*w"))]
+	 "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
   "(register_operand (operands[0], TImode)
     || aarch64_reg_or_zero (operands[1], TImode))"
   "@
@@ -1211,9 +1211,9 @@
 
 (define_insn "*movtf_aarch64"
   [(set (match_operand:TF 0
-	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump,Ump")
+	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
 	(match_operand:TF 1
-	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,Ump,?r ,Y"))]
+	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
   "TARGET_FLOAT && (register_operand (operands[0], TFmode)
     || aarch64_reg_or_fp_zero (operands[1], TFmode))"
   "@
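For readers following along, here is a minimal standalone sketch (not part of the patch, with made-up sample offsets) of the base/offset split that the new aarch64_legitimize_address_displacement code performs for TI/TF mode and unaligned offsets: the anchor is the offset rounded to the nearest multiple of 512, so the remaining displacement always lands in the signed 9-bit range -256..255 mentioned in the updated comment.

/* Illustrative sketch only - mirrors the (offset + 0x100) & ~0x1ff
   rounding used in the patch; the sample offsets are arbitrary.  */
#include <stdio.h>

int main (void)
{
  long long offsets[] = { 0x2345, -0x1234, 511, 513 };

  for (int i = 0; i < 4; i++)
    {
      long long offset = offsets[i];
      /* Round to the nearest 512-byte boundary, as the patch does.  */
      long long base = (offset + 0x100) & ~0x1ffLL;
      long long rest = offset - base;
      printf ("offset %6lld -> base %6lld + displacement %4lld\n",
              offset, base, rest);
    }
  return 0;
}

For example, offset 0x2345 splits into a base of 0x2400 and a displacement of -187, which stays within the -256..255 range.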