On Tue, 18 Jun 2013 17:10:37 +0100 Richard Earnshaw <rearn...@arm.com> wrote:
> On 18/06/13 16:42, Julian Brown wrote: > > Hi, > > > > The following patch removed pool_range/neg_pool_range attributes > > from several instructions as a cleanup, which I believe to have been > > incorrect: > > > > http://gcc.gnu.org/ml/gcc-patches/2010-07/msg01036.html > > > > On a Mentor-local branch, this caused problems with instructions > > like: > > > > (insn 77 53 87 (set (reg:SI 8 r8 [orig:197 s.4 ] [197]) > > (zero_extend:SI (mem/u/c:HI (symbol_ref/u:SI ("*.LC0") > > [flags 0x2]) [7 S2 A16]))) [...] 161 {*arm_zero_extendhisi2_v6} > > (nil)) > > > > The reasoning behind the cleanup was that the instructions in > > question have no immediate constraints -- but the minipool code is > > used for more than just immediates, e.g. in the above case where a > > symbol reference ("m") is loaded. > > > > I don't have a test case for the problem on mainline at present, > > but I believe it is still a latent bug. Tested with the default > > multilibs (ARM & Thumb mode) on arm-none-eabi, with no regressions. > > (The patch has also been tested with more multilibs on our local > > branches for a while, and I did ensure previously that it did not > > adversely affect Bernd's patch linked above.) > > > > OK to apply? > > > > Pushing extending loads (particularly sign-extending loads) into the > minipools adversely affects freedom of pool placement (since the > offset ranges are limited). > > And they shouldn't be happening anyway. I really don't think this is > the right solution to the problem you have. How about this as a fix instead? IIUC, all the instances of this bug that have been reported relate to insns following the pattern: (set (reg) (sign/zero_extend:SI (symbol_ref:QI/HI))) where the symbol_ref is a match_operand with an "m" constraint. These kinds of addresses are explicitly permitted by arm_legitimate_address_outer_p and thumb2_legitimate_address_p by the last clause in the functions, "if (GET_MODE_CLASS (mode) != MODE_FLOAT && ...", etc. So, I think the obvious thing to do is to forbid sub-SImode-sized quantities in that clause. Perhaps tellingly, the thumb1 version of the function already forbids sizes other than 4 for the mode size in the equivalent clause. For non-Thumb1, we probably want to retain DImode (etc.) minipool entries. The change to generated code for a re-reduced version of the testcase from: https://bugzilla.redhat.com/show_bug.cgi?id=927565 (attached), for an insn which breaks before the attached patch is applied, 210r.reload: before: (insn 7 30 33 2 (set (reg:SI 12 ip [orig:110 D.4194 ] [110]) (zero_extend:SI (mem/u/c:QI (symbol_ref/u:SI ("*.LC0") [flags 0x2]) [0 S1 A8]))) minipool.c:11 182 {*arm_zero_extendqisi2_v6} (nil)) after (with a reload): (insn 221 30 7 2 (set (reg:SI 0 r0) (symbol_ref/u:SI ("*.LC0") [flags 0x2])) minipool.c:11 197 {*arm_movsi_insn} (nil)) (insn 7 221 33 2 (set (reg:SI 12 ip [orig:110 D.4194 ] [110]) (zero_extend:SI (mem/u/c:QI (reg:SI 0 r0) [0 S1 A8]))) minipool.c:11 182 {*arm_zero_extendqisi2_v6} (nil)) (Compiled with -march=armv7-a -O2.) Tested cross to arm-none-eabi, default & mthumb multilibs. I also removed some additional pool_range/neg_pool_ranges from similar extension instructions, for consistency. OK to apply? Thanks, Julian ChangeLog gcc/ * config/arm/arm.c (arm_legitimate_address_outer_p) (thumb2_legitimate_address_p): Don't allow symbol refs with mode size smaller than a word. * config/arm/arm.md (thumb1_zero_extendqisi2, *arm_extendhisi2) (*arm_extendhisi2_v6, *arm_extendqihi_insn, *arm_extendqisi) (*arm_extendqisi_v6): Remove pool_range/neg_pool_range attributes.
Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c (revision 200204) +++ gcc/config/arm/arm.c (working copy) @@ -5947,6 +5947,7 @@ arm_legitimate_address_outer_p (enum mac #endif else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && GET_MODE_SIZE (mode) >= UNITS_PER_WORD && code == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x) && ! (flag_pic @@ -6022,6 +6023,7 @@ thumb2_legitimate_address_p (enum machin } else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && GET_MODE_SIZE (mode) >= UNITS_PER_WORD && code == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x) && ! (flag_pic Index: gcc/config/arm/arm.md =================================================================== --- gcc/config/arm/arm.md (revision 200204) +++ gcc/config/arm/arm.md (working copy) @@ -5432,8 +5432,7 @@ # ldrb\\t%0, %1" [(set_attr "length" "4,2") - (set_attr "type" "alu_shift,load_byte") - (set_attr "pool_range" "*,32")] + (set_attr "type" "alu_shift,load_byte")] ) (define_insn "*thumb1_zero_extendqisi2_v6" @@ -5700,9 +5699,7 @@ ldr%(sh%)\\t%0, %1" [(set_attr "length" "8,4") (set_attr "type" "alu_shift,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) ;; ??? Check Thumb-2 pool range @@ -5714,9 +5711,7 @@ sxth%?\\t%0, %1 ldr%(sh%)\\t%0, %1" [(set_attr "type" "simple_alu_shift,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) (define_insn "*arm_extendhisi2addsi" @@ -5758,9 +5753,7 @@ "TARGET_ARM && arm_arch4" "ldr%(sb%)\\t%0, %1" [(set_attr "type" "load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "256") - (set_attr "neg_pool_range" "244")] + (set_attr "predicable" "yes")] ) (define_expand "extendqisi2" @@ -5800,9 +5793,7 @@ ldr%(sb%)\\t%0, %1" [(set_attr "length" "8,4") (set_attr "type" "alu_shift,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) (define_insn "*arm_extendqisi_v6" @@ -5814,9 +5805,7 @@ sxtb%?\\t%0, %1 ldr%(sb%)\\t%0, %1" [(set_attr "type" "simple_alu_shift,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) (define_insn "*arm_extendqisi2addsi"
typedef unsigned int uint_32t; typedef struct { uint_32t ks[60]; } aes_decrypt_ctx; extern const uint_32t t_rc[(5 * (16 / 4 - 2))]; extern const uint_32t t_fl[4][256]; extern const uint_32t t_im[4][256]; int aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]) { uint_32t ss[9]; cx->ks[((56) - ((7)) + 2 * (((7)) & 3))] = ( t_im[0][((((ss[7])) >> (8 * ((0)))) & 0xff)] ^ t_im[1][((((ss[7])) >> (8 * ((1)))) & 0xff)] ^ t_im[2][((((ss[7])) >> (8 * ((2)))) & 0xff)] ^ t_im[3][((((ss[7])) >> (8 * ((3)))) & 0xff)]); { ss[0] ^= ( t_fl[0][((((ss[7])) >> (8 * (((8+0 -3)&3)))) & 0xff)] ^ t_fl[1][((((ss[7])) >> (8 * (((8+1 -3)&3)))) & 0xff)] ^ t_fl[2][((((ss[7])) >> (8 * (((8+2 -3)&3)))) & 0xff)] ^ t_fl[3][((((ss[7])) >> (8 * (((8+3 -3)&3)))) & 0xff)]) ^ t_rc[0]; cx->ks[((56) - ((8*(0))+ 8) + 2 * (((8*(0))+ 8) & 3))] = ( t_im[0][((((ss[0])) >> (8 * ((0)))) & 0xff)] ^ t_im[1][((((ss[0])) >> (8 * ((1)))) & 0xff)] ^ t_im[2][((((ss[0])) >> (8 * ((2)))) & 0xff)] ^ t_im[3][((((ss[0])) >> (8 * ((3)))) & 0xff)]); ss[1] ^= ss[0]; cx->ks[((56) - ((8*(0))+ 9) + 2 * (((8*(0))+ 9) & 3))] = ( t_im[0][((((ss[1])) >> (8 * ((0)))) & 0xff)] ^ t_im[1][((((ss[1])) >> (8 * ((1)))) & 0xff)] ^ t_im[2][((((ss[1])) >> (8 * ((2)))) & 0xff)] ^ t_im[3][((((ss[1])) >> (8 * ((3)))) & 0xff)]); ss[4] ^= ( t_fl[0][((((ss[3])) >> (8 * (((8+0 -0)&3)))) & 0xff)] ^ t_fl[1][((((ss[3])) >> (8 * (((8+1 -0)&3)))) & 0xff)] ^ t_fl[2][((((ss[3])) >> (8 * (((8+2 -0)&3)))) & 0xff)] ^ t_fl[3][((((ss[3])) >> (8 * (((8+3 -0)&3)))) & 0xff)]); cx->ks[((56) - ((8*(0))+12) + 2 * (((8*(0))+12) & 3))] = ( t_im[0][((((ss[4])) >> (8 * ((0)))) & 0xff)] ^ t_im[1][((((ss[4])) >> (8 * ((1)))) & 0xff)] ^ t_im[2][((((ss[4])) >> (8 * ((2)))) & 0xff)] ^ t_im[3][((((ss[4])) >> (8 * ((3)))) & 0xff)]); ss[5] ^= ss[4]; cx->ks[((56) - ((8*(0))+13) + 2 * (((8*(0))+13) & 3))] = ( t_im[0][((((ss[5])) >> (8 * ((0)))) & 0xff)] ^ t_im[1][((((ss[5])) >> (8 * ((1)))) & 0xff)] ^ t_im[2][((((ss[5])) >> (8 * ((2)))) & 0xff)] ^ t_im[3][((((ss[5])) >> (8 * ((3)))) & 0xff)]); ss[6] ^= ss[5]; cx->ks[((56) - ((8*(0))+14) + 2 * (((8*(0))+14) & 3))] = ( t_im[0][((((ss[6])) >> (8 * ((0)))) & 0xff)] ^ t_im[1][((((ss[6])) >> (8 * ((1)))) & 0xff)] ^ t_im[2][((((ss[6])) >> (8 * ((2)))) & 0xff)] ^ t_im[3][((((ss[6])) >> (8 * ((3)))) & 0xff)]); ss[7] ^= ss[6]; cx->ks[((56) - ((8*(0))+15) + 2 * (((8*(0))+15) & 3))] = ( t_im[0][((((ss[7])) >> (8 * ((0)))) & 0xff)] ^ t_im[1][((((ss[7])) >> (8 * ((1)))) & 0xff)] ^ t_im[2][((((ss[7])) >> (8 * ((2)))) & 0xff)] ^ t_im[3][((((ss[7])) >> (8 * ((3)))) & 0xff)]); }; }