Hi! This patch changes get_best_mode, so that it doesn't suggest using larger modes if it means it could clobber variables located after the containing one.
On the attached testcase on x86_64, the structure is 12 bytes wide, and the last bitfield occupies 14 bits in the bytes 8 and 9 from the beginning of the structure, then there are 2 padding bytes. get_best_mode returns DImode as preferred way to store the structure, which means it reads and stores not just the 2 remaining bits in the 9th byte and two padding bytes, but also for unrelated bytes afterwards. The aliasing code, when comparing mem access that stores to that bitfield using DImode and another store to the following 32-bit variable, sees both stores are to different variables and say they must not alias. So we end up incorrect: movq e+8(%rip), %rax movl $2, f(%rip) andq $-16384, %rax movq %rax, e+8(%rip) instead of: movl e+8(%rip), %eax movl $2, f(%rip) andl $-16384, %eax movl %eax, e+8(%rip) (with the patch) or: movq e+8(%rip), %rax andq $-16384, %rax movq %rax, e+8(%rip) movl $2, f(%rip) (if aliasing didn't say accesses to e and f don't alias). This patch fixes this by passing get_best_mode the type of the containing object, so get_best_mode can better decide what mode to use (the intent is mainly for Aldy to do something more strict for C++0x memory model, currently it just looks at TYPE_SIZE). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and after a while to 4.6? I've also done instrumented bootstrap/regtest on both of these targets, which showed that the patch only makes difference in get_best_mode return value on libmudflap/testsuite/libmudflap.c/fail38-frag.c (main) (both -m32 and -m64) and on the two newly added testcases (only -m64). 2011-04-01 Jakub Jelinek <ja...@redhat.com> PR middle-end/48124 * stor-layout.c (get_best_mode): Add TYPE argument, if non-NULL and bitpos + tmode's bitsize is bigger than TYPE_SIZE, don't use tmode as wider_mode. * rtl.h (get_best_mode): New prototype. * machmode.h (get_best_mode): Remove prototype. * expmed.c (store_fixed_bit_field, store_split_bit_field, store_bit_field_1): Add ORIG_MEM argument, pass it down and pass its MEM_EXPR's type to get_best_mode. (store_bit_field): Pass str_rtx as ORIG_MEM to store_bit_field_1 if it is a MEM. (extract_bit_field_1, extract_fixed_bit_field): Pass NULL as last argument to get_best_mode. * expr.c (optimize_bitfield_assignment_op): Pass MEM_EXPR (str_rtx) type as last argument to get_best_mode. * fold-const.c (optimize_bit_field_compare, fold_truthop): Pass NULL as last argument to get_best_mode. * gcc.c-torture/execute/pr48124.c: New test. * gcc.dg/pr48124.c: New test. --- gcc/stor-layout.c.jj 2011-03-11 12:16:39.000000000 +0100 +++ gcc/stor-layout.c 2011-03-31 15:58:05.000000000 +0200 @@ -2445,7 +2445,8 @@ fixup_unsigned_type (tree type) enum machine_mode get_best_mode (int bitsize, int bitpos, unsigned int align, - enum machine_mode largest_mode, int volatilep) + enum machine_mode largest_mode, int volatilep, + tree type) { enum machine_mode mode; unsigned int unit = 0; @@ -2484,7 +2485,12 @@ get_best_mode (int bitsize, int bitpos, && unit <= BITS_PER_WORD && unit <= MIN (align, BIGGEST_ALIGNMENT) && (largest_mode == VOIDmode - || unit <= GET_MODE_BITSIZE (largest_mode))) + || unit <= GET_MODE_BITSIZE (largest_mode)) + && (type == NULL_TREE + || !host_integerp (TYPE_SIZE (type), 1) + || bitpos + (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (tmode) + <= (unsigned HOST_WIDE_INT) + tree_low_cst (TYPE_SIZE (type), 1))) wide_mode = tmode; } --- gcc/rtl.h.jj 2011-03-31 08:51:04.000000000 +0200 +++ gcc/rtl.h 2011-03-31 14:07:18.000000000 +0200 @@ -2525,6 +2525,11 @@ extern bool expensive_function_p (int); extern unsigned int variable_tracking_main (void); /* In stor-layout.c. */ + +/* Find the best mode to use to access a bit field. */ +extern enum machine_mode get_best_mode (int, int, unsigned int, + enum machine_mode, int, tree); + extern void get_mode_bounds (enum machine_mode, int, enum machine_mode, rtx *, rtx *); --- gcc/machmode.h.jj 2011-01-06 10:21:52.000000000 +0100 +++ gcc/machmode.h 2011-03-31 14:06:54.000000000 +0200 @@ -246,11 +246,6 @@ extern enum machine_mode int_mode_for_mo extern enum machine_mode mode_for_vector (enum machine_mode, unsigned); -/* Find the best mode to use to access a bit field. */ - -extern enum machine_mode get_best_mode (int, int, unsigned int, - enum machine_mode, int); - /* Determine alignment, 1<=result<=BIGGEST_ALIGNMENT. */ extern CONST_MODE_BASE_ALIGN unsigned char mode_base_align[NUM_MACHINE_MODES]; --- gcc/expmed.c.jj 2011-03-31 08:50:43.000000000 +0200 +++ gcc/expmed.c 2011-03-31 16:21:25.000000000 +0200 @@ -47,9 +47,9 @@ struct target_expmed *this_target_expmed static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, rtx); + unsigned HOST_WIDE_INT, rtx, rtx); static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, rtx); + unsigned HOST_WIDE_INT, rtx, rtx); static rtx extract_fixed_bit_field (enum machine_mode, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, @@ -334,7 +334,7 @@ mode_for_extraction (enum extraction_pat static bool store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, - rtx value, bool fallback_p) + rtx value, bool fallback_p, rtx orig_mem) { unsigned int unit = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD; @@ -542,7 +542,7 @@ store_bit_field_1 (rtx str_rtx, unsigned if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD), bitnum + bit_offset, word_mode, - value_word, fallback_p)) + value_word, fallback_p, orig_mem)) { delete_insns_since (last); return false; @@ -714,10 +714,18 @@ store_bit_field_1 (rtx str_rtx, unsigned if (GET_MODE (op0) == BLKmode || (op_mode != MAX_MACHINE_MODE && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))) - bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), + bestmode = get_best_mode (bitsize, + bitnum + (orig_mem && MEM_EXPR (orig_mem) + && MEM_OFFSET (orig_mem) + ? INTVAL (MEM_OFFSET (orig_mem)) + : 0), + MEM_ALIGN (op0), (op_mode == MAX_MACHINE_MODE ? VOIDmode : op_mode), - MEM_VOLATILE_P (op0)); + MEM_VOLATILE_P (op0), + orig_mem && MEM_EXPR (orig_mem) + ? TREE_TYPE (MEM_EXPR (orig_mem)) + : NULL_TREE); else bestmode = GET_MODE (op0); @@ -743,7 +751,7 @@ store_bit_field_1 (rtx str_rtx, unsigned the unit. */ tempreg = copy_to_reg (xop0); if (store_bit_field_1 (tempreg, bitsize, xbitpos, - fieldmode, orig_value, false)) + fieldmode, orig_value, false, NULL_RTX)) { emit_move_insn (xop0, tempreg); return true; @@ -755,7 +763,7 @@ store_bit_field_1 (rtx str_rtx, unsigned if (!fallback_p) return false; - store_fixed_bit_field (op0, offset, bitsize, bitpos, value); + store_fixed_bit_field (op0, offset, bitsize, bitpos, value, orig_mem); return true; } @@ -769,7 +777,8 @@ store_bit_field (rtx str_rtx, unsigned H unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, rtx value) { - if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true)) + if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true, + MEM_P (str_rtx) ? str_rtx : NULL_RTX)) gcc_unreachable (); } @@ -785,7 +794,7 @@ store_bit_field (rtx str_rtx, unsigned H static void store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset, unsigned HOST_WIDE_INT bitsize, - unsigned HOST_WIDE_INT bitpos, rtx value) + unsigned HOST_WIDE_INT bitpos, rtx value, rtx orig_mem) { enum machine_mode mode; unsigned int total_bits = BITS_PER_WORD; @@ -806,7 +815,7 @@ store_fixed_bit_field (rtx op0, unsigned /* Special treatment for a bit field split across two registers. */ if (bitsize + bitpos > BITS_PER_WORD) { - store_split_bit_field (op0, bitsize, bitpos, value); + store_split_bit_field (op0, bitsize, bitpos, value, NULL_RTX); return; } } @@ -827,15 +836,21 @@ store_fixed_bit_field (rtx op0, unsigned && flag_strict_volatile_bitfields > 0) mode = GET_MODE (op0); else - mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, - MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0)); + mode = get_best_mode (bitsize, + bitpos + offset * BITS_PER_UNIT + + (orig_mem && MEM_EXPR (orig_mem) + && MEM_OFFSET (orig_mem) + ? INTVAL (MEM_OFFSET (orig_mem)) : 0), + MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0), + orig_mem && MEM_EXPR (orig_mem) + ? TREE_TYPE (MEM_EXPR (orig_mem)) : NULL_TREE); if (mode == VOIDmode) { /* The only way this should occur is if the field spans word boundaries. */ store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT, - value); + value, orig_mem); return; } @@ -955,7 +970,7 @@ store_fixed_bit_field (rtx op0, unsigned static void store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, - unsigned HOST_WIDE_INT bitpos, rtx value) + unsigned HOST_WIDE_INT bitpos, rtx value, rtx orig_mem) { unsigned int unit; unsigned int bitsdone = 0; @@ -1060,7 +1075,7 @@ store_split_bit_field (rtx op0, unsigned /* OFFSET is in UNITs, and UNIT is in bits. store_fixed_bit_field wants offset in bytes. */ store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize, - thispos, part); + thispos, part, orig_mem); bitsdone += thissize; } } @@ -1511,7 +1526,7 @@ extract_bit_field_1 (rtx str_rtx, unsign bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), (ext_mode == MAX_MACHINE_MODE ? VOIDmode : ext_mode), - MEM_VOLATILE_P (op0)); + MEM_VOLATILE_P (op0), NULL_TREE); else bestmode = GET_MODE (op0); @@ -1635,7 +1650,8 @@ extract_fixed_bit_field (enum machine_mo } else mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, - MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0)); + MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0), + NULL_TREE); if (mode == VOIDmode) /* The only way this should occur is if the field spans word --- gcc/expr.c.jj 2011-03-31 08:51:03.000000000 +0200 +++ gcc/expr.c 2011-03-31 16:23:36.000000000 +0200 @@ -3997,8 +3997,13 @@ optimize_bitfield_assignment_op (unsigne if (str_bitsize == 0 || str_bitsize > BITS_PER_WORD) str_mode = word_mode; - str_mode = get_best_mode (bitsize, bitpos, - MEM_ALIGN (str_rtx), str_mode, 0); + str_mode = get_best_mode (bitsize, + bitpos + (MEM_EXPR (str_rtx) + && MEM_OFFSET (str_rtx) + ? INTVAL (MEM_OFFSET (str_rtx)) : 0), + MEM_ALIGN (str_rtx), str_mode, 0, + MEM_EXPR (str_rtx) + ? TREE_TYPE (MEM_EXPR (str_rtx)) : NULL_TREE); if (str_mode == VOIDmode) return false; str_bitsize = GET_MODE_BITSIZE (str_mode); --- gcc/fold-const.c.jj 2011-03-31 08:51:02.000000000 +0200 +++ gcc/fold-const.c 2011-03-31 15:29:59.000000000 +0200 @@ -3411,7 +3411,7 @@ optimize_bit_field_compare (location_t l const_p ? TYPE_ALIGN (TREE_TYPE (linner)) : MIN (TYPE_ALIGN (TREE_TYPE (linner)), TYPE_ALIGN (TREE_TYPE (rinner))), - word_mode, lvolatilep || rvolatilep); + word_mode, lvolatilep || rvolatilep, NULL_TREE); if (nmode == VOIDmode) return 0; @@ -5237,7 +5237,7 @@ fold_truthop (location_t loc, enum tree_ end_bit = MAX (ll_bitpos + ll_bitsize, rl_bitpos + rl_bitsize); lnmode = get_best_mode (end_bit - first_bit, first_bit, TYPE_ALIGN (TREE_TYPE (ll_inner)), word_mode, - volatilep); + volatilep, NULL_TREE); if (lnmode == VOIDmode) return 0; @@ -5302,7 +5302,7 @@ fold_truthop (location_t loc, enum tree_ end_bit = MAX (lr_bitpos + lr_bitsize, rr_bitpos + rr_bitsize); rnmode = get_best_mode (end_bit - first_bit, first_bit, TYPE_ALIGN (TREE_TYPE (lr_inner)), word_mode, - volatilep); + volatilep, NULL_TREE); if (rnmode == VOIDmode) return 0; --- gcc/testsuite/gcc.c-torture/execute/pr48124.c.jj 2011-04-01 10:53:07.000000000 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr48124.c 2011-04-01 10:53:26.000000000 +0200 @@ -0,0 +1,32 @@ +/* PR middle-end/48124 */ + +extern void abort (void); + +struct S +{ + signed a : 26; + signed b : 16; + signed c : 10; + volatile signed d : 14; +}; + +static struct S e = { 0, 0, 0, 1 }; +static int f = 1; + +void __attribute__((noinline)) +foo (void) +{ + e.d = 0; + f = 2; +} + +int +main () +{ + if (e.a || e.b || e.c || e.d != 1 || f != 1) + abort (); + foo (); + if (e.a || e.b || e.c || e.d || f != 2) + abort (); + return 0; +} --- gcc/testsuite/gcc.dg/pr48124.c.jj 2011-04-01 10:52:55.000000000 +0200 +++ gcc/testsuite/gcc.dg/pr48124.c 2011-04-01 10:53:40.000000000 +0200 @@ -0,0 +1,34 @@ +/* PR middle-end/48124 */ +/* { dg-do run } */ +/* { dg-options "-Os -fno-toplevel-reorder" } */ + +extern void abort (void); + +struct S +{ + signed a : 26; + signed b : 16; + signed c : 10; + volatile signed d : 14; +}; + +static struct S e = { 0, 0, 0, 1 }; +static int f = 1; + +void __attribute__((noinline)) +foo (void) +{ + e.d = 0; + f = 2; +} + +int +main () +{ + if (e.a || e.b || e.c || e.d != 1 || f != 1) + abort (); + foo (); + if (e.a || e.b || e.c || e.d || f != 2) + abort (); + return 0; +} Jakub