Hi,
this is a regression present on the mainline for the ARM in the form of an
assertion failure at -O2 in the back-end, which rightfully refuses to generate
a load from an unaligned memory location (the ARM target is strict-alignment).
It comes from a __builtin_memcpy generating:
MEM <unsigned char[8]> [(char * {ref-all})&b] = MEM <unsigned char[8]> [(char
* {ref-all})a.0_1];
in the .optimized dump, where b is a small union. As a result, store_field is
invoked with:
(gdb) p debug_rtx(target)
(reg/v:DI 113 [ b ])
and sends:
(gdb) p debug_rtx(temp)
(mem:BLK (reg/f:SI 115) [0 MEM <unsigned char[8]> [(char * {ref-all})a.0_1]+0
S8 A8])
to store_bit_field with 64 bits and BLKmode. The unaligned memory reference
is then generated by store_integral_bit_field, which does:
rtx value_word = operand_subword_force (value, wordnum, value_mode);
thus creating an unaligned word mode (SImode) load.
store_integral_bit_field is ready to handle BLKmode fields, there is even a
subtlety with their handling on big-endian targets, see PR middle-end/50325,
but not if they are unaligned, so the fix is simply to call extract_bit_field
for them in order to generate an unaligned load. As a bonus, this subsumes
the big-endian specific path that was added under PR middle-end/50325.
Bootstrapped/regtested on x86-64/Linux and SPARC/Solaris, OK for mainline?
2019-03-13 Eric Botcazou <ebotca...@adacore.com>
PR middle-end/92071
* expmed.c (store_integral_bit_field): For fields larger than a word,
call extract_bit_field on the value if the mode is BLKmode. Remove
specific path for big-endian targets and tidy things up a little bit.
2019-03-13 Eric Botcazou <ebotca...@adacore.com>
* gcc.c-torture/compile/20200313-1.c: New test.
--
Eric Botcazou
diff --git a/gcc/expmed.c b/gcc/expmed.c
index 04610276209..e7c03fbf92c 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -933,8 +933,7 @@ store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
However, only do that if the value is not BLKmode. */
const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
- unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
- unsigned int i;
+ const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
rtx_insn *last;
/* This is the mode we must force value to, so that there will be enough
@@ -950,35 +949,31 @@ store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
last = get_last_insn ();
- for (i = 0; i < nwords; i++)
+ for (int i = 0; i < nwords; i++)
{
- /* If I is 0, use the low-order word in both field and target;
- if I is 1, use the next to lowest word; and so on. */
- unsigned int wordnum = (backwards
- ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
- - i - 1
- : i);
- unsigned int bit_offset = (backwards ^ reverse
- ? MAX ((int) bitsize - ((int) i + 1)
- * BITS_PER_WORD,
- 0)
- : (int) i * BITS_PER_WORD);
- rtx value_word = operand_subword_force (value, wordnum, value_mode);
- unsigned HOST_WIDE_INT new_bitsize =
- MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
-
- /* If the remaining chunk doesn't have full wordsize we have
- to make sure that for big-endian machines the higher order
- bits are used. */
- if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
- {
- int shift = BITS_PER_WORD - new_bitsize;
- rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
- value_word = simplify_expand_binop (word_mode, lshr_optab,
- value_word, shift_rtx,
- NULL_RTX, true,
- OPTAB_LIB_WIDEN);
- }
+ /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
+ except maybe for the last iteration. */
+ const unsigned HOST_WIDE_INT new_bitsize
+ = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
+ /* Bit offset from the starting bit number in the target. */
+ const unsigned int bit_offset
+ = backwards ^ reverse
+ ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
+ : i * BITS_PER_WORD;
+ /* Starting word number in the value. */
+ const unsigned int wordnum
+ = backwards
+ ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
+ : i;
+ /* The chunk of the value in word_mode. We use bit-field extraction
+ in BLKmode to handle unaligned memory references and to shift the
+ last chunk right on big-endian machines if need be. */
+ rtx value_word
+ = fieldmode == BLKmode
+ ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
+ 1, NULL_RTX, word_mode, word_mode, false,
+ NULL)
+ : operand_subword_force (value, wordnum, value_mode);
if (!store_bit_field_1 (op0, new_bitsize,
bitnum + bit_offset,
/* PR middle-end/92071 */
/* Testcase by David Binderman <dcb...@hotmail.com> */
void *a;
union U { double c; char d[8]; };
void bar (union U);
void
foo (void)
{
union U b;
__builtin_memcpy (b.d, a, 8);
bar (b);
}