https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61949

--- Comment #21 from Richard Biener <rguenth at gcc dot gnu.org> ---
Ok, I can reproduce it, but I need -msse2 in addition to -O2 (I am executing
./cc1 directly, though, so your driver may add that).

That's memcpy expanded as

  uint128_t _36;
...
  _36 = MEM[(char * {ref-all})&buffer];
  MEM[(char * {ref-all})resbuf_31(D)] = _36;

via folding I guess.

              tree type = lang_hooks.types.type_for_size (ilen * 8, 1);
              if (type
                  && TYPE_MODE (type) != BLKmode
                  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
                      == ilen * 8)
                  /* If the destination pointer is not aligned we must be able
                     to emit an unaligned store.  */
                  && (dest_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type))
                      || !SLOW_UNALIGNED_ACCESS (TYPE_MODE (type), dest_align)
                      || (optab_handler (movmisalign_optab, TYPE_MODE (type))
                          != CODE_FOR_nothing)))
                {
                  tree srctype = type;
                  tree desttype = type;
                  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))

src_align is 32, dest_align is 8.

So when expanding we see

 <mem_ref 0x7ffff6a83708
    type <integer_type 0x7ffff6a22690 unsigned TI
        size <integer_cst 0x7ffff68a3018 constant 128>
        unit size <integer_cst 0x7ffff68a3030 constant 16>
        align 32 symtab 0 alias set -1 canonical type 0x7ffff68a9bd0 precision
128 min <integer_cst 0x7ffff68a3240 0> max <integer_cst 0x7ffff689b780
0xffffffffffffffffffffffffffffffff>>

    arg 0 <addr_expr 0x7ffff6a81f60
        type <pointer_type 0x7ffff6a1d9d8 type <array_type 0x7ffff6a1d738>
            public unsigned SI
            size <integer_cst 0x7ffff6887ca8 constant 32>
            unit size <integer_cst 0x7ffff6887cc0 constant 4>
            align 32 symtab 0 alias set -1 canonical type 0x7ffff6a1da80>

        arg 0 <var_decl 0x7ffff6a823f0 buffer type <array_type 0x7ffff6a1d738>
            used BLK file /var/gcc/reghunt/trunk/libiberty/md5.c line 84 col 14
size <integer_cst 0x7ffff68a3018 128> unit size <integer_cst 0x7ffff68a3030 16>
            align 128 context <function_decl 0x7ffff6a19000 md5_finish_ctx>
abstract_origin <var_decl 0x7ffff69d1a20 buffer>
            (mem/c:BLK (plus:SI (reg/f:SI 82 virtual-stack-vars)
        (const_int -16 [0xfffffffffffffff0])) [1 buffer+0 S16 A128])>
        /var/gcc/reghunt/trunk/libiberty/md5.c:91:19 start:
/var/gcc/reghunt/trunk/libiberty/md5.c:91:19 finish:
/var/gcc/reghunt/trunk/libiberty/md5.c:91:24>
    arg 1 <integer_cst 0x7ffff6a39198 type <pointer_type 0x7ffff6a225e8>
constant 0>>

so the decl for 'buffer' actually got 128-bit alignment from expansion!  (hence
my pointing at the incoming stack alignment guarantees on Solaris)

You can see the same from md5_read_ctx:

md5_read_ctx:
        subl    $28, %esp
        movl    32(%esp), %edx
        movl    36(%esp), %eax
        movl    (%edx), %ecx
        movl    %ecx, (%esp)
        movl    4(%edx), %ecx
        movl    %ecx, 4(%esp)
        movl    8(%edx), %ecx
        movl    12(%edx), %edx
        movl    %ecx, 8(%esp)
        movl    %edx, 12(%esp)
        movaps  (%esp), %xmm0
        movups  %xmm0, (%eax)
        addl    $28, %esp

so either something goes wrong just for the inlined copy in md5_finish_ctx, or
somehow we set the desired alignment of this kind of local too high.

Reply via email to