https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113025

--- Comment #6 from Xi Ruoyao <xry111 at gcc dot gnu.org> ---
Works for me:

#include <xmmintrin.h>
#include <stdint.h>

/*
 * Load 16 bytes from ptr into an __m128i.  The low four bits of the
 * address are tested: when they are all zero the aligned load
 * _mm_load_si128 is used, otherwise the unaligned _mm_loadu_si128.
 * The whole expansion is parenthesized so it can be used safely inside
 * a larger expression.  Note that ptr is expanded three times, so the
 * argument should not have side effects.
 */
#define LOAD_SI128(ptr) \
        ( ((uintptr_t)(ptr) & 15) == 0 \
          ? _mm_load_si128((__m128i*)(ptr)) \
          : _mm_loadu_si128((__m128i*)(ptr)) )

/* Only declared here: the definition of x, and therefore its actual
   alignment, is not visible in this translation unit.  */
extern char x[16];
/* Destination of the load, defined in this translation unit.  */
__m128i y;

/* Loads the 16 bytes of x through LOAD_SI128 and stores the result
   in y.  */
void
test ()
{
  y = LOAD_SI128 (&x);
}

compiled to:

test:
.LFB532:
        .cfi_startproc
        movdqu  x(%rip), %xmm0
        movaps  %xmm0, y(%rip)
        ret
        .cfi_endproc

Note that if x is not extern, GCC will generate:

test:
.LFB532:
        .cfi_startproc
        movdqa  x(%rip), %xmm0
        movaps  %xmm0, y(%rip)
        ret
        .cfi_endproc

but that is legal because GCC places x at a 16-byte boundary:

        .align 16
        .type   x, @object
        .size   x, 16
x:
        .zero   16

Reply via email to