Hi, this is my simple test proggie:
#include <stdio.h>
#include <emmintrin.h>

union __u128i {
    unsigned int u[4];
    __m128i v;
};

static const union __u128i mask = {{0xffffffffU, 0x00000000U, 0xffffffffU, 0x00000000U}};

static void test1()
{
    union __u128i input = {{1, 2, 3, 4}};
    int i;

    input.v = _mm_and_si128(input.v, mask.v);
    for(i=0;i<4;i++)
        printf("%u\n", input.u[i]);
}

static void test2()
{
    void *input;
    int i;

    input = _mm_malloc(4*sizeof(int), 16);
    for(i=0;i<4;i++)
        ((unsigned int*)input)[i] = i+1;
    *(__m128i*)input = _mm_and_si128(*(__m128i*)input, mask.v);
    for(i=0;i<4;i++)
        printf("%u\n",((unsigned int*)input)[i]);
    _mm_free(input);
}

int main(){
    test1();
    printf("\n");
    test2();
    return 0;
}

test1 is always ok, but test2 fails with -O3 (it produces the output 1 0 0 0 for test2), whereas -O2 makes it behave. I am on x86_64.

-m32 has interesting effects:

  -O3 -Wall -m32 -msse2 -march=i486    is ok (output 1 0 3 0, as expected)
  -O3 -Wall -m32 -msse2 -march=i586    is broken (output 0 2 3 4)
  -O3 -Wall -m32 -msse2 -march=athlon  is broken (output 1 0 0 0)

Is this one of the aliasing issues I read about with the 4.1 compiler? Is there an easy work-around? In fact, I need to pass the data as an aligned buffer, so I need test2 to behave correctly. :-(

Do you need anything else?

Cheers,
-- 
(°=                 =°)
//\ Prakash Punnoor /\\
V_/                 \_V
pgpqqR78NXdXu.pgp
Description: PGP signature