Hi,

This is my simple test program:


#include <stdio.h>

#include <emmintrin.h>

/*
 * Overlays an array of four unsigned ints with a single __m128i so the same
 * storage can be handed to SSE2 intrinsics (v) or read element by element (u).
 * u is deliberately the first member: brace initializers without designators
 * initialize the first member of a union.
 */
union __u128i {
        unsigned int u[4];      /* element-wise view */
        __m128i v;              /* vector view used as the intrinsic operand */
};

static const union __u128i mask = {{0xffffffffU, 0x00000000U, 0xffffffffU, 
0x00000000U}};


/*
 * Union-based variant: ANDs {1, 2, 3, 4} with mask and prints the four
 * resulting elements, one per line. All vector accesses go through the
 * union member v, all scalar accesses through the member u.
 */
static void test1()
{
        union __u128i input = {{1, 2, 3, 4}};
        int i;
        
        /* Vector-side read and write both use the union member v. */
        input.v = _mm_and_si128(input.v, mask.v);
        
        /* Read the result back through the unsigned int member. */
        for(i=0;i<4;i++)
                printf("%u\n", input.u[i]);
        
}

/*
 * Buffer-based variant: performs the same AND with mask as test1, but on a
 * block obtained from _mm_malloc and accessed through pointer casts instead
 * of through a union. Prints the four resulting elements, one per line.
 */
static void test2()
{
        void *input;
        int i;
        
        /* Room for four ints, requested with 16-byte alignment. */
        /* NOTE(review): the returned pointer is used below without a NULL check. */
        input = _mm_malloc(4*sizeof(int), 16);
        
        /* Fill the buffer with 1, 2, 3, 4 through an unsigned int pointer. */
        for(i=0;i<4;i++)
                ((unsigned int*)input)[i] = i+1;
        
        /*
         * The same memory is now read and written as a __m128i through a
         * pointer cast.
         * NOTE(review): whether the compiler may treat this access as not
         * aliasing the unsigned int stores above depends on how __m128i is
         * declared in its emmintrin.h -- confirm for the compiler in use.
         */
        *(__m128i*)input = _mm_and_si128(*(__m128i*)input, mask.v);
        
        /* Read the result back through the unsigned int view. */
        for(i=0;i<4;i++)
                printf("%u\n",((unsigned int*)input)[i]);
        
        /* Released with _mm_free to match the _mm_malloc above. */
        _mm_free(input);
}

/* Runs test1 and then test2, printing one empty line between their outputs. */
int main(void)
{
        test1();
        fputs("\n", stdout);
        test2();

        return 0;
}

test1 always works, but test2 fails with -O3 (it prints 1 0 0 0 instead of 
the expected 1 0 3 0), whereas with -O2 it behaves correctly.


I am on x86_64. Compiling with -m32 has interesting effects:

-O3 -Wall -m32 -msse2 -march=i486 is ok (output 1 0 3 0, as expected)
-O3 -Wall -m32 -msse2 -march=i586 is broken (output 0 2 3 4)
-O3 -Wall -m32 -msse2 -march=athlon is broken (output 1 0 0 0)

Is this one of the aliasing issues I read about with the 4.1 compiler? Is 
there an easy work-around? In fact, I need to pass the data as an aligned 
buffer, so I need test2 to behave correctly. :-(

Do you need anything else?

Cheers,
-- 
(°=                 =°)
//\ Prakash Punnoor /\\
V_/                 \_V

Attachment: pgpqqR78NXdXu.pgp
Description: PGP signature

Reply via email to