The last mysterious error message went away when the same code was compiled on a machine with a more recent gcc (4.4.1). Shortly afterwards, I hit the next roadblock.
Here is foo.c (a modified version of sse2-cmpsd-1.c from the version 4.5.1 testsuite): >8>8<8>8>8<8>8>8<8>8>8<8>8>8<8>8>8<8>8>8<8>8>8<8>8>8<8 #ifndef CHECK_H #define CHECK_H "sse2-check.h" #endif #ifndef TEST #define TEST sse2_test #endif #include CHECK_H #include <emmintrin.h> static __m128d __attribute__((noinline, unused)) test (__m128d s1, __m128d s2) { printf("test s1.x"); _mm_dump_fd(s1); printf("test s2.x"); _mm_dump_fd(s2); return _mm_add_pd (s1, s2); } static void TEST (void) { union128d u, s1, s2; double e[2]; s1.x = _mm_set_pd (2134.3343,1234.635654); s2.x = _mm_set_pd (41124.234,2344.2354); printf("s1 0 1 %lf %lf\n",s1.a[0],s1.a[1]); printf("s2 0 1 %lf %lf\n",s2.a[0],s2.a[1]); printf("s1.x"); _mm_dump_fd(s1.x); printf("s2.x"); _mm_dump_fd(s2.x); u.x = test (s1.x, s2.x); e[0] = s1.a[0] + s2.a[0]; e[1] = s1.a[1] + s2.a[1]; printf("s1.x"); _mm_dump_fd(s1.x); printf("s2.x"); _mm_dump_fd(s2.x); printf("expected e0 e1 %lf %lf\n",e[0],e[1]); printf("result r0 r1 %lf %lf\n",u.a[0],u.a[1]); if (check_union128d (u, e)) abort (); } >8>8<8>8>8<8>8>8<8>8>8<8>8>8<8>8>8<8>8>8<8>8>8<8>8>8<8 When compiled with -mno-sse2 the run fails. Bizarrely, it seems to be passing data into the test function incorrectly, notice that in test the low double in s2 is the high double in s1, instead of the original low double in s2 from outside the calling function. This erroneous value propagates into my inline code where it is added (correctly, but of course to the wrong final sum since the inputs were wrong). gcc -Wall -msse -mno-sse2 -I. 
-lm -DSOFT_SSE2 -DEMMSOFTDBG -O1 -o foo_wno foo.c ./foo_wno mm_set_pd, in 2134.334300 1234.635654 mm_set_pd, in 41124.234000 2344.235400 s1 0 1 1234.635654 2134.334300 s2 0 1 2344.235400 41124.234000 s1.xDEBUG m_d_fd: 1234.635654 2134.334300 s2.xDEBUG m_d_fd: 2344.235400 41124.234000 test s1.xDEBUG m_d_fd: 1234.635654 2134.334300 test s2.xDEBUG m_d_fd: 2134.334300 41124.234000 IN _mm_add_pd __ADEBUG m_d_fd: 1234.635654 2134.334300 __BDEBUG m_d_fd: 2134.334300 41124.234000 s1.xDEBUG m_d_fd: 1234.635654 2134.334300 s2.xDEBUG m_d_fd: 2344.235400 41124.234000 expected e0 e1 3578.871054 43258.568300 result r0 r1 3368.969954 43258.568300 Aborted When -msse2 is enabled, however, the parameters are passed appropriately into test (and my inlined function), and the program works. Here the pass to the test function is correct, and that propagates into my inline function correctly too: gcc -Wall -msse -msse2 -I. -lm -DSOFT_SSE2 -DEMMSOFTDBG -O1 -o foo_nono foo.c [r...@newsaf i386]# ./foo_nono mm_set_pd, in 2134.334300 1234.635654 mm_set_pd, in 41124.234000 2344.235400 s1 0 1 1234.635654 2134.334300 s2 0 1 2344.235400 41124.234000 s1.xDEBUG m_d_fd: 1234.635654 2134.334300 s2.xDEBUG m_d_fd: 2344.235400 41124.234000 test s1.xDEBUG m_d_fd: 1234.635654 2134.334300 test s2.xDEBUG m_d_fd: 2344.235400 41124.234000 IN _mm_add_pd __ADEBUG m_d_fd: 1234.635654 2134.334300 __BDEBUG m_d_fd: 2344.235400 41124.234000 s1.xDEBUG m_d_fd: 1234.635654 2134.334300 s2.xDEBUG m_d_fd: 2344.235400 41124.234000 expected e0 e1 3578.871054 43258.568300 result r0 r1 3578.871054 43258.568300 Regards, David Mathog mat...@caltech.edu Manager, Sequence Analysis Facility, Biology Division, Caltech