------- Additional Comments From pinskia at gcc dot gnu dot org 2005-08-26 03:36 ------- Reduced as far as I can get this: typedef float __v4sf __attribute__ ((__vector_size__ (16))); typedef float __m128 __attribute__ ((__vector_size__ (16))); static __inline __m128 _mm_cmpeq_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B); } static __inline __m128 _mm_setr_ps (float __Z, float __Y, float __X, float __W) { return __extension__ (__m128)(__v4sf){__Z, __Y, __X, __W }; } typedef long long __v2di __attribute__ ((__vector_size__ (16))); static __inline __m128 _mm_and_si128 (__m128 __A, __m128 __B) { return (__m128)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B); } static __inline __m128 _mm_or_si128 (__m128 __A, __m128 __B) { return (__m128)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B); } typedef union { __m128 xmmi; int si[4]; } __attribute__ ((aligned(16))) um128; um128 u; static inline int sse_max_abs_indexf(float *v, int step, int n) { __m128 m1, mm; __m128 mim, mi, msk; um128 u, ui; int n4, step2, step3; mm = __builtin_ia32_andps((__m128)(__v4sf){0.0, v[step], v[step2], v[step3]}, u.xmmi); if (n4) { int i; for (i = 0; i < n4; ++i) ; msk = (__m128)_mm_cmpeq_ps(m1, mm); mim = _mm_or_si128(_mm_and_si128(msk, mi), mim); } ui.xmmi = (__m128)mim; return ui.si[n]; } static void sse_swap_rowf(float *r1, float *r2, int n) { int n4 = (n / 4) * 4; float *r14end = r1 + n4; while (r1 < r14end) { *r1 = *r2; r1++; } } void ludcompf(float *m, int nw, int *prow, int n) { int i, s = 0; float *pm; for (i = 0, pm = m; i < n - 1; ++i, pm += nw) { int vi = sse_max_abs_indexf(pm + i, nw, n - i); float *pt; int j; if (vi != 0) { sse_swap_rowf(pm, pm + vi * nw, nw); swap_index(prow, i, i + vi); } for (j = i + 1, pt = pm + nw; j < n; ++j, pt += nw) sse_add_rowf(pt + i + 1, pm + i + 1, -1.0, n - i - 1); } }
-- What |Removed |Added ---------------------------------------------------------------------------- Status|UNCONFIRMED |NEW Ever Confirmed| |1 Last reconfirmed|0000-00-00 00:00:00 |2005-08-26 03:36:35 date| | http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23570