Hi,
I uploaded an NMU of your package to make sure a fixed version goes into etch
(and to allow me to remove xorg-x11 (6.9) from etch).
Thanks for your work.
Cheers,
Andi
diff -Nur liboil-0.3.9/debian/changelog liboil-0.3.9~/debian/changelog
--- liboil-0.3.9/debian/changelog 2006-06-22 20:56:47.000000000 +0200
+++ liboil-0.3.9~/debian/changelog 2006-06-22 20:37:17.000000000 +0200
@@ -1,3 +1,15 @@
+liboil (0.3.9-1.1) unstable; urgency=low
+
+ * Non-maintainer upload.
+ * fix possible stack misalignment on i386 - this change is not perfect
+ and should also contain a test suite, but is still better
+ than nothing at all. Thanks to Christian Aichinger for his
+ good work on this and the patch. Closes: #368991
+ (also keeping the patch around in the diff, so that it's
+ obvious what was changed)
+
+ -- Andreas Barth <[EMAIL PROTECTED]> Thu, 22 Jun 2006 19:31:26 +0200
+
liboil (0.3.9-1) unstable; urgency=low
* New upstream release.
diff -Nur liboil-0.3.9/liboil/sse/composite_sse_2pix.c liboil-0.3.9~/liboil/sse/composite_sse_2pix.c
--- liboil-0.3.9/liboil/sse/composite_sse_2pix.c 2005-12-21 02:27:54.000000000 +0100
+++ liboil-0.3.9~/liboil/sse/composite_sse_2pix.c 2006-06-22 20:36:42.000000000 +0200
@@ -32,6 +32,42 @@
#include <emmintrin.h>
#include <liboil/liboilcolorspace.h>
+/* Work around non-aligned stack frames (which cause the intrinsics to crash)
+ * by making sure the stack frame is always aligned.
+ */
+#if defined(__i386__)
+#define OIL_SSE_WRAPPER(name,ret, ...) \
+ ret name(__VA_ARGS__) __attribute__((used)); \
+ ret name ## _wrap (__VA_ARGS__) { \
+ OIL_SSE_WRAPPER_CALL(name); \
+ }
+
+#define OIL_SSE_WRAPPER_CALL(name) \
+ asm volatile( \
+ "\n\t" \
+ "subl $0x10,%%esp\n\t" \
+ "andl $0xfffffff0,%%esp\n\t" \
+ \
+ "movdqu 8(%%ebp),%%xmm0\n\t" \
+ "movdqa %%xmm0,(%%esp)\n\t" \
+ \
+ "call " #name "\n\t" \
+ "movl %%ebp,%%esp\n\t" \
+ : : \
+ : "eax","ecx","edx","xmm0")
+
+#elif defined(__amd64__)
+
+/* Needed because we call *_wrap. Should get optimized away anyway */
+#define OIL_SSE_WRAPPER(name,ret, ...) \
+ ret name ## _wrap (__VA_ARGS__) { \
+ name(__VA_ARGS__); \
+ }
+
+#else
+#error Can't use sse on !i386 and !amd64
+#endif
+
/* non-SSE2 compositing support */
#define COMPOSITE_OVER(d,s,m) ((d) + (s) - oil_muldiv_255((d),(m)))
#define COMPOSITE_ADD(d,s) oil_clamp_255((d) + (s))
@@ -41,20 +77,12 @@
* the channel value in the low byte. This means 2 pixels per pass.
*/
-union m128_int {
- __m128i m128;
- uint64_t ull[2];
-};
-
-static const struct _SSEData {
- union m128_int sse_8x00ff;
- union m128_int sse_8x0080;
-} c = {
- .sse_8x00ff.ull = {0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL},
- .sse_8x0080.ull = {0x0080008000800080ULL, 0x0080008000800080ULL},
-};
+static const __m128i c_sse_8x00ff =
+ {0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL};
+static const __m128i c_sse_8x0080 =
+ {0x0080008000800080ULL, 0x0080008000800080ULL};
-#define MC(x) (c.sse_##x.m128)
+#define MC(x) (c_sse_##x)
/* Shuffles the given value such that the alpha for each pixel appears in each
* channel of the pixel.
@@ -188,7 +216,11 @@
COMPOSITE_IN(oil_argb_B(*src), m));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_2pix,
+
+OIL_SSE_WRAPPER(composite_in_argb_const_src_sse_2pix, static void,
+ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_2pix_wrap,
composite_in_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
@@ -216,7 +248,10 @@
COMPOSITE_IN(oil_argb_B(s), mask[0]));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_2pix,
+OIL_SSE_WRAPPER(composite_in_argb_const_mask_sse_2pix, static void,
+ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_2pix_wrap,
composite_in_argb_const_mask, OIL_IMPL_FLAG_SSE2);
static void
@@ -272,7 +307,11 @@
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_2pix,
+
+OIL_SSE_WRAPPER(composite_over_argb_const_src_sse_2pix, static void,
+ uint32_t *dest, const uint32_t *src, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_2pix_wrap,
composite_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
@@ -309,8 +348,12 @@
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_sse_2pix, composite_in_over_argb,
- OIL_IMPL_FLAG_SSE2);
+
+OIL_SSE_WRAPPER(composite_in_over_argb_sse_2pix , static void,
+ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_in_over_argb_sse_2pix_wrap,
+ composite_in_over_argb, OIL_IMPL_FLAG_SSE2);
static void
composite_in_over_argb_const_src_sse_2pix (uint32_t *dest, const uint32_t *src,
@@ -348,7 +391,11 @@
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_2pix,
+
+OIL_SSE_WRAPPER(composite_in_over_argb_const_src_sse_2pix , static void,
+ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_2pix_wrap,
composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
@@ -387,7 +434,11 @@
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_2pix,
+
+OIL_SSE_WRAPPER(composite_in_over_argb_const_mask_sse_2pix, static void,
+ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_2pix_wrap,
composite_in_over_argb_const_mask, OIL_IMPL_FLAG_SSE2);
static void
diff -Nur liboil-0.3.9/liboil/sse/composite_sse_4pix.c liboil-0.3.9~/liboil/sse/composite_sse_4pix.c
--- liboil-0.3.9/liboil/sse/composite_sse_4pix.c 2005-12-21 02:27:54.000000000 +0100
+++ liboil-0.3.9~/liboil/sse/composite_sse_4pix.c 2006-06-22 20:36:42.000000000 +0200
@@ -32,20 +32,49 @@
#include <emmintrin.h>
#include <liboil/liboilcolorspace.h>
-union m128_int {
- __m128i m128;
- uint64_t ull[2];
-};
-
-static const struct _SSEData {
- union m128_int sse_16xff;
- union m128_int sse_8x0080;
-} c = {
- .sse_16xff.ull = {0xffffffffffffffffULL, 0xffffffffffffffffULL},
- .sse_8x0080.ull = {0x0080008000800080ULL, 0x0080008000800080ULL},
-};
+/* Work around non-aligned stack frames (which cause the intrinsics to crash)
+ * by making sure the stack frame is always aligned.
+ */
+#if defined(__i386__)
+#define OIL_SSE_WRAPPER(name,ret, ...) \
+ ret name(__VA_ARGS__) __attribute__((used)); \
+ ret name ## _wrap (__VA_ARGS__) { \
+ OIL_SSE_WRAPPER_CALL(name); \
+ }
+
+#define OIL_SSE_WRAPPER_CALL(name) \
+ asm volatile( \
+ "\n\t" \
+ "subl $0x10,%%esp\n\t" \
+ "andl $0xfffffff0,%%esp\n\t" \
+ \
+ "movdqu 8(%%ebp),%%xmm0\n\t" \
+ "movdqa %%xmm0,(%%esp)\n\t" \
+ \
+ "call " #name "\n\t" \
+ "movl %%ebp,%%esp\n\t" \
+ : : \
+ : "eax","ecx","edx","xmm0")
+
+#elif defined(__amd64__)
+
+/* Needed because we call *_wrap. Should get optimized away anyway */
+#define OIL_SSE_WRAPPER(name,ret, ...) \
+ ret name ## _wrap (__VA_ARGS__) { \
+ name(__VA_ARGS__); \
+ }
+
+#else
+#error Can't use sse on !i386 and !amd64
+#endif
+
-#define MC(x) (c.sse_##x.m128)
+static const __m128i c_sse_16xff =
+ {0xffffffffffffffffULL, 0xffffffffffffffffULL};
+static const __m128i c_sse_8x0080 =
+ {0x0080008000800080ULL, 0x0080008000800080ULL};
+
+#define MC(x) (c_sse_##x)
/* non-SSE2 compositing support */
#define COMPOSITE_OVER(d,s,m) ((d) + (s) - oil_muldiv_255((d),(m)))
@@ -193,7 +222,11 @@
COMPOSITE_IN(oil_argb_B(s), m));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_sse, composite_in_argb,
+
+OIL_SSE_WRAPPER(composite_in_argb_sse, static void,
+ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_in_argb_sse_wrap, composite_in_argb,
OIL_IMPL_FLAG_SSE2);
static void
@@ -230,7 +263,11 @@
COMPOSITE_IN(oil_argb_B(*src), m));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse,
+
+OIL_SSE_WRAPPER(composite_in_argb_const_src_sse , static void,
+ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_wrap,
composite_in_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
@@ -267,7 +304,10 @@
COMPOSITE_IN(oil_argb_B(s), mask[0]));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse,
+OIL_SSE_WRAPPER(composite_in_argb_const_mask_sse, static void,
+ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_wrap,
composite_in_argb_const_mask, OIL_IMPL_FLAG_SSE2);
static void
@@ -339,7 +379,11 @@
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse,
+
+OIL_SSE_WRAPPER(composite_over_argb_const_src_sse, static void,
+ uint32_t *dest, const uint32_t *src, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_wrap,
composite_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
@@ -447,9 +491,11 @@
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse,
- composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
+OIL_SSE_WRAPPER(composite_in_over_argb_const_src_sse , static void,
+ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_wrap,
+ composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
composite_in_over_argb_const_mask_sse (uint32_t *dest, const uint32_t *src,
const uint8_t *mask, int n)
@@ -502,7 +548,11 @@
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse,
+
+OIL_SSE_WRAPPER(composite_in_over_argb_const_mask_sse, static void,
+ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+
+OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_wrap,
composite_in_over_argb_const_mask, OIL_IMPL_FLAG_SSE2);
static void
diff -Nur liboil-0.3.9/liboil/sse/sad8x8_sse.c liboil-0.3.9~/liboil/sse/sad8x8_sse.c
--- liboil-0.3.9/liboil/sse/sad8x8_sse.c 2005-12-23 22:46:25.000000000 +0100
+++ liboil-0.3.9~/liboil/sse/sad8x8_sse.c 2006-06-22 20:36:42.000000000 +0200
@@ -31,6 +31,44 @@
#include <liboil/liboilfunction.h>
#include <emmintrin.h>
+/* Work around non-aligned stack frames (which cause the intrinsics to crash)
+ * by making sure the stack frame is always aligned.
+ */
+#if defined(__i386__)
+#define OIL_SSE_WRAPPER(name,ret, ...) \
+ ret name(__VA_ARGS__) __attribute__((used)); \
+ ret name ## _wrap (__VA_ARGS__) { \
+ OIL_SSE_WRAPPER_CALL(name); \
+ }
+
+#define OIL_SSE_WRAPPER_CALL(name) \
+ asm volatile( \
+ "\n\t" \
+ "subl $0x18,%%esp\n\t" \
+ "andl $0xfffffff0,%%esp\n\t" \
+ \
+ "movdqu 8(%%ebp),%%xmm0\n\t" \
+ "movdqa %%xmm0,(%%esp)\n\t" \
+ "movl 0x18(%%ebp), %%ecx\n\t" \
+ "movl %%ecx, 0x10(%%esp)\n\t" \
+ \
+ "call " #name "\n\t" \
+ "movl %%ebp,%%esp\n\t" \
+ : : \
+ : "eax","ecx","edx","xmm0")
+
+#elif defined(__amd64__)
+
+/* Needed because we call *_wrap. Should get optimized away anyway */
+#define OIL_SSE_WRAPPER(name,ret, ...) \
+ ret name ## _wrap (__VA_ARGS__) { \
+ name(__VA_ARGS__); \
+ }
+
+#else
+#error Can't use sse on !i386 and !amd64
+#endif
+
union m128_int {
__m128i m128;
uint32_t i[4];
@@ -42,7 +80,7 @@
int sstr2)
{
int i;
- __m128i sum = _mm_setzero_si128();
+ __m128i sum __attribute__ ((aligned (16))) = _mm_setzero_si128();
union m128_int sumi;
for (i = 0; i < 4; i++) {
@@ -60,4 +98,7 @@
sumi.m128 = sum;
*dest = sumi.i[0] + sumi.i[2];
}
-OIL_DEFINE_IMPL_FULL (sad8x8_u8_sse, sad8x8_u8, OIL_IMPL_FLAG_SSE2);
+
+OIL_SSE_WRAPPER(sad8x8_u8_sse, static void,
+ uint32_t *dest, uint8_t *src1, int sstr1, uint8_t *src2, int sstr2)
+OIL_DEFINE_IMPL_FULL (sad8x8_u8_sse_wrap, sad8x8_u8, OIL_IMPL_FLAG_SSE2);
diff -Nur liboil-0.3.9/liboil-368991-sse-segv-fix.4.diff liboil-0.3.9~/liboil-368991-sse-segv-fix.4.diff
--- liboil-0.3.9/liboil-368991-sse-segv-fix.4.diff 1970-01-01 01:00:00.000000000 +0100
+++ liboil-0.3.9~/liboil-368991-sse-segv-fix.4.diff 2006-06-22 20:34:44.000000000 +0200
@@ -0,0 +1,358 @@
+--- liboil-0.3.9.orig/liboil/sse/composite_sse_2pix.c 2005-12-21 02:27:54.000000000 +0100
++++ liboil-0.3.9/liboil/sse/composite_sse_2pix.c 2006-06-20 19:10:33.000000000 +0200
+@@ -32,6 +32,42 @@
+ #include <emmintrin.h>
+ #include <liboil/liboilcolorspace.h>
+
++/* Work around non-aligned stack frames (which cause the intrinsics to crash)
++ * by making sure the stack frame is always aligned.
++ */
++#if defined(__i386__)
++#define OIL_SSE_WRAPPER(name,ret, ...) \
++ ret name(__VA_ARGS__) __attribute__((used)); \
++ ret name ## _wrap (__VA_ARGS__) { \
++ OIL_SSE_WRAPPER_CALL(name); \
++ }
++
++#define OIL_SSE_WRAPPER_CALL(name) \
++ asm volatile( \
++ "\n\t" \
++ "subl $0x10,%%esp\n\t" \
++ "andl $0xfffffff0,%%esp\n\t" \
++ \
++ "movdqu 8(%%ebp),%%xmm0\n\t" \
++ "movdqa %%xmm0,(%%esp)\n\t" \
++ \
++ "call " #name "\n\t" \
++ "movl %%ebp,%%esp\n\t" \
++ : : \
++ : "eax","ecx","edx","xmm0")
++
++#elif defined(__amd64__)
++
++/* Needed because we call *_wrap. Should get optimized away anyway */
++#define OIL_SSE_WRAPPER(name,ret, ...) \
++ ret name ## _wrap (__VA_ARGS__) { \
++ name(__VA_ARGS__); \
++ }
++
++#else
++#error Can't use sse on !i386 and !amd64
++#endif
++
+ /* non-SSE2 compositing support */
+ #define COMPOSITE_OVER(d,s,m) ((d) + (s) - oil_muldiv_255((d),(m)))
+ #define COMPOSITE_ADD(d,s) oil_clamp_255((d) + (s))
+@@ -41,20 +77,12 @@
+ * the channel value in the low byte. This means 2 pixels per pass.
+ */
+
+-union m128_int {
+- __m128i m128;
+- uint64_t ull[2];
+-};
+-
+-static const struct _SSEData {
+- union m128_int sse_8x00ff;
+- union m128_int sse_8x0080;
+-} c = {
+- .sse_8x00ff.ull = {0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL},
+- .sse_8x0080.ull = {0x0080008000800080ULL, 0x0080008000800080ULL},
+-};
++static const __m128i c_sse_8x00ff =
++ {0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL};
++static const __m128i c_sse_8x0080 =
++ {0x0080008000800080ULL, 0x0080008000800080ULL};
+
+-#define MC(x) (c.sse_##x.m128)
++#define MC(x) (c_sse_##x)
+
+ /* Shuffles the given value such that the alpha for each pixel appears in each
+ * channel of the pixel.
+@@ -188,7 +216,11 @@
+ COMPOSITE_IN(oil_argb_B(*src), m));
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_2pix,
++
++OIL_SSE_WRAPPER(composite_in_argb_const_src_sse_2pix, static void,
++ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_2pix_wrap,
+ composite_in_argb_const_src, OIL_IMPL_FLAG_SSE2);
+
+ static void
+@@ -216,7 +248,10 @@
+ COMPOSITE_IN(oil_argb_B(s), mask[0]));
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_2pix,
++OIL_SSE_WRAPPER(composite_in_argb_const_mask_sse_2pix, static void,
++ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_2pix_wrap,
+ composite_in_argb_const_mask, OIL_IMPL_FLAG_SSE2);
+
+ static void
+@@ -272,7 +307,11 @@
+ *dest++ = d;
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_2pix,
++
++OIL_SSE_WRAPPER(composite_over_argb_const_src_sse_2pix, static void,
++ uint32_t *dest, const uint32_t *src, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_2pix_wrap,
+ composite_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
+
+ static void
+@@ -309,8 +348,12 @@
+ *dest++ = d;
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_sse_2pix, composite_in_over_argb,
+- OIL_IMPL_FLAG_SSE2);
++
++OIL_SSE_WRAPPER(composite_in_over_argb_sse_2pix , static void,
++ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_in_over_argb_sse_2pix_wrap,
++ composite_in_over_argb, OIL_IMPL_FLAG_SSE2);
+
+ static void
+ composite_in_over_argb_const_src_sse_2pix (uint32_t *dest, const uint32_t *src,
+@@ -348,7 +391,11 @@
+ *dest++ = d;
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_2pix,
++
++OIL_SSE_WRAPPER(composite_in_over_argb_const_src_sse_2pix , static void,
++ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_2pix_wrap,
+ composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
+
+ static void
+@@ -387,7 +434,11 @@
+ *dest++ = d;
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_2pix,
++
++OIL_SSE_WRAPPER(composite_in_over_argb_const_mask_sse_2pix, static void,
++ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_2pix_wrap,
+ composite_in_over_argb_const_mask, OIL_IMPL_FLAG_SSE2);
+
+ static void
+--- liboil-0.3.9.orig/liboil/sse/composite_sse_4pix.c 2005-12-21 02:27:54.000000000 +0100
++++ liboil-0.3.9/liboil/sse/composite_sse_4pix.c 2006-06-20 19:10:34.000000000 +0200
+@@ -32,20 +32,49 @@
+ #include <emmintrin.h>
+ #include <liboil/liboilcolorspace.h>
+
+-union m128_int {
+- __m128i m128;
+- uint64_t ull[2];
+-};
+-
+-static const struct _SSEData {
+- union m128_int sse_16xff;
+- union m128_int sse_8x0080;
+-} c = {
+- .sse_16xff.ull = {0xffffffffffffffffULL, 0xffffffffffffffffULL},
+- .sse_8x0080.ull = {0x0080008000800080ULL, 0x0080008000800080ULL},
+-};
++/* Work around non-aligned stack frames (which cause the intrinsics to crash)
++ * by making sure the stack frame is always aligned.
++ */
++#if defined(__i386__)
++#define OIL_SSE_WRAPPER(name,ret, ...) \
++ ret name(__VA_ARGS__) __attribute__((used)); \
++ ret name ## _wrap (__VA_ARGS__) { \
++ OIL_SSE_WRAPPER_CALL(name); \
++ }
++
++#define OIL_SSE_WRAPPER_CALL(name) \
++ asm volatile( \
++ "\n\t" \
++ "subl $0x10,%%esp\n\t" \
++ "andl $0xfffffff0,%%esp\n\t" \
++ \
++ "movdqu 8(%%ebp),%%xmm0\n\t" \
++ "movdqa %%xmm0,(%%esp)\n\t" \
++ \
++ "call " #name "\n\t" \
++ "movl %%ebp,%%esp\n\t" \
++ : : \
++ : "eax","ecx","edx","xmm0")
++
++#elif defined(__amd64__)
++
++/* Needed because we call *_wrap. Should get optimized away anyway */
++#define OIL_SSE_WRAPPER(name,ret, ...) \
++ ret name ## _wrap (__VA_ARGS__) { \
++ name(__VA_ARGS__); \
++ }
++
++#else
++#error Can't use sse on !i386 and !amd64
++#endif
++
+
+-#define MC(x) (c.sse_##x.m128)
++static const __m128i c_sse_16xff =
++ {0xffffffffffffffffULL, 0xffffffffffffffffULL};
++static const __m128i c_sse_8x0080 =
++ {0x0080008000800080ULL, 0x0080008000800080ULL};
++
++#define MC(x) (c_sse_##x)
+
+ /* non-SSE2 compositing support */
+ #define COMPOSITE_OVER(d,s,m) ((d) + (s) - oil_muldiv_255((d),(m)))
+@@ -193,7 +222,11 @@
+ COMPOSITE_IN(oil_argb_B(s), m));
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_in_argb_sse, composite_in_argb,
++
++OIL_SSE_WRAPPER(composite_in_argb_sse, static void,
++ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_in_argb_sse_wrap, composite_in_argb,
+ OIL_IMPL_FLAG_SSE2);
+
+ static void
+@@ -230,7 +263,11 @@
+ COMPOSITE_IN(oil_argb_B(*src), m));
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse,
++
++OIL_SSE_WRAPPER(composite_in_argb_const_src_sse , static void,
++ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_wrap,
+ composite_in_argb_const_src, OIL_IMPL_FLAG_SSE2);
+
+ static void
+@@ -267,7 +304,10 @@
+ COMPOSITE_IN(oil_argb_B(s), mask[0]));
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse,
++OIL_SSE_WRAPPER(composite_in_argb_const_mask_sse, static void,
++ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_wrap,
+ composite_in_argb_const_mask, OIL_IMPL_FLAG_SSE2);
+
+ static void
+@@ -339,7 +379,11 @@
+ *dest++ = d;
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse,
++
++OIL_SSE_WRAPPER(composite_over_argb_const_src_sse, static void,
++ uint32_t *dest, const uint32_t *src, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_wrap,
+ composite_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
+
+ static void
+@@ -447,9 +491,11 @@
+ *dest++ = d;
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse,
+- composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
++OIL_SSE_WRAPPER(composite_in_over_argb_const_src_sse , static void,
++ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
+
++OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_wrap,
++ composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
+ static void
+ composite_in_over_argb_const_mask_sse (uint32_t *dest, const uint32_t *src,
+ const uint8_t *mask, int n)
+@@ -502,7 +548,11 @@
+ *dest++ = d;
+ }
+ }
+-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse,
++
++OIL_SSE_WRAPPER(composite_in_over_argb_const_mask_sse, static void,
++ uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n)
++
++OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_wrap,
+ composite_in_over_argb_const_mask, OIL_IMPL_FLAG_SSE2);
+
+ static void
+--- liboil-0.3.9.orig/liboil/sse/sad8x8_sse.c 2005-12-23 22:46:25.000000000 +0100
++++ liboil-0.3.9/liboil/sse/sad8x8_sse.c 2006-06-20 19:10:32.000000000 +0200
+@@ -31,6 +31,44 @@
+ #include <liboil/liboilfunction.h>
+ #include <emmintrin.h>
+
++/* Work around non-aligned stack frames (which cause the intrinsics to crash)
++ * by making sure the stack frame is always aligned.
++ */
++#if defined(__i386__)
++#define OIL_SSE_WRAPPER(name,ret, ...) \
++ ret name(__VA_ARGS__) __attribute__((used)); \
++ ret name ## _wrap (__VA_ARGS__) { \
++ OIL_SSE_WRAPPER_CALL(name); \
++ }
++
++#define OIL_SSE_WRAPPER_CALL(name) \
++ asm volatile( \
++ "\n\t" \
++ "subl $0x18,%%esp\n\t" \
++ "andl $0xfffffff0,%%esp\n\t" \
++ \
++ "movdqu 8(%%ebp),%%xmm0\n\t" \
++ "movdqa %%xmm0,(%%esp)\n\t" \
++ "movl 0x18(%%ebp), %%ecx\n\t" \
++ "movl %%ecx, 0x10(%%esp)\n\t" \
++ \
++ "call " #name "\n\t" \
++ "movl %%ebp,%%esp\n\t" \
++ : : \
++ : "eax","ecx","edx","xmm0")
++
++#elif defined(__amd64__)
++
++/* Needed because we call *_wrap. Should get optimized away anyway */
++#define OIL_SSE_WRAPPER(name,ret, ...) \
++ ret name ## _wrap (__VA_ARGS__) { \
++ name(__VA_ARGS__); \
++ }
++
++#else
++#error Can't use sse on !i386 and !amd64
++#endif
++
+ union m128_int {
+ __m128i m128;
+ uint32_t i[4];
+@@ -42,7 +80,7 @@
+ int sstr2)
+ {
+ int i;
+- __m128i sum = _mm_setzero_si128();
++ __m128i sum __attribute__ ((aligned (16))) = _mm_setzero_si128();
+ union m128_int sumi;
+
+ for (i = 0; i < 4; i++) {
+@@ -60,4 +98,7 @@
+ sumi.m128 = sum;
+ *dest = sumi.i[0] + sumi.i[2];
+ }
+-OIL_DEFINE_IMPL_FULL (sad8x8_u8_sse, sad8x8_u8, OIL_IMPL_FLAG_SSE2);
++
++OIL_SSE_WRAPPER(sad8x8_u8_sse, static void,
++ uint32_t *dest, uint8_t *src1, int sstr1, uint8_t *src2, int sstr2)
++OIL_DEFINE_IMPL_FULL (sad8x8_u8_sse_wrap, sad8x8_u8, OIL_IMPL_FLAG_SSE2);
--
http://home.arcor.de/andreas-barth/
--
To UNSUBSCRIBE, email to [EMAIL PROTECTED]
with a subject of "unsubscribe". Trouble? Contact [EMAIL PROTECTED]