On Mon, Jun 19, 2006 at 06:28:40PM +0200, Loïc Minier wrote: > From the gcc man page, -mpreferred-stack-boundary flag: > To ensure proper alignment of this values on the stack, the stack > boundary must be as aligned as that required by any value stored on > the stack. [...] > None of liboil, banshee, or even mono seem to be built with > -mpreferred-stack-boundary, yet I can imagine some of this software has > misaligned the stack. Is there a way to find out which and add stack > alignment code before external function calls?
After some discussion with Loïc on IRC, I've implemented something like this, and it works for both of us, so I'm pretty sure it works fine on i386. Where I'm not sure it works is amd64, since I don't have access to an amd64 machine; Michael Ablassmeier reported that it works there, though. The idea is to add a little wrapper function around the SSE functions to make sure the stack is aligned before they are called. It would be nice if gcc could do this itself; without its support, some asm magic does the job, though. I've attached the patch. It would be nice if someone who knows amd64 assembly could review this and add proper amd64 support via ifdefs if needed. Cheers, Christian Aichinger
--- liboil-0.3.9.orig/liboil/sse/composite_sse_2pix.c 2005-12-21 02:27:54.000000000 +0100 +++ liboil-0.3.9/liboil/sse/composite_sse_2pix.c 2006-06-20 16:12:12.000000000 +0200 @@ -32,6 +32,29 @@ #include <emmintrin.h> #include <liboil/liboilcolorspace.h> +/* Work around non-aligned stack frames (which causes the intristics to crash + * by making sure the stack frame is always aligned + */ +#define OIL_SSE_WRAPPER(name,ret, ...) \ + ret name(__VA_ARGS__) __attribute__((used)); \ + ret name ## _wrap (__VA_ARGS__) { \ + OIL_SSE_WRAPPER_CALL(name); \ + } + +#define OIL_SSE_WRAPPER_CALL(name) \ + asm volatile( \ + "\n\t" \ + "subl $0x10,%%esp\n\t" \ + "andl $0xfffffff0,%%esp\n\t" \ + \ + "movdqu 8(%%ebp),%%xmm0\n\t" \ + "movdqa %%xmm0,(%%esp)\n\t" \ + \ + "call " #name "\n\t" \ + "movl %%ebp,%%esp\n\t" \ + : : \ + : "eax","ecx","edx","xmm0") + /* non-SSE2 compositing support */ #define COMPOSITE_OVER(d,s,m) ((d) + (s) - oil_muldiv_255((d),(m))) #define COMPOSITE_ADD(d,s) oil_clamp_255((d) + (s)) @@ -41,20 +64,12 @@ * the channel value in the low byte. This means 2 pixels per pass. */ -union m128_int { - __m128i m128; - uint64_t ull[2]; -}; - -static const struct _SSEData { - union m128_int sse_8x00ff; - union m128_int sse_8x0080; -} c = { - .sse_8x00ff.ull = {0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL}, - .sse_8x0080.ull = {0x0080008000800080ULL, 0x0080008000800080ULL}, -}; +static const __m128i c_sse_8x00ff = + {0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL}; +static const __m128i c_sse_8x0080 = + {0x0080008000800080ULL, 0x0080008000800080ULL}; -#define MC(x) (c.sse_##x.m128) +#define MC(x) (c_sse_##x) /* Shuffles the given value such that the alpha for each pixel appears in each * channel of the pixel. 
@@ -188,7 +203,11 @@ COMPOSITE_IN(oil_argb_B(*src), m)); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_2pix, + +OIL_SSE_WRAPPER(composite_in_argb_const_src_sse_2pix, static void, + uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) + +OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_2pix_wrap, composite_in_argb_const_src, OIL_IMPL_FLAG_SSE2); static void @@ -216,7 +235,10 @@ COMPOSITE_IN(oil_argb_B(s), mask[0])); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_2pix, +OIL_SSE_WRAPPER(composite_in_argb_const_mask_sse_2pix, static void, + uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) + +OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_2pix_wrap, composite_in_argb_const_mask, OIL_IMPL_FLAG_SSE2); static void @@ -272,7 +294,11 @@ *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_2pix, + +OIL_SSE_WRAPPER(composite_over_argb_const_src_sse_2pix, static void, + uint32_t *dest, const uint32_t *src, int n) + +OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_2pix_wrap, composite_over_argb_const_src, OIL_IMPL_FLAG_SSE2); static void @@ -309,8 +335,12 @@ *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_sse_2pix, composite_in_over_argb, - OIL_IMPL_FLAG_SSE2); + +OIL_SSE_WRAPPER(composite_in_over_argb_sse_2pix , static void, + uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) + +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_sse_2pix_wrap, + composite_in_over_argb, OIL_IMPL_FLAG_SSE2); static void composite_in_over_argb_const_src_sse_2pix (uint32_t *dest, const uint32_t *src, @@ -348,7 +378,11 @@ *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_2pix, + +OIL_SSE_WRAPPER(composite_in_over_argb_const_src_sse_2pix , static void, + uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) + +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_2pix_wrap, composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2); static void 
@@ -387,7 +421,11 @@ *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_2pix, + +OIL_SSE_WRAPPER(composite_in_over_argb_const_mask_sse_2pix, static void, + uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) + +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_2pix_wrap, composite_in_over_argb_const_mask, OIL_IMPL_FLAG_SSE2); static void --- liboil-0.3.9.orig/liboil/sse/composite_sse_4pix.c 2005-12-21 02:27:54.000000000 +0100 +++ liboil-0.3.9/liboil/sse/composite_sse_4pix.c 2006-06-20 16:12:36.000000000 +0200 @@ -32,20 +32,36 @@ #include <emmintrin.h> #include <liboil/liboilcolorspace.h> -union m128_int { - __m128i m128; - uint64_t ull[2]; -}; - -static const struct _SSEData { - union m128_int sse_16xff; - union m128_int sse_8x0080; -} c = { - .sse_16xff.ull = {0xffffffffffffffffULL, 0xffffffffffffffffULL}, - .sse_8x0080.ull = {0x0080008000800080ULL, 0x0080008000800080ULL}, -}; +/* Work around non-aligned stack frames (which causes the intristics to crash + * by making sure the stack frame is always aligned + */ +#define OIL_SSE_WRAPPER(name,ret, ...) 
\ + ret name(__VA_ARGS__) __attribute__((used)); \ + ret name ## _wrap (__VA_ARGS__) { \ + OIL_SSE_WRAPPER_CALL(name); \ + } + +#define OIL_SSE_WRAPPER_CALL(name) \ + asm volatile( \ + "\n\t" \ + "subl $0x10,%%esp\n\t" \ + "andl $0xfffffff0,%%esp\n\t" \ + \ + "movdqu 8(%%ebp),%%xmm0\n\t" \ + "movdqa %%xmm0,(%%esp)\n\t" \ + \ + "call " #name "\n\t" \ + "movl %%ebp,%%esp\n\t" \ + : : \ + : "eax","ecx","edx","xmm0") + + +static const __m128i c_sse_16xff = + {0xffffffffffffffffULL, 0xffffffffffffffffULL}; +static const __m128i c_sse_8x0080 = + {0x0080008000800080ULL, 0x0080008000800080ULL}; -#define MC(x) (c.sse_##x.m128) +#define MC(x) (c_sse_##x) /* non-SSE2 compositing support */ #define COMPOSITE_OVER(d,s,m) ((d) + (s) - oil_muldiv_255((d),(m))) @@ -193,7 +209,11 @@ COMPOSITE_IN(oil_argb_B(s), m)); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_sse, composite_in_argb, + +OIL_SSE_WRAPPER(composite_in_argb_sse, static void, + uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) + +OIL_DEFINE_IMPL_FULL (composite_in_argb_sse_wrap, composite_in_argb, OIL_IMPL_FLAG_SSE2); static void @@ -230,7 +250,11 @@ COMPOSITE_IN(oil_argb_B(*src), m)); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse, + +OIL_SSE_WRAPPER(composite_in_argb_const_src_sse , static void, + uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) + +OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_wrap, composite_in_argb_const_src, OIL_IMPL_FLAG_SSE2); static void @@ -267,7 +291,10 @@ COMPOSITE_IN(oil_argb_B(s), mask[0])); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse, +OIL_SSE_WRAPPER(composite_in_argb_const_mask_sse, static void, + uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) + +OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_wrap, composite_in_argb_const_mask, OIL_IMPL_FLAG_SSE2); static void @@ -339,7 +366,11 @@ *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse, + 
+OIL_SSE_WRAPPER(composite_over_argb_const_src_sse, static void, + uint32_t *dest, const uint32_t *src, int n) + +OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_wrap, composite_over_argb_const_src, OIL_IMPL_FLAG_SSE2); static void @@ -447,9 +478,11 @@ *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse, - composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2); +OIL_SSE_WRAPPER(composite_in_over_argb_const_src_sse , static void, + uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_wrap, + composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2); static void composite_in_over_argb_const_mask_sse (uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) @@ -502,7 +535,11 @@ *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse, + +OIL_SSE_WRAPPER(composite_in_over_argb_const_mask_sse, static void, + uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) + +OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_wrap, composite_in_over_argb_const_mask, OIL_IMPL_FLAG_SSE2); static void --- liboil-0.3.9.orig/liboil/sse/sad8x8_sse.c 2005-12-23 22:46:25.000000000 +0100 +++ liboil-0.3.9/liboil/sse/sad8x8_sse.c 2006-06-20 16:47:42.000000000 +0200 @@ -31,6 +31,31 @@ #include <liboil/liboilfunction.h> #include <emmintrin.h> +/* Work around non-aligned stack frames (which causes the intristics to crash + * by making sure the stack frame is always aligned + */ +#define OIL_SSE_WRAPPER(name,ret, ...) 
\ + ret name(__VA_ARGS__) __attribute__((used)); \ + ret name ## _wrap (__VA_ARGS__) { \ + OIL_SSE_WRAPPER_CALL(name); \ + } + +#define OIL_SSE_WRAPPER_CALL(name) \ + asm volatile( \ + "\n\t" \ + "subl $0x18,%%esp\n\t" \ + "andl $0xfffffff0,%%esp\n\t" \ + \ + "movdqu 8(%%ebp),%%xmm0\n\t" \ + "movdqa %%xmm0,(%%esp)\n\t" \ + "movl 0x18(%%ebp), %%ecx\n\t" \ + "movl %%ecx, 0x10(%%esp)\n\t" \ + \ + "call " #name "\n\t" \ + "movl %%ebp,%%esp\n\t" \ + : : \ + : "eax","ecx","edx","xmm0") + union m128_int { __m128i m128; uint32_t i[4]; @@ -42,7 +67,7 @@ int sstr2) { int i; - __m128i sum = _mm_setzero_si128(); + __m128i sum __attribute__ ((aligned (16))) = _mm_setzero_si128(); union m128_int sumi; for (i = 0; i < 4; i++) { @@ -60,4 +85,7 @@ sumi.m128 = sum; *dest = sumi.i[0] + sumi.i[2]; } -OIL_DEFINE_IMPL_FULL (sad8x8_u8_sse, sad8x8_u8, OIL_IMPL_FLAG_SSE2); + +OIL_SSE_WRAPPER(sad8x8_u8_sse, static void, + uint32_t *dest, uint8_t *src1, int sstr1, uint8_t *src2, int sstr2) +OIL_DEFINE_IMPL_FULL (sad8x8_u8_sse_wrap, sad8x8_u8, OIL_IMPL_FLAG_SSE2);
signature.asc
Description: Digital signature