On Tue, Apr 29, 2014 at 8:38 AM, Kenneth Graunke <kenn...@whitecape.org> wrote: > Apparently Clang needs an extra #include, and doesn't understand > __attribute__((flatten)). Marking the helper functions as > always_inline works just as well, and actually compiles with Clang. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/intel_tex_subimage.c | 17 ++++++----------- > 1 file changed, 6 insertions(+), 11 deletions(-) > > Seems to work with GCC 4.8 and Clang 3.4. The _mm_loadu_ps change seems > to be harmless enough, but I'm sure it'll break somebody's build somehow... > > diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c > b/src/mesa/drivers/dri/i965/intel_tex_subimage.c > index 891a46d..69c808f 100644 > --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c > +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c > @@ -26,6 +26,7 @@ > * > **************************************************************************/ > > +#include <xmmintrin.h>
This needs SSE to be included. We determined that all i965-paired CPUs had at least SSE2. We should probably just add -msse2 to i965's Makefile. > #include "main/bufferobj.h" > #include "main/image.h" > #include "main/macros.h" > @@ -179,7 +180,7 @@ typedef char v16 __attribute__((vector_size(16))); > /* NOTE: dst must be 16 byte aligned */ > #define rgba8_copy_16(dst, src) \ > *(v16*)(dst) = __builtin_ia32_pshufb128( \ > - (v16) __builtin_ia32_loadups((float*)(src)), \ > + (v16) _mm_loadu_ps((float*)(src)), \ While we're doing this, we should also replace __builtin_ia32_pshufb128 with _mm_shuffle_epi8. #include <tmmintrin.h>, guarded with #ifdef __SSSE3___ like the guards around this code. I'd probably split these suggestions into a different patch? Not sure. > *(v16*) rgba8_permutation \ > ) > #endif > @@ -233,7 +234,7 @@ rgba8_copy(void *dst, const void *src, size_t bytes) > * > * \copydoc tile_copy_fn > */ > -static inline void > +static __attribute__((always_inline)) inline void > xtile_copy(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, > uint32_t y0, uint32_t y1, > char *dst, const char *src, > @@ -274,7 +275,7 @@ xtile_copy(uint32_t x0, uint32_t x1, uint32_t x2, > uint32_t x3, > * > * \copydoc tile_copy_fn > */ > -static inline void > +static __attribute__((always_inline)) inline void > ytile_copy( > uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, > uint32_t y0, uint32_t y1, > @@ -331,12 +332,6 @@ ytile_copy( > } > } > > -#ifdef __GNUC__ > -#define FLATTEN __attribute__((flatten)) > -#else > -#define FLATTEN > -#endif > - > /** > * Copy texture data from linear to X tile layout, faster. > * > @@ -346,7 +341,7 @@ ytile_copy( > * > * \copydoc tile_copy_fn > */ > -static FLATTEN void > +static void > xtile_copy_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, > uint32_t y0, uint32_t y1, > char *dst, const char *src, > @@ -382,7 +377,7 @@ xtile_copy_faster(uint32_t x0, uint32_t x1, uint32_t x2, > uint32_t x3, > * > * \copydoc tile_copy_fn > */ > -static FLATTEN void > +static void > ytile_copy_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, > uint32_t y0, uint32_t y1, > char *dst, const char *src, > -- > 1.9.2 The attribute changes are Reviewed-by: Matt Turner <matts...@gmail.com> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev