------- Comment #4 from dominiq at lps dot ens dot fr 2010-08-26 16:57 ------- Following a discussion on IRC, I have tested the following patch; with the 'GCC_VERSION >= 4005' check removed, it does not work with "gcc version 4.0.1 (Apple Inc. build 5493)".
--- /opt/gcc/gcc-4.6-work/libcpp/lex.c 2010-08-25 00:53:19.000000000 +0200 +++ /opt/gcc/gcc-4.6-work/old-patches/lex.c 2010-08-23 18:46:46.000000000 +0200 @@ -264,17 +264,7 @@ search_line_acc_char (const uchar *s, co } } -/* Disable on Solaris 2/x86 until the following problems can be properly - autoconfed: - - The Solaris 8 assembler cannot assemble SSE2/SSE4.2 insns. - The Solaris 9 assembler cannot assemble SSE4.2 insns. - Before Solaris 9 Update 6, SSE insns cannot be executed. - The Solaris 10+ assembler tags objects with the instruction set - extensions used, so SSE4.2 executables cannot run on machines that - don't support that extension. */ - -#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__)) +#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) /* Replicated character data to be shared between implementations. Recall that outside of a context with vector support we can't @@ -410,7 +400,6 @@ search_line_sse2 (const uchar *s, const return (const uchar *)p + found; } -#ifdef HAVE_SSE4 /* A version of the fast scanner using SSE 4.2 vectorized string insns. */ static const uchar * @@ -465,11 +454,6 @@ search_line_sse42 (const uchar *s, const return s + index; } -#else -/* Work around out-dated assemblers without sse4 support. */ -#define search_line_sse42 search_line_sse2 -#endif - /* Check the CPU capabilities. */ #include "../gcc/config/i386/cpuid.h" @@ -512,7 +496,8 @@ init_vectorized_lexer (void) search_line_fast = impl; } -#elif defined(__GNUC__) && defined(__ALTIVEC__) +#elif GCC_VERSION >= 4005 && defined(__ALTIVEC__) +#include "altivec.h" /* A vection of the fast scanner using AltiVec vectorized byte compares. */ /* ??? 
Unfortunately, attribute(target("altivec")) is not yet supported, @@ -522,7 +507,7 @@ init_vectorized_lexer (void) static const uchar * search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) { - typedef __attribute__((altivec(vector))) unsigned char vc; + typedef __vector unsigned char vc; const vc repl_nl = { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', @@ -550,14 +535,14 @@ search_line_fast (const uchar *s, const /* Altivec loads automatically mask addresses with -16. This lets us issue the first load as early as possible. */ - data = __builtin_vec_ld(0, (const vc *)s); + data = vec_ld(0, (const vc *)s); /* Discard bytes before the beginning of the buffer. Do this by beginning with all ones and shifting in zeros according to the mis-alignment. The LVSR instruction pulls the exact shift we want from the address. */ - mask = __builtin_vec_lvsr(0, s); - mask = __builtin_vec_perm(zero, ones, mask); + mask = vec_lvsr(0, s); + mask = vec_perm(zero, ones, mask); data &= mask; /* While altivec loads mask addresses, we still need to align S so @@ -571,20 +556,20 @@ search_line_fast (const uchar *s, const vc m_nl, m_cr, m_bs, m_qm; s += 16; - data = __builtin_vec_ld(0, (const vc *)s); + data = vec_ld(0, (const vc *)s); start: - m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl); - m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr); - m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs); - m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm); + m_nl = (vc) vec_cmpeq(data, repl_nl); + m_cr = (vc) vec_cmpeq(data, repl_cr); + m_bs = (vc) vec_cmpeq(data, repl_bs); + m_qm = (vc) vec_cmpeq(data, repl_qm); t = (m_nl | m_cr) | (m_bs | m_qm); /* T now contains 0xff in bytes for which we matched one of the relevant characters. We want to exit the loop if any byte in T is non-zero. Below is the expansion of vec_any_ne(t, zero). 
*/ } - while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero)); + while (!vec_any_ne(t, zero)); { #define N (sizeof(vc) / sizeof(long)) -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45381