------- Comment #4 from dominiq at lps dot ens dot fr  2010-08-26 16:57 -------
Following a discussion on IRC, I have tested the following patch, which does
not work with "gcc version 4.0.1 (Apple Inc. build 5493)" and without
'GCC_VERSION >= 4005'.

--- /opt/gcc/gcc-4.6-work/libcpp/lex.c  2010-08-25 00:53:19.000000000 +0200
+++ /opt/gcc/gcc-4.6-work/old-patches/lex.c     2010-08-23 18:46:46.000000000 +0200
@@ -264,17 +264,7 @@ search_line_acc_char (const uchar *s, co
     }
 }

-/* Disable on Solaris 2/x86 until the following problems can be properly
-   autoconfed:
-
-   The Solaris 8 assembler cannot assemble SSE2/SSE4.2 insns.
-   The Solaris 9 assembler cannot assemble SSE4.2 insns.
-   Before Solaris 9 Update 6, SSE insns cannot be executed.
-   The Solaris 10+ assembler tags objects with the instruction set
-   extensions used, so SSE4.2 executables cannot run on machines that
-   don't support that extension.  */
-
-#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
+#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__))

 /* Replicated character data to be shared between implementations.
    Recall that outside of a context with vector support we can't
@@ -410,7 +400,6 @@ search_line_sse2 (const uchar *s, const 
   return (const uchar *)p + found;
 }

-#ifdef HAVE_SSE4
 /* A version of the fast scanner using SSE 4.2 vectorized string insns.  */

 static const uchar *
@@ -465,11 +454,6 @@ search_line_sse42 (const uchar *s, const
   return s + index;
 }

-#else
-/* Work around out-dated assemblers without sse4 support.  */
-#define search_line_sse42 search_line_sse2
-#endif
-
 /* Check the CPU capabilities.  */

 #include "../gcc/config/i386/cpuid.h"
@@ -512,7 +496,8 @@ init_vectorized_lexer (void)
   search_line_fast = impl;
 }

-#elif defined(__GNUC__) && defined(__ALTIVEC__)
+#elif GCC_VERSION >= 4005 && defined(__ALTIVEC__)
+#include "altivec.h"

 /* A vection of the fast scanner using AltiVec vectorized byte compares.  */
 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
@@ -522,7 +507,7 @@ init_vectorized_lexer (void)
 static const uchar *
 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
 {
-  typedef __attribute__((altivec(vector))) unsigned char vc;
+  typedef __vector unsigned char vc;

   const vc repl_nl = {
     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', 
@@ -550,14 +535,14 @@ search_line_fast (const uchar *s, const 

   /* Altivec loads automatically mask addresses with -16.  This lets us
      issue the first load as early as possible.  */
-  data = __builtin_vec_ld(0, (const vc *)s);
+  data = vec_ld(0, (const vc *)s);

   /* Discard bytes before the beginning of the buffer.  Do this by
      beginning with all ones and shifting in zeros according to the
      mis-alignment.  The LVSR instruction pulls the exact shift we
      want from the address.  */
-  mask = __builtin_vec_lvsr(0, s);
-  mask = __builtin_vec_perm(zero, ones, mask);
+  mask = vec_lvsr(0, s);
+  mask = vec_perm(zero, ones, mask);
   data &= mask;

   /* While altivec loads mask addresses, we still need to align S so
@@ -571,20 +556,20 @@ search_line_fast (const uchar *s, const 
       vc m_nl, m_cr, m_bs, m_qm;

       s += 16;
-      data = __builtin_vec_ld(0, (const vc *)s);
+      data = vec_ld(0, (const vc *)s);

     start:
-      m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
-      m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
-      m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
-      m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
+      m_nl = (vc) vec_cmpeq(data, repl_nl);
+      m_cr = (vc) vec_cmpeq(data, repl_cr);
+      m_bs = (vc) vec_cmpeq(data, repl_bs);
+      m_qm = (vc) vec_cmpeq(data, repl_qm);
       t = (m_nl | m_cr) | (m_bs | m_qm);

       /* T now contains 0xff in bytes for which we matched one of the relevant
         characters.  We want to exit the loop if any byte in T is non-zero.
         Below is the expansion of vec_any_ne(t, zero).  */
     }
-  while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
+  while (!vec_any_ne(t, zero));

   {
 #define N  (sizeof(vc) / sizeof(long))


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45381

Reply via email to