On Tue, Jul 30, 2024 at 5:43 PM Andi Kleen <a...@linux.intel.com> wrote:
>
> From: Andi Kleen <a...@gcc.gnu.org>
>
> Host systems with only MMX and no SSE2 should be really rare now.
> Let's remove the MMX code path to keep the number of custom
> implementations the same.
>
> The SSE2 code path is also somewhat dubious now (nearly everything
> should have SSE 4.2, which is >15 years old now), but the SSE2
> code path is used as a fallback for others, and apparently
> Solaris also uses it due to tool chain deficiencies.

OK if nobody objects this week.

Thanks,
Richard.

> libcpp/ChangeLog:
>
>         * lex.cc (search_line_mmx): Remove function.
>         (init_vectorized_lexer): Remove search_line_mmx.
> ---
>  libcpp/lex.cc | 75 ---------------------------------------------------
>  1 file changed, 75 deletions(-)
>
> diff --git a/libcpp/lex.cc b/libcpp/lex.cc
> index 16f2c23af1e1..1591dcdf151a 100644
> --- a/libcpp/lex.cc
> +++ b/libcpp/lex.cc
> @@ -290,71 +290,6 @@ static const char repl_chars[4][16] __attribute__((aligned(16))) = {
>      '?', '?', '?', '?', '?', '?', '?', '?' },
>  };
>
> -/* A version of the fast scanner using MMX vectorized byte compare insns.
> -
> -   This uses the PMOVMSKB instruction which was introduced with "MMX2",
> -   which was packaged into SSE1; it is also present in the AMD MMX
> -   extension.  Mark the function as using "sse" so that we emit a real
> -   "emms" instruction, rather than the 3dNOW "femms" instruction.  */
> -
> -static const uchar *
> -#ifndef __SSE__
> -__attribute__((__target__("sse")))
> -#endif
> -search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
> -{
> -  typedef char v8qi __attribute__ ((__vector_size__ (8)));
> -  typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
> -
> -  const v8qi repl_nl = *(const v8qi *)repl_chars[0];
> -  const v8qi repl_cr = *(const v8qi *)repl_chars[1];
> -  const v8qi repl_bs = *(const v8qi *)repl_chars[2];
> -  const v8qi repl_qm = *(const v8qi *)repl_chars[3];
> -
> -  unsigned int misalign, found, mask;
> -  const v8qi *p;
> -  v8qi data, t, c;
> -
> -  /* Align the source pointer.  While MMX doesn't generate unaligned data
> -     faults, this allows us to safely scan to the end of the buffer without
> -     reading beyond the end of the last page.  */
> -  misalign = (uintptr_t)s & 7;
> -  p = (const v8qi *)((uintptr_t)s & -8);
> -  data = *p;
> -
> -  /* Create a mask for the bytes that are valid within the first
> -     16-byte block.  The Idea here is that the AND with the mask
> -     within the loop is "free", since we need some AND or TEST
> -     insn in order to set the flags for the branch anyway.  */
> -  mask = -1u << misalign;
> -
> -  /* Main loop processing 8 bytes at a time.  */
> -  goto start;
> -  do
> -    {
> -      data = *++p;
> -      mask = -1;
> -
> -    start:
> -      t = __builtin_ia32_pcmpeqb(data, repl_nl);
> -      c = __builtin_ia32_pcmpeqb(data, repl_cr);
> -      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
> -      c = __builtin_ia32_pcmpeqb(data, repl_bs);
> -      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
> -      c = __builtin_ia32_pcmpeqb(data, repl_qm);
> -      t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
> -      found = __builtin_ia32_pmovmskb (t);
> -      found &= mask;
> -    }
> -  while (!found);
> -
> -  __builtin_ia32_emms ();
> -
> -  /* FOUND contains 1 in bits for which we matched a relevant
> -     character.  Conversion to the byte index is trivial.  */
> -  found = __builtin_ctz(found);
> -  return (const uchar *)p + found;
> -}
>
>  /* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
>
> @@ -509,8 +444,6 @@ init_vectorized_lexer (void)
>    minimum = 3;
>  #elif defined(__SSE2__)
>    minimum = 2;
> -#elif defined(__SSE__)
> -  minimum = 1;
>  #endif
>
>    if (minimum == 3)
> @@ -521,14 +454,6 @@ init_vectorized_lexer (void)
>          impl = search_line_sse42;
>        else if (minimum == 2 || (edx & bit_SSE2))
>         impl = search_line_sse2;
> -      else if (minimum == 1 || (edx & bit_SSE))
> -       impl = search_line_mmx;
> -    }
> -  else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
> -    {
> -      if (minimum == 1
> -         || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
> -       impl = search_line_mmx;
>      }
>
>    search_line_fast = impl;
> --
> 2.45.2
>

Reply via email to