MaskRay created this revision.
MaskRay added a reviewer: bkramer.
Herald added a subscriber: cfe-commits.

SSE2 vectorization was added in 2012, but it is 2018 now and I cannot
observe any performance boost from either the existing _mm_movemask_epi8
code or the following SSE4.2 variant (compiled with -msse4.2):

  __m128i C = _mm_setr_epi8('\r','\n',0,0,0,0,0,0,0,0,0,0,0,0,0,0);
  _mm_cmpestri(C, 2, Chunk, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
               _SIDD_POSITIVE_POLARITY | _SIDD_LEAST_SIGNIFICANT)

Delete the vectorization to simplify the code.

Also stop treating \n\r as a line-ending sequence; only \r\n is still
skipped as a pair.


Repository:
  rC Clang

https://reviews.llvm.org/D55484

Files:
  lib/Basic/SourceManager.cpp


Index: lib/Basic/SourceManager.cpp
===================================================================
--- lib/Basic/SourceManager.cpp
+++ lib/Basic/SourceManager.cpp
@@ -1216,65 +1216,22 @@
 
   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
-  unsigned Offs = 0;
+  unsigned I = 0;
   while (true) {
     // Skip over the contents of the line.
-    const unsigned char *NextBuf = (const unsigned char *)Buf;
-
-#ifdef __SSE2__
-    // Try to skip to the next newline using SSE instructions. This is very
-    // performance sensitive for programs with lots of diagnostics and in -E
-    // mode.
-    __m128i CRs = _mm_set1_epi8('\r');
-    __m128i LFs = _mm_set1_epi8('\n');
-
-    // First fix up the alignment to 16 bytes.
-    while (((uintptr_t)NextBuf & 0xF) != 0) {
-      if (*NextBuf == '\n' || *NextBuf == '\r' || *NextBuf == '\0')
-        goto FoundSpecialChar;
-      ++NextBuf;
-    }
-
-    // Scan 16 byte chunks for '\r' and '\n'. Ignore '\0'.
-    while (NextBuf+16 <= End) {
-      const __m128i Chunk = *(const __m128i*)NextBuf;
-      __m128i Cmp = _mm_or_si128(_mm_cmpeq_epi8(Chunk, CRs),
-                                 _mm_cmpeq_epi8(Chunk, LFs));
-      unsigned Mask = _mm_movemask_epi8(Cmp);
-
-      // If we found a newline, adjust the pointer and jump to the handling code.
-      if (Mask != 0) {
-        NextBuf += llvm::countTrailingZeros(Mask);
-        goto FoundSpecialChar;
-      }
-      NextBuf += 16;
-    }
-#endif
-
-    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
-      ++NextBuf;
-
-#ifdef __SSE2__
-FoundSpecialChar:
-#endif
-    Offs += NextBuf-Buf;
-    Buf = NextBuf;
-
-    if (Buf[0] == '\n' || Buf[0] == '\r') {
-      // If this is \n\r or \r\n, skip both characters.
-      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) {
-        ++Offs;
-        ++Buf;
-      }
-      ++Offs;
-      ++Buf;
-      LineOffsets.push_back(Offs);
+    while (Buf[I] != '\n' && Buf[I] != '\r' && Buf[I] != '\0')
+      ++I;
+
+    if (Buf[I] == '\n' || Buf[I] == '\r') {
+      // If this is \r\n, skip both characters.
+      if (Buf[I] == '\r' && Buf[I+1] == '\n')
+        ++I;
+      ++I;
+      LineOffsets.push_back(I);
     } else {
-      // Otherwise, this is a null.  If end of file, exit.
-      if (Buf == End) break;
-      // Otherwise, skip the null.
-      ++Offs;
-      ++Buf;
+      // Otherwise, this is a NUL. If end of file, exit.
+      if (Buf+I == End) break;
+      ++I;
     }
   }
 


Index: lib/Basic/SourceManager.cpp
===================================================================
--- lib/Basic/SourceManager.cpp
+++ lib/Basic/SourceManager.cpp
@@ -1216,65 +1216,22 @@
 
   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
-  unsigned Offs = 0;
+  unsigned I = 0;
   while (true) {
     // Skip over the contents of the line.
-    const unsigned char *NextBuf = (const unsigned char *)Buf;
-
-#ifdef __SSE2__
-    // Try to skip to the next newline using SSE instructions. This is very
-    // performance sensitive for programs with lots of diagnostics and in -E
-    // mode.
-    __m128i CRs = _mm_set1_epi8('\r');
-    __m128i LFs = _mm_set1_epi8('\n');
-
-    // First fix up the alignment to 16 bytes.
-    while (((uintptr_t)NextBuf & 0xF) != 0) {
-      if (*NextBuf == '\n' || *NextBuf == '\r' || *NextBuf == '\0')
-        goto FoundSpecialChar;
-      ++NextBuf;
-    }
-
-    // Scan 16 byte chunks for '\r' and '\n'. Ignore '\0'.
-    while (NextBuf+16 <= End) {
-      const __m128i Chunk = *(const __m128i*)NextBuf;
-      __m128i Cmp = _mm_or_si128(_mm_cmpeq_epi8(Chunk, CRs),
-                                 _mm_cmpeq_epi8(Chunk, LFs));
-      unsigned Mask = _mm_movemask_epi8(Cmp);
-
-      // If we found a newline, adjust the pointer and jump to the handling code.
-      if (Mask != 0) {
-        NextBuf += llvm::countTrailingZeros(Mask);
-        goto FoundSpecialChar;
-      }
-      NextBuf += 16;
-    }
-#endif
-
-    while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
-      ++NextBuf;
-
-#ifdef __SSE2__
-FoundSpecialChar:
-#endif
-    Offs += NextBuf-Buf;
-    Buf = NextBuf;
-
-    if (Buf[0] == '\n' || Buf[0] == '\r') {
-      // If this is \n\r or \r\n, skip both characters.
-      if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) {
-        ++Offs;
-        ++Buf;
-      }
-      ++Offs;
-      ++Buf;
-      LineOffsets.push_back(Offs);
+    while (Buf[I] != '\n' && Buf[I] != '\r' && Buf[I] != '\0')
+      ++I;
+
+    if (Buf[I] == '\n' || Buf[I] == '\r') {
+      // If this is \r\n, skip both characters.
+      if (Buf[I] == '\r' && Buf[I+1] == '\n')
+        ++I;
+      ++I;
+      LineOffsets.push_back(I);
     } else {
-      // Otherwise, this is a null.  If end of file, exit.
-      if (Buf == End) break;
-      // Otherwise, skip the null.
-      ++Offs;
-      ++Buf;
+      // Otherwise, this is a NUL. If end of file, exit.
+      if (Buf+I == End) break;
+      ++I;
     }
   }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to