tianqing created this revision.
Herald added subscribers: dexonsmith, dang, pengfei, hiraditya, mgorny.
tianqing requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

d8faf03807ac 
<https://reviews.llvm.org/rGd8faf03807ac059f669ddea8742dd540e58e45be> 
implemented general-regs-only for X86 by disabling all features
with vector instructions. But the CRC32 instruction in SSE4.2 ISA, which uses
only GPRs, also becomes unavailable. This patch adds a CRC32 feature for this
instruction and allows it to be used with general-regs-only.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D105462

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Basic/BuiltinsX86.def
  clang/include/clang/Basic/BuiltinsX86_64.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/crc32intrin.h
  clang/lib/Headers/ia32intrin.h
  clang/lib/Headers/immintrin.h
  clang/lib/Headers/smmintrin.h
  clang/test/CodeGen/X86/x86-crc-builtins.c
  clang/test/CodeGen/attr-cpuspecific.c
  clang/test/CodeGen/attr-target-crc32-x86.c
  clang/test/CodeGen/attr-target-x86.c
  clang/test/Driver/x86-mgeneral-regs-only-crc32.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFormats.td
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
  llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
  llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
  llvm/test/CodeGen/X86/crc32-target-feature.ll
  llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel-x86_64.ll
  llvm/test/CodeGen/X86/sse42-intrinsics-x86_64.ll

Index: llvm/test/CodeGen/X86/crc32-target-feature.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/crc32-target-feature.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define i32 @test1(i32 %a, i8 %b) nounwind #0 {
+; CHECK-LABEL: test1:
+; CHECK:         crc32b
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+
+define i32 @test2(i32 %a, i8 %b) nounwind #1 {
+; CHECK-LABEL: test2:
+; CHECK:         crc32b
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+
+define i32 @test3(i32 %a, i8 %b) nounwind #2 {
+; CHECK-LABEL: test3:
+; CHECK:         crc32b
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+
+define i32 @test4(i32 %a, i8 %b) nounwind #3 {
+; CHECK-LABEL: test4:
+; CHECK:         crc32b
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+
+define i32 @test5(i32 %a, i8 %b) nounwind #4 {
+; CHECK-LABEL: test5:
+; CHECK:         crc32b
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+
+attributes #0 = { "target-features"="+sse,+sse2,+sse4.2" }
+attributes #1 = { "target-features"="+crc32" }
+attributes #2 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" }
+attributes #3 = { "target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" }
+attributes #4 = { "target-features"="+avx2" }
Index: llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
===================================================================
--- llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
+++ llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
@@ -1,4 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse4.2 -show-mc-encoding | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s
@@ -25,4 +26,3 @@
   %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
   ret i64 %tmp
 }
-
Index: llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -show-mc-encoding | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=X64
+
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+; X86-LABEL: crc32_32_8:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08]
+; X86-NEXT:    retl ## encoding: [0xc3]
+;
+; X64-LABEL: crc32_32_8:
+; X64:       ## %bb.0:
+; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
+; X64-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+; X86-LABEL: crc32_32_16:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
+; X86-NEXT:    retl ## encoding: [0xc3]
+;
+; X64-LABEL: crc32_32_16:
+; X64:       ## %bb.0:
+; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
+; X64-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: crc32_32_32:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
+; X86-NEXT:    retl ## encoding: [0xc3]
+;
+; X64-LABEL: crc32_32_32:
+; X64:       ## %bb.0:
+; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; X64-NEXT:    crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
+; X64-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
Index: llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
===================================================================
--- llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
+++ llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
@@ -1,4 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s
 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s
Index: llvm/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -425,6 +425,10 @@
   /// Processor supports User Level Interrupt instructions
   bool HasUINTR = false;
 
+  /// Enable SSE4.2 CRC32 instruction (Used when SSE4.2 is supported but
+  /// function is GPR only)
+  bool HasCRC32 = false;
+
   /// Processor has a single uop BEXTR implementation.
   bool HasFastBEXTR = false;
 
@@ -763,6 +767,7 @@
   bool hasSERIALIZE() const { return HasSERIALIZE; }
   bool hasTSXLDTRK() const { return HasTSXLDTRK; }
   bool hasUINTR() const { return HasUINTR; }
+  bool hasCRC32() const { return HasCRC32; }
   bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
   bool useRetpolineIndirectBranches() const {
     return UseRetpolineIndirectBranches;
Index: llvm/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/lib/Target/X86/X86InstrSSE.td
+++ llvm/lib/Target/X86/X86InstrSSE.td
@@ -6572,14 +6572,14 @@
 // of r and m.
 class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
                    RegisterClass RCIn, SDPatternOperator Int> :
-  SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
+  CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
          !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
          [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
          Sched<[WriteCRC32]>;
 
 class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
                    X86MemOperand x86memop, SDPatternOperator Int> :
-  SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
+  CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
          !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
          [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
          Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -993,6 +993,7 @@
 def HasAMXBF16   : Predicate<"Subtarget->hasAMXBF16()">;
 def HasAMXINT8   : Predicate<"Subtarget->hasAMXINT8()">;
 def HasUINTR     : Predicate<"Subtarget->hasUINTR()">;
+def HasCRC32     : Predicate<"Subtarget->hasCRC32()">;
 def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
                              AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">;
 def In64BitMode  : Predicate<"Subtarget->is64Bit()">,
Index: llvm/lib/Target/X86/X86InstrFormats.td
===================================================================
--- llvm/lib/Target/X86/X86InstrFormats.td
+++ llvm/lib/Target/X86/X86InstrFormats.td
@@ -738,18 +738,19 @@
       : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
         Requires<[UseSSE42]>;
 
-//   SS42FI - SSE 4.2 instructions with T8XD prefix.
-// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
-class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
-             list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
-
 //   SS42AI = SSE 4.2 instructions with TA prefix
 class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern>
       : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
         Requires<[UseSSE42]>;
 
+//   CRC32I - SSE 4.2 CRC32 instructions.
+// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly
+// controlled by the SSE42 flag.
+class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm,
+             list<dag> pattern>
+      : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasCRC32]>;
+
 // AVX Instruction Templates:
 //   Instructions introduced in AVX (no SSE equivalent forms)
 //
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -77,9 +77,11 @@
 def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                                       "Enable SSE 4.1 instructions",
                                       [FeatureSSSE3]>;
+def FeatureCRC32   : SubtargetFeature<"crc32", "HasCRC32", "true",
+                                      "Enable SSE 4.2 CRC32 instruction">;
 def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                                       "Enable SSE 4.2 instructions",
-                                      [FeatureSSE41]>;
+                                      [FeatureSSE41, FeatureCRC32]>;
 // The MMX subtarget feature is separate from the rest of the SSE features
 // because it's important (for odd compatibility reasons) to be able to
 // turn it off explicitly while allowing SSE+ to be on.
Index: llvm/lib/Support/X86TargetParser.cpp
===================================================================
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -470,6 +470,7 @@
 constexpr FeatureBitset ImpliedFeaturesCMOV = {};
 constexpr FeatureBitset ImpliedFeaturesCMPXCHG16B = {};
 constexpr FeatureBitset ImpliedFeaturesCMPXCHG8B = {};
+constexpr FeatureBitset ImpliedFeaturesCRC32 = {};
 constexpr FeatureBitset ImpliedFeaturesENQCMD = {};
 constexpr FeatureBitset ImpliedFeaturesFSGSBASE = {};
 constexpr FeatureBitset ImpliedFeaturesFXSR = {};
@@ -527,7 +528,7 @@
 constexpr FeatureBitset ImpliedFeaturesSSE3 = FeatureSSE2;
 constexpr FeatureBitset ImpliedFeaturesSSSE3 = FeatureSSE3;
 constexpr FeatureBitset ImpliedFeaturesSSE4_1 = FeatureSSSE3;
-constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1;
+constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1 | FeatureCRC32;
 constexpr FeatureBitset ImpliedFeaturesAVX = FeatureSSE4_2;
 constexpr FeatureBitset ImpliedFeaturesAVX2 = FeatureAVX;
 constexpr FeatureBitset ImpliedFeaturesAVX512F =
Index: llvm/lib/Support/Host.cpp
===================================================================
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -1035,8 +1035,10 @@
     setFeature(X86::FEATURE_FMA);
   if ((ECX >> 19) & 1)
     setFeature(X86::FEATURE_SSE4_1);
-  if ((ECX >> 20) & 1)
+  if ((ECX >> 20) & 1) {
     setFeature(X86::FEATURE_SSE4_2);
+    setFeature(X86::FEATURE_CRC32);
+  }
   if ((ECX >> 23) & 1)
     setFeature(X86::FEATURE_POPCNT);
   if ((ECX >> 25) & 1)
@@ -1482,6 +1484,7 @@
   Features["cx16"]   = (ECX >> 13) & 1;
   Features["sse4.1"] = (ECX >> 19) & 1;
   Features["sse4.2"] = (ECX >> 20) & 1;
+  Features["crc32"]  = Features["sse4.2"];
   Features["movbe"]  = (ECX >> 22) & 1;
   Features["popcnt"] = (ECX >> 23) & 1;
   Features["aes"]    = (ECX >> 25) & 1;
Index: llvm/include/llvm/Support/X86TargetParser.def
===================================================================
--- llvm/include/llvm/Support/X86TargetParser.def
+++ llvm/include/llvm/Support/X86TargetParser.def
@@ -153,6 +153,7 @@
 X86_FEATURE       (CLZERO,          "clzero")
 X86_FEATURE       (CMPXCHG16B,      "cx16")
 X86_FEATURE       (CMPXCHG8B,       "cx8")
+X86_FEATURE       (CRC32,           "crc32")
 X86_FEATURE       (ENQCMD,          "enqcmd")
 X86_FEATURE       (F16C,            "f16c")
 X86_FEATURE       (FSGSBASE,        "fsgsbase")
Index: clang/test/Preprocessor/x86_target_features.c
===================================================================
--- clang/test/Preprocessor/x86_target_features.c
+++ clang/test/Preprocessor/x86_target_features.c
@@ -558,3 +558,11 @@
 
 // AVXVNNINOAVX2-NOT: #define __AVX2__ 1
 // AVXVNNINOAVX2-NOT: #define __AVXVNNI__ 1
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
+
+// CRC32: #define __CRC32__ 1
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 -x c -E -dM -o - %s | FileCheck -check-prefix=NOCRC32 %s
+
+// NOCRC32-NOT: #define __CRC32__ 1
Index: clang/test/Driver/x86-target-features.c
===================================================================
--- clang/test/Driver/x86-target-features.c
+++ clang/test/Driver/x86-target-features.c
@@ -293,3 +293,8 @@
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avxvnni %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-AVX-VNNI %s
 // AVX-VNNI: "-target-feature" "+avxvnni"
 // NO-AVX-VNNI: "-target-feature" "-avxvnni"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CRC32 %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
+// CRC32: "-target-feature" "+crc32"
+// NO-CRC32: "-target-feature" "-crc32"
Index: clang/test/Driver/x86-mgeneral-regs-only-crc32.c
===================================================================
--- /dev/null
+++ clang/test/Driver/x86-mgeneral-regs-only-crc32.c
@@ -0,0 +1,50 @@
+// Test the -mgeneral-regs-only with -mcrc32 option on x86
+
+// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefix=CMD %s
+// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mavx2 -mgeneral-regs-only %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
+// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-BEFORE %s
+// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
+// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -mcrc32 %s -### 2>&1 | FileCheck --check-prefixes=CMD,CMD-AFTER %s
+
+// RUN: not %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
+// RUN: not %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
+// RUN: not %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
+// RUN: not %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
+// RUN: not %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
+// RUN: not %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mno-crc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=ERROR %s
+// RUN: %clang -target i386-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -msse4.2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
+// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
+// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mcrc32 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
+// RUN: %clang -target i386-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mavx2 -mgeneral-regs-only -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-GPR %s
+// RUN: %clang -target i386-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
+// RUN: %clang -target i386-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mcrc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
+// RUN: %clang -target i386-unknown-linux-gnu -mno-crc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
+// RUN: %clang -target x86_64-unknown-linux-gnu -mno-crc32 -mgeneral-regs-only -mavx2 -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefix=IR-AVX2 %s
+
+// CMD-BEFORE: "-target-feature" "+crc32"
+// CMD: "-target-feature" "-x87"
+// CMD: "-target-feature" "-mmx"
+// CMD: "-target-feature" "-sse"
+// CMD-AFTER: "-target-feature" "+crc32"
+
+unsigned int test__crc32b(unsigned int CRC, unsigned char V) {
+// CHECK-LABEL: test__crc32b
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
+  return __builtin_ia32_crc32qi(CRC, V);
+}
+
+// ERROR: error: '__builtin_ia32_crc32qi' needs target feature crc32
+
+// IR-GPR: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}"
+// IR-AVX2: attributes {{.*}} = { {{.*}} "target-features"="{{.*}}+avx{{.*}}+avx2{{.*}}+crc32{{.*}}+sse{{.*}}+sse2{{.*}}+ssse3{{.*}}-avx512f{{.*}}-x87{{.*}}"
Index: clang/test/CodeGen/attr-target-x86.c
===================================================================
--- clang/test/CodeGen/attr-target-x86.c
+++ clang/test/CodeGen/attr-target-x86.c
@@ -52,12 +52,12 @@
 // CHECK: use_before_def{{.*}} #7
 // CHECK: walrus{{.*}} #8
 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686"
-// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
+// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
 // CHECK-NOT: tune-cpu
 // CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
-// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
+// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
 // CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
-// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
+// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
 // CHECK-NOT: tune-cpu
 // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
 // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
@@ -65,8 +65,8 @@
 // CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="sandybridge"
 
 // CHECK: "target-cpu"="x86-64-v2"
-// CHECK-SAME: "target-features"="+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
+// CHECK-SAME: "target-features"="+crc32,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
 // CHECK: "target-cpu"="x86-64-v3"
-// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
 // CHECK: "target-cpu"="x86-64-v4"
-// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
Index: clang/test/CodeGen/attr-target-crc32-x86.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/attr-target-crc32-x86.c
@@ -0,0 +1,38 @@
+// Test crc32 target attribute on x86
+
+// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: define{{.*}} i32 @test1({{.*}}) [[TEST1_ATTRS:#[0-9]+]]
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
+unsigned int __attribute__((target("crc32"))) test1(unsigned int CRC, unsigned char V) {
+  return __builtin_ia32_crc32qi(CRC, V);
+}
+
+// CHECK: define{{.*}} i32 @test2({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]]
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
+unsigned int __attribute__((target("general-regs-only,crc32"))) test2(unsigned int CRC, unsigned char V) {
+  return __builtin_ia32_crc32qi(CRC, V);
+}
+
+// CHECK: define{{.*}} i32 @test3({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]]
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
+unsigned int __attribute__((target("crc32,general-regs-only"))) test3(unsigned int CRC, unsigned char V) {
+  return __builtin_ia32_crc32qi(CRC, V);
+}
+
+// CHECK: define{{.*}} i32 @test4({{.*}}) [[TEST4_ATTRS:#[0-9]+]]
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
+unsigned int __attribute__((target("sse4.2"))) test4(unsigned int CRC, unsigned char V) {
+  return __builtin_ia32_crc32qi(CRC, V);
+}
+
+// CHECK: define{{.*}} i32 @test5({{.*}}) [[GPR_ONLY_ATTRS:#[0-9]+]]
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
+unsigned int __attribute__((target("sse4.2,general-regs-only"))) test5(unsigned int CRC, unsigned char V) {
+  return __builtin_ia32_crc32qi(CRC, V);
+}
+
+// CHECK: attributes [[TEST1_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}"
+// CHECK: attributes [[GPR_ONLY_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}-avx{{.*}}-avx2{{.*}}-avx512f{{.*}}-sse{{.*}}-sse2{{.*}}-ssse3{{.*}}-x87{{.*}}"
+// CHECK: attributes [[TEST4_ATTRS]] = { {{.*}} "target-features"="{{.*}}+crc32{{.*}}+sse4.2{{.*}}"
Index: clang/test/CodeGen/attr-cpuspecific.c
===================================================================
--- clang/test/CodeGen/attr-cpuspecific.c
+++ clang/test/CodeGen/attr-cpuspecific.c
@@ -270,6 +270,6 @@
 // WINDOWS: define dso_local i32 @DispatchFirst.B
 // WINDOWS: ret i32 1
 
-// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
 // CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87"
Index: clang/test/CodeGen/X86/x86-crc-builtins.c
===================================================================
--- clang/test/CodeGen/X86/x86-crc-builtins.c
+++ clang/test/CodeGen/X86/x86-crc-builtins.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64
 // RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +sse4.2 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +crc32 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK64
+// RUN: %clang_cc1 -ffreestanding %s -triple=i686-apple-darwin -target-feature +crc32 -emit-llvm -o - -Wall -Werror | FileCheck %s
 
 #include <x86intrin.h>
 
@@ -28,3 +30,29 @@
   return __crc32q(CRC, V);
 }
 #endif
+
+unsigned int test_mm_crc32_u8(unsigned int CRC, unsigned char V) {
+// CHECK-LABEL: test_mm_crc32_u8
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.8(i32 %{{.*}}, i8 %{{.*}})
+  return _mm_crc32_u8(CRC, V);
+}
+
+unsigned int test_mm_crc32_u16(unsigned int CRC, unsigned short V) {
+// CHECK-LABEL: test_mm_crc32_u16
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.16(i32 %{{.*}}, i16 %{{.*}})
+  return _mm_crc32_u16(CRC, V);
+}
+
+unsigned int test_mm_crc32_u32(unsigned int CRC, unsigned int V) {
+// CHECK-LABEL: test_mm_crc32_u32
+// CHECK: call i32 @llvm.x86.sse42.crc32.32.32(i32 %{{.*}}, i32 %{{.*}})
+  return _mm_crc32_u32(CRC, V);
+}
+
+#ifdef __x86_64__
+unsigned long long test_mm_crc32_u64(unsigned long long CRC, unsigned long long V) {
+// CHECK64-LABEL: test_mm_crc32_u64
+// CHECK64: call i64 @llvm.x86.sse42.crc32.64.64(i64 %{{.*}}, i64 %{{.*}})
+  return _mm_crc32_u64(CRC, V);
+}
+#endif
Index: clang/lib/Headers/smmintrin.h
===================================================================
--- clang/lib/Headers/smmintrin.h
+++ clang/lib/Headers/smmintrin.h
@@ -2340,91 +2340,10 @@
   return (__m128i)((__v2di)__V1 > (__v2di)__V2);
 }
 
-/* SSE4.2 Accumulate CRC32.  */
-/// Adds the unsigned integer operand to the CRC-32C checksum of the
-///    unsigned char operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> CRC32B </c> instruction.
-///
-/// \param __C
-///    An unsigned integer operand to add to the CRC-32C checksum of operand
-///    \a  __D.
-/// \param __D
-///    An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
-/// \returns The result of adding operand \a __C to the CRC-32C checksum of
-///    operand \a __D.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_mm_crc32_u8(unsigned int __C, unsigned char __D)
-{
-  return __builtin_ia32_crc32qi(__C, __D);
-}
-
-/// Adds the unsigned integer operand to the CRC-32C checksum of the
-///    unsigned short operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> CRC32W </c> instruction.
-///
-/// \param __C
-///    An unsigned integer operand to add to the CRC-32C checksum of operand
-///    \a __D.
-/// \param __D
-///    An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
-/// \returns The result of adding operand \a __C to the CRC-32C checksum of
-///    operand \a __D.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_mm_crc32_u16(unsigned int __C, unsigned short __D)
-{
-  return __builtin_ia32_crc32hi(__C, __D);
-}
-
-/// Adds the first unsigned integer operand to the CRC-32C checksum of
-///    the second unsigned integer operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> CRC32L </c> instruction.
-///
-/// \param __C
-///    An unsigned integer operand to add to the CRC-32C checksum of operand
-///    \a __D.
-/// \param __D
-///    An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
-/// \returns The result of adding operand \a __C to the CRC-32C checksum of
-///    operand \a __D.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_mm_crc32_u32(unsigned int __C, unsigned int __D)
-{
-  return __builtin_ia32_crc32si(__C, __D);
-}
-
-#ifdef __x86_64__
-/// Adds the unsigned integer operand to the CRC-32C checksum of the
-///    unsigned 64-bit integer operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.
-///
-/// \param __C
-///    An unsigned integer operand to add to the CRC-32C checksum of operand
-///    \a __D.
-/// \param __D
-///    An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
-/// \returns The result of adding operand \a __C to the CRC-32C checksum of
-///    operand \a __D.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
-{
-  return __builtin_ia32_crc32di(__C, __D);
-}
-#endif /* __x86_64__ */
-
 #undef __DEFAULT_FN_ATTRS
 
 #include <popcntintrin.h>
 
+#include <crc32intrin.h>
+
 #endif /* __SMMINTRIN_H */
Index: clang/lib/Headers/immintrin.h
===================================================================
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -513,6 +513,11 @@
 #include <tsxldtrkintrin.h>
 #endif
 
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
+    defined(__CRC32__)
+#include <crc32intrin.h>
+#endif
+
 #if defined(_MSC_VER) && __has_extension(gnu_asm)
 /* Define the default attributes for these intrinsics */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
Index: clang/lib/Headers/ia32intrin.h
===================================================================
--- clang/lib/Headers/ia32intrin.h
+++ clang/lib/Headers/ia32intrin.h
@@ -17,6 +17,7 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
 #define __DEFAULT_FN_ATTRS_SSE42 __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
+#define __DEFAULT_FN_ATTRS_CRC32 __attribute__((__always_inline__, __nodebug__, __target__("crc32")))
 
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr
@@ -282,7 +283,7 @@
  *  \returns The result of adding operand \a __C to the CRC-32C checksum of
  *     operand \a __D.
  */
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
+static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
 __crc32b(unsigned int __C, unsigned char __D)
 {
   return __builtin_ia32_crc32qi(__C, __D);
@@ -303,7 +304,7 @@
  *  \returns The result of adding operand \a __C to the CRC-32C checksum of
  *     operand \a __D.
  */
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
+static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
 __crc32w(unsigned int __C, unsigned short __D)
 {
   return __builtin_ia32_crc32hi(__C, __D);
@@ -324,7 +325,7 @@
  *  \returns The result of adding operand \a __C to the CRC-32C checksum of
  *     operand \a __D.
  */
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
+static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
 __crc32d(unsigned int __C, unsigned int __D)
 {
   return __builtin_ia32_crc32si(__C, __D);
@@ -346,7 +347,7 @@
  *  \returns The result of adding operand \a __C to the CRC-32C checksum of
  *     operand \a __D.
  */
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_SSE42
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32
 __crc32q(unsigned long long __C, unsigned long long __D)
 {
   return __builtin_ia32_crc32di(__C, __D);
@@ -435,6 +436,7 @@
 
 #undef __DEFAULT_FN_ATTRS
 #undef __DEFAULT_FN_ATTRS_CAST
+#undef __DEFAULT_FN_ATTRS_CRC32
 #undef __DEFAULT_FN_ATTRS_SSE42
 #undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
Index: clang/lib/Headers/crc32intrin.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/crc32intrin.h
@@ -0,0 +1,100 @@
+/*===---- crc32intrin.h - SSE4.2 Accumulate CRC32 intrinsics ---------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CRC32INTRIN_H
+#define __CRC32INTRIN_H
+
+#define __DEFAULT_FN_ATTRS \
+  __attribute__((__always_inline__, __nodebug__, __target__("crc32")))
+
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
+///    unsigned char operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> CRC32B </c> instruction.
+///
+/// \param __C
+///    An unsigned integer operand to add to the CRC-32C checksum of operand
+///    \a  __D.
+/// \param __D
+///    An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+///    operand \a __D.
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u8(unsigned int __C, unsigned char __D)
+{
+  return __builtin_ia32_crc32qi(__C, __D);
+}
+
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
+///    unsigned short operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> CRC32W </c> instruction.
+///
+/// \param __C
+///    An unsigned integer operand to add to the CRC-32C checksum of operand
+///    \a __D.
+/// \param __D
+///    An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+///    operand \a __D.
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u16(unsigned int __C, unsigned short __D)
+{
+  return __builtin_ia32_crc32hi(__C, __D);
+}
+
+/// Adds the first unsigned integer operand to the CRC-32C checksum of
+///    the second unsigned integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> CRC32L </c> instruction.
+///
+/// \param __C
+///    An unsigned integer operand to add to the CRC-32C checksum of operand
+///    \a __D.
+/// \param __D
+///    An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+///    operand \a __D.
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u32(unsigned int __C, unsigned int __D)
+{
+  return __builtin_ia32_crc32si(__C, __D);
+}
+
+#ifdef __x86_64__
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
+///    unsigned 64-bit integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.
+///
+/// \param __C
+///    An unsigned integer operand to add to the CRC-32C checksum of operand
+///    \a __D.
+/// \param __D
+///    An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+///    operand \a __D.
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
+{
+  return __builtin_ia32_crc32di(__C, __D);
+}
+#endif /* __x86_64__ */
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __CRC32INTRIN_H */
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -55,6 +55,7 @@
   cet.h
   cldemoteintrin.h
   clzerointrin.h
+  crc32intrin.h
   cpuid.h
   clflushoptintrin.h
   clwbintrin.h
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -142,6 +142,7 @@
   bool HasSERIALIZE = false;
   bool HasTSXLDTRK = false;
   bool HasUINTR = false;
+  bool HasCRC32 = false;
 
 protected:
   llvm::X86::CPUKind CPU = llvm::X86::CK_None;
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -327,6 +327,8 @@
       HasTSXLDTRK = true;
     } else if (Feature == "+uintr") {
       HasUINTR = true;
+    } else if (Feature == "+crc32") {
+      HasCRC32 = true;
     }
 
     X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
@@ -753,6 +755,8 @@
     Builder.defineMacro("__TSXLDTRK__");
   if (HasUINTR)
     Builder.defineMacro("__UINTR__");
+  if (HasCRC32)
+    Builder.defineMacro("__CRC32__");
 
   // Each case falls through to the previous one here.
   switch (SSELevel) {
@@ -872,6 +876,7 @@
       .Case("clflushopt", true)
       .Case("clwb", true)
       .Case("clzero", true)
+      .Case("crc32", true)
       .Case("cx16", true)
       .Case("enqcmd", true)
       .Case("f16c", true)
@@ -963,6 +968,7 @@
       .Case("clflushopt", HasCLFLUSHOPT)
       .Case("clwb", HasCLWB)
       .Case("clzero", HasCLZERO)
+      .Case("crc32", HasCRC32)
       .Case("cx8", HasCX8)
       .Case("cx16", HasCX16)
       .Case("enqcmd", HasENQCMD)
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -4149,6 +4149,8 @@
 def mno_wbnoinvd : Flag<["-"], "mno-wbnoinvd">, Group<m_x86_Features_Group>;
 def mclzero : Flag<["-"], "mclzero">, Group<m_x86_Features_Group>;
 def mno_clzero : Flag<["-"], "mno-clzero">, Group<m_x86_Features_Group>;
+def mcrc32 : Flag<["-"], "mcrc32">, Group<m_x86_Features_Group>;
+def mno_crc32 : Flag<["-"], "mno-crc32">, Group<m_x86_Features_Group>;
 def mcx16 : Flag<["-"], "mcx16">, Group<m_x86_Features_Group>;
 def mno_cx16 : Flag<["-"], "mno-cx16">, Group<m_x86_Features_Group>;
 def menqcmd : Flag<["-"], "menqcmd">, Group<m_x86_Features_Group>;
Index: clang/include/clang/Basic/BuiltinsX86_64.def
===================================================================
--- clang/include/clang/Basic/BuiltinsX86_64.def
+++ clang/include/clang/Basic/BuiltinsX86_64.def
@@ -44,7 +44,7 @@
 TARGET_BUILTIN(__builtin_ia32_movnti64, "vOi*Oi", "n", "sse2")
 TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "OiV2OiIi", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2OiV2OiOiIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "crc32")
 TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "OiV4OiIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4OiV4OiOiIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", "fsgsbase")
Index: clang/include/clang/Basic/BuiltinsX86.def
===================================================================
--- clang/include/clang/Basic/BuiltinsX86.def
+++ clang/include/clang/Basic/BuiltinsX86.def
@@ -421,9 +421,9 @@
 TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
 TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
 
-TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "crc32")
+TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "crc32")
+TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "crc32")
 
 // SSE4a
 TARGET_BUILTIN(__builtin_ia32_extrqi, "V2OiV2OiIcIc", "ncV:128:", "sse4a")
Index: clang/docs/ClangCommandLineReference.rst
===================================================================
--- clang/docs/ClangCommandLineReference.rst
+++ clang/docs/ClangCommandLineReference.rst
@@ -3569,6 +3569,8 @@
 
 .. option:: -mclzero, -mno-clzero
 
+.. option:: -mcrc32, -mno-crc32
+
 .. option:: -mcx16, -mno-cx16
 
 .. option:: -menqcmd, -mno-enqcmd
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to