[PATCH] D50448: [CGObjCGNU] Rename GetSelector helper method to fix -Woverloaded-virtual warning (PR38210)

2018-08-08 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon created this revision.
RKSimon added a reviewer: theraven.

As suggested by @theraven on PR38210, this patch fixes the gcc 
-Woverloaded-virtual warnings by renaming the extra CGObjCGNU::GetSelector 
method to CGObjCGNU::GetTypedSelector


Repository:
  rC Clang

https://reviews.llvm.org/D50448

Files:
  lib/CodeGen/CGObjCGNU.cpp


Index: lib/CodeGen/CGObjCGNU.cpp
===
--- lib/CodeGen/CGObjCGNU.cpp
+++ lib/CodeGen/CGObjCGNU.cpp
@@ -510,8 +510,8 @@
 
   /// Returns a selector with the specified type encoding.  An empty string is
   /// used to return an untyped selector (with the types field set to NULL).
-  virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
-   const std::string &TypeEncoding);
+  virtual llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+const std::string &TypeEncoding);
 
   /// Returns the name of ivar offset variables.  In the GNUstep v1 ABI, this
   /// contains the class and ivar names, in the v2 ABI this contains the type
@@ -1342,8 +1342,8 @@
   return Val;
 return llvm::ConstantExpr::getBitCast(Val, Ty);
   }
-  llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
-const std::string &TypeEncoding) override {
+  llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+const std::string &TypeEncoding) override {
 return GetConstantSelector(Sel, TypeEncoding);
   }
   llvm::Constant  *GetTypeString(llvm::StringRef TypeEncoding) {
@@ -2121,8 +2121,8 @@
   return Value;
 }
 
-llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel,
-const std::string &TypeEncoding) {
+llvm::Value *CGObjCGNU::GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding) {
   SmallVectorImpl &Types = SelectorTable[Sel];
   llvm::GlobalAlias *SelValue = nullptr;
 
@@ -2155,13 +2155,13 @@
 }
 
 llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel) {
-  return GetSelector(CGF, Sel, std::string());
+  return GetTypedSelector(CGF, Sel, std::string());
 }
 
 llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF,
 const ObjCMethodDecl *Method) {
   std::string SelTypes = CGM.getContext().getObjCEncodingForMethodDecl(Method);
-  return GetSelector(CGF, Method->getSelector(), SelTypes);
+  return GetTypedSelector(CGF, Method->getSelector(), SelTypes);
 }
 
 llvm::Constant *CGObjCGNU::GetEHType(QualType T) {


Index: lib/CodeGen/CGObjCGNU.cpp
===
--- lib/CodeGen/CGObjCGNU.cpp
+++ lib/CodeGen/CGObjCGNU.cpp
@@ -510,8 +510,8 @@
 
   /// Returns a selector with the specified type encoding.  An empty string is
   /// used to return an untyped selector (with the types field set to NULL).
-  virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
-   const std::string &TypeEncoding);
+  virtual llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+const std::string &TypeEncoding);
 
   /// Returns the name of ivar offset variables.  In the GNUstep v1 ABI, this
   /// contains the class and ivar names, in the v2 ABI this contains the type
@@ -1342,8 +1342,8 @@
   return Val;
 return llvm::ConstantExpr::getBitCast(Val, Ty);
   }
-  llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
-const std::string &TypeEncoding) override {
+  llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+const std::string &TypeEncoding) override {
 return GetConstantSelector(Sel, TypeEncoding);
   }
   llvm::Constant  *GetTypeString(llvm::StringRef TypeEncoding) {
@@ -2121,8 +2121,8 @@
   return Value;
 }
 
-llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel,
-const std::string &TypeEncoding) {
+llvm::Value *CGObjCGNU::GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding) {
   SmallVectorImpl &Types = SelectorTable[Sel];
   llvm::GlobalAlias *SelValue = nullptr;
 
@@ -2155,13 +2155,13 @@
 }
 
 llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel) {
-  return GetSelector(CGF, Sel, std::string());
+  return GetTypedSelector(CGF, Sel, std::string());
 }
 
 llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF,
 const ObjCMethodDecl *Method) {
   std::string SelTypes = CGM.getContext().getObjCEncodingForMethodDecl(Method);
-  return GetSelector(CGF, Method->getSelector(), SelTypes);
+  return GetTypedSelector(CGF, Method->getSelector(), SelTypes);
 }
 
 llvm::Constant *CGObjCGNU::GetEHType(QualType T) {
_

[PATCH] D50448: [CGObjCGNU] Rename GetSelector helper method to fix -Woverloaded-virtual warning (PR38210)

2018-08-08 Thread Simon Pilgrim via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL339264: [CGObjCGNU] Rename GetSelector helper method to fix 
-Woverloaded-virtual… (authored by RKSimon, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D50448?vs=159709&id=159732#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D50448

Files:
  cfe/trunk/lib/CodeGen/CGObjCGNU.cpp


Index: cfe/trunk/lib/CodeGen/CGObjCGNU.cpp
===
--- cfe/trunk/lib/CodeGen/CGObjCGNU.cpp
+++ cfe/trunk/lib/CodeGen/CGObjCGNU.cpp
@@ -510,8 +510,8 @@
 
   /// Returns a selector with the specified type encoding.  An empty string is
   /// used to return an untyped selector (with the types field set to NULL).
-  virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
-   const std::string &TypeEncoding);
+  virtual llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+const std::string &TypeEncoding);
 
   /// Returns the name of ivar offset variables.  In the GNUstep v1 ABI, this
   /// contains the class and ivar names, in the v2 ABI this contains the type
@@ -1342,8 +1342,8 @@
   return Val;
 return llvm::ConstantExpr::getBitCast(Val, Ty);
   }
-  llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
-const std::string &TypeEncoding) override {
+  llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+const std::string &TypeEncoding) override {
 return GetConstantSelector(Sel, TypeEncoding);
   }
   llvm::Constant  *GetTypeString(llvm::StringRef TypeEncoding) {
@@ -2121,8 +2121,8 @@
   return Value;
 }
 
-llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel,
-const std::string &TypeEncoding) {
+llvm::Value *CGObjCGNU::GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding) {
   SmallVectorImpl &Types = SelectorTable[Sel];
   llvm::GlobalAlias *SelValue = nullptr;
 
@@ -2155,13 +2155,13 @@
 }
 
 llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel) {
-  return GetSelector(CGF, Sel, std::string());
+  return GetTypedSelector(CGF, Sel, std::string());
 }
 
 llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF,
 const ObjCMethodDecl *Method) {
   std::string SelTypes = CGM.getContext().getObjCEncodingForMethodDecl(Method);
-  return GetSelector(CGF, Method->getSelector(), SelTypes);
+  return GetTypedSelector(CGF, Method->getSelector(), SelTypes);
 }
 
 llvm::Constant *CGObjCGNU::GetEHType(QualType T) {


Index: cfe/trunk/lib/CodeGen/CGObjCGNU.cpp
===
--- cfe/trunk/lib/CodeGen/CGObjCGNU.cpp
+++ cfe/trunk/lib/CodeGen/CGObjCGNU.cpp
@@ -510,8 +510,8 @@
 
   /// Returns a selector with the specified type encoding.  An empty string is
   /// used to return an untyped selector (with the types field set to NULL).
-  virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
-   const std::string &TypeEncoding);
+  virtual llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+const std::string &TypeEncoding);
 
   /// Returns the name of ivar offset variables.  In the GNUstep v1 ABI, this
   /// contains the class and ivar names, in the v2 ABI this contains the type
@@ -1342,8 +1342,8 @@
   return Val;
 return llvm::ConstantExpr::getBitCast(Val, Ty);
   }
-  llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
-const std::string &TypeEncoding) override {
+  llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+const std::string &TypeEncoding) override {
 return GetConstantSelector(Sel, TypeEncoding);
   }
   llvm::Constant  *GetTypeString(llvm::StringRef TypeEncoding) {
@@ -2121,8 +2121,8 @@
   return Value;
 }
 
-llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel,
-const std::string &TypeEncoding) {
+llvm::Value *CGObjCGNU::GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding) {
   SmallVectorImpl &Types = SelectorTable[Sel];
   llvm::GlobalAlias *SelValue = nullptr;
 
@@ -2155,13 +2155,13 @@
 }
 
 llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel) {
-  return GetSelector(CGF, Sel, std::string());
+  return GetTypedSelector(CGF, Sel, std::string());
 }
 
 llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF,
 const ObjCMethodDecl *Method) {
   std::string SelTypes = CGM.getContext().getObjCEncodingForMethodDecl(Method);
-  return GetSelector(CGF, Method->getSelector(), SelTypes);
+  

[PATCH] D46863: [X86] Use __builtin_convertvector to implement some of the packed integer to packed flow conversion intrinsics.

2018-05-15 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added reviewers: efriedma, hfinkel.
RKSimon added a comment.

I'm all for keeping the scalar/vector behaviour the same but I'm concerned 
about constant folding not taking into account runtime rounding mode:

e,.g. SelectionDAG::getNode - we don't check the return status of 
convertFromAPInt from *INT_TO_FP - but then again the FP_TO_*INT constant folds 
only bail on invalid conversions.

I haven't checked what other passes are doing.


Repository:
  rC Clang

https://reviews.llvm.org/D46863



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47125: [X86] Remove masking from pternlog llvm intrinsics and use a select instruction instead.

2018-05-21 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - I'd love to see constexpr arguments someday in C/C++ to avoid all of 
these macro shenanigans...


Repository:
  rC Clang

https://reviews.llvm.org/D47125



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47174: [X86] Move 128-bit f16c intrinsics to __emmintrin_f16c.h include from emmintrin.h. Move 256-bit f16c intrinsics back to f16cintrin.h

2018-05-22 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Aren't all the instructions from the same CPUID bit? It seems odd to split them 
across multiple files.


Repository:
  rC Clang

https://reviews.llvm.org/D47174



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47174: [X86] Move 128-bit f16c intrinsics to __emmintrin_f16c.h include from emmintrin.h. Move 256-bit f16c intrinsics back to f16cintrin.h

2018-05-22 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

In https://reviews.llvm.org/D47174#1108329, @craig.topper wrote:

> It is odd, but they really are split in the icc include files. So they got 
> split a while back in clang to match the Intel Intrinsic Guide documentation.


OK - if that means we're matching latest icc/gcc. LGTM.


Repository:
  rC Clang

https://reviews.llvm.org/D47174



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47401: [X86] Rewrite the max and min reduction intrinsics to make better use of other functions and to reduce width to 256 and 128 bits were possible.

2018-05-26 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: cfe/trunk/lib/Headers/avx512fintrin.h:9855
+  __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
+  return __t6[0];
 

Would it be dumb to allow VLX capable CPUs to use 128/256 variants of the 
VPMAXUQ etc ? Or is it better to focus on improving SimplifyDemandedElts to 
handle this (and many other reduction cases that all tend to keep to the 
original vector width)?


Repository:
  rL LLVM

https://reviews.llvm.org/D47401



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38683: [X86][AVX512] lowering broadcastm intrinsic - clang part

2017-10-10 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: test/CodeGen/avx512cdintrin.c:106
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}
+  return _mm512_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b)); 
 }

Any reason why you can't use the actual insertion indices instead of regexps?


https://reviews.llvm.org/D38683



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38781: [X86] Add CLWB intrinsic. clang part

2017-10-12 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: lib/Headers/clwbintrin.h:34
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_clwb(void const *__m) {

Worth adding the doxygen description? You can probably just copy+paste+modify 
the _mm_clflush documentation.



Comment at: test/CodeGen/builtin-clwb.c:4
+
+#include 
+void test_mm_clwb(const void *__m) {

Match what is done in the other files: use -ffreestanding instead of defining 
__MM_MALLOC_H and include the "top header":


https://reviews.llvm.org/D38781



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38781: [X86] Add CLWB intrinsic. clang part

2017-10-12 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - one minor




Comment at: lib/Headers/clwbintrin.h:42
+///
+/// \param __p
+///A pointer to the memory location used to identify the cache line to be

\param __m


https://reviews.llvm.org/D38781



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38813: [X86] Add skeleton support for "knm" cpu - clang side

2017-10-13 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM




Comment at: lib/Basic/Targets/X86.cpp:232
 
+  case CK_KNM:
   case CK_KNL:

Worth adding a TODO comment for the missing features?


https://reviews.llvm.org/D38813



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51771: [X86] Modify addcarry/subborrow builtins to emit an 2 result and intrinsic and an store instruction.

2018-09-07 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rC Clang

https://reviews.llvm.org/D51771



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D51805: [X86] Custom emit __builtin_rdtscp so we can emit an explicit store for the out parameter

2018-09-07 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - The lack of existing tests is worrying.


https://reviews.llvm.org/D51805



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D42530: Clang permits assignment to vector/extvector elements in a const method

2018-02-10 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

@avt77 Close this now that https://reviews.llvm.org/rL324721 has landed?


https://reviews.llvm.org/D42530



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D33406: PR28129 expand vector oparation to an IR constant.

2017-06-26 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon closed this revision.
RKSimon added a comment.

https://reviews.llvm.org/rL305551


https://reviews.llvm.org/D33406



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D41520: smmintrin.h documentation fixes and updates

2017-12-22 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM


https://reviews.llvm.org/D41520



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D41518: pmmintrin.h documentation fixes and updates

2017-12-22 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM


https://reviews.llvm.org/D41518



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D41523: xmmintrin.h documentation fixes and updates

2018-01-05 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Sort of related - should we enable -Wdocumentation (it's currently -Wall and 
-Weverything might be too much) on the respective clang builtin tests? 
Doesn't have to be part of this patch.




Comment at: lib/Headers/xmmintrin.h:1927
 ///
-/// This intrinsic corresponds to the  VPEXTRQ / MOVQ  instruction.
+/// This intrinsic corresponds to the  VPEXTRQ / PEXTRQ  instruction.
 ///

Not necessarily, it could be (v)movhps/(v)movhpd - but those are tricky to 
generate in current isel.


https://reviews.llvm.org/D41523



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D41507: avxintrin.h documentation fixes and updates

2018-01-07 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM


https://reviews.llvm.org/D41507



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D41517: mmintrin.h documentation fixes and updates

2018-02-28 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: lib/Headers/mmintrin.h:1292
 ///
-/// This intrinsic corresponds to the  VXORPS / XORPS  instruction.
+/// This intrinsic corresponds to the  XOR  instruction.
 ///

craig.topper wrote:
> kromanova wrote:
> > craig.topper wrote:
> > > PXOR?
> > For which platform/compiler? 
> >  
> > I checked, for x86_64 Linux XORPS(no avx)/VXORPS (with -mavx) is generated.
> > For PS4 we generate XORL.
> > 
> > I guess, we need to write something more generic, implying that an 
> > appropriate platform-specific XOR instruction is generated. 
> Ideally to interoperate with other mmx intrinsics it should have been a PXOR 
> into an mmx register. But apparently our mmx support is so limited that we 
> aren't capable of that and instead create it in another domain and move it 
> over.
> 
> I guess just indicate it as a utility function with no specific instruction.
D41908 fixed MMX zero constant creation so you should be able to say "This 
intrinsic corresponds to the  PXOR  instruction."


https://reviews.llvm.org/D41517



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D41517: mmintrin.h documentation fixes and updates

2018-03-08 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Any more comments?


https://reviews.llvm.org/D41517



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D41517: mmintrin.h documentation fixes and updates

2018-03-08 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - thanks


https://reviews.llvm.org/D41517



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D41168: [X86] Lowering X86 avx512 sqrt intrinsics to IR

2018-06-01 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon requested changes to this revision.
RKSimon added a comment.
This revision now requires changes to proceed.

In https://reviews.llvm.org/D41168#1118624, @tkrupa wrote:

> Mask scalar case is closed and doesn't have any effects on this revision. 
> Besides, I resolved issues connected to lowering scalar sqrt intrinsics 
> without rounding (that is, if https://reviews.llvm.org/D47621 is accepted). 
> Should I add them here to have everything sqrt in one place or upstream this 
> and add them to a new revision?


Add them here please


Repository:
  rC Clang

https://reviews.llvm.org/D41168



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D41168: [X86] Lowering X86 avx512 sqrt intrinsics to IR

2018-06-03 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: test/CodeGen/sse2-builtins.c:1199
   // CHECK-LABEL: test_mm_sqrt_sd
-  // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}})
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 1
-  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
+  // CHECK-NOT: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}})
+  // CHECK: extractelement <2 x double> %{{.*}}, i64 0

You've added CHECK-NOT to some (scalar only?) tests but not others - please be 
consistent.


Repository:
  rC Clang

https://reviews.llvm.org/D41168



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47693: [X86] Use target independent masked expandload and compressstore intrinsics to implement expandload/compressstore builtins.

2018-06-09 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

We're going to need more codegen tests on the llvm side - test coverage 
(fast-isel and intrinsics) isn't great. But LGTM on the clang side.


Repository:
  rC Clang

https://reviews.llvm.org/D47693



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D41168: [X86] Lowering X86 avx512 sqrt intrinsics to IR

2018-06-15 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

LGTM as well


Repository:
  rL LLVM

https://reviews.llvm.org/D41168



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47401: [X86] Rewrite the max and min reduction intrinsics to make better use of other functions and to reduce width to 256 and 128 bits were possible.

2018-06-18 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Is there any llvm side fast isel tests for these?




Comment at: cfe/trunk/lib/Headers/avx512fintrin.h:9855
+  __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
+  return __t6[0];
 

craig.topper wrote:
> RKSimon wrote:
> > Would it be dumb to allow VLX capable CPUs to use 128/256 variants of the 
> > VPMAXUQ etc ? Or is it better to focus on improving SimplifyDemandedElts to 
> > handle this (and many other reduction cases that all tend to keep to the 
> > original vector width)?
> I'm not sure how to do that from clang. Should we be using a reduction 
> intrinsic and do custom lowering in the backend?
I believe we're still missing the clang reduction intrinsics.

We also have the problem with the 'fast math only' accumulator argument on the 
'experimental' reductions that we can't actually change because arm64 already 
relies on them in production code...

But we could maybe propose clang integer reduction intrinsics?

TBH I don't think any of that needs to be done for this patch, its just an 
indication that x86 reductions aren't great yet.


Repository:
  rL LLVM

https://reviews.llvm.org/D47401



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D47401: [X86] Rewrite the max and min reduction intrinsics to make better use of other functions and to reduce width to 256 and 128 bits were possible.

2018-06-19 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

OK, if the llvm side tests are incoming I'm happy with this patch


https://reviews.llvm.org/D47401



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D48346: [X86] Rewrite the add/mul/or/and reduction intrinsics to make better use of other intrinsics and remove undef shuffle indices.

2018-06-20 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: test/CodeGen/avx512-reduceIntrin.c:1
 // RUN: %clang_cc1 -ffreestanding %s -O2 -triple=x86_64-apple-darwin 
-target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
 

We're using -O2 here but most codegen tests use -O0 - is that OK? It makes much 
more recognisable codegen, but I don't know what people expect from these 
intrinsics without optimization.


Repository:
  rC Clang

https://reviews.llvm.org/D48346



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D48346: [X86] Rewrite the add/mul/or/and reduction intrinsics to make better use of other intrinsics and remove undef shuffle indices.

2018-06-21 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM (with suitable IR fast-isel tests in llvm)


https://reviews.llvm.org/D48346



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D48487: [X86][AVX512] Lowering _mm512_[max|min]_p[s|d] to native IR

2018-06-22 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

I haven't used the AVX512 versions, but I do know plenty of cases that rely on 
the 'use the second op if there is a NAN or both zeros' special cases in the 
SSE version of these FMIN/FMAX intrinsics.

The Intel docs state:

> If the values being compared are both 0.0s (of either sign), the value in the 
> second operand (source operand) is returned. If a value in the second operand 
> is an SNaN, then SNaN is forwarded unchanged to the destination (that is, a 
> QNaN version of the SNaN is not returned).



> If only one value is a NaN (SNaN or QNaN) for this instruction, the second 
> operand (source operand), either a NaN or a valid floating-point value, is 
> written to the result. If instead of this behavior, it is required that the 
> NaN source operand (from either the first or second operand) be returned, the 
> action of MAXPS can be emulated using a sequence of instructions, such as, a 
> comparison followed by AND, ANDN and OR.




Repository:
  rC Clang

https://reviews.llvm.org/D48487



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D52967: Extend shelf-life by 70 years

2018-10-10 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon resigned from this revision.
RKSimon added reviewers: rsmith, bruno, rtrieu.
RKSimon added a comment.

Sorry I'm not the right person to review this - adding some other potentials


Repository:
  rC Clang

https://reviews.llvm.org/D52967



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56530: [X86] Add versions of the avx512 gather intrinsics that take the mask as a vXi1 vector instead of a scalar (clang side)

2019-01-16 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56530/new/

https://reviews.llvm.org/D56530



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56965: [X86] Remove the cvtuqq2ps256/cvtqq2ps256 mask builtins. Replace with uitofp/sitofp and select.

2019-01-19 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Just to confirm, this isn't driven by RC in any way? cvtsi642ps isn't so I 
don't expect it to be, but just to be sure..


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56965/new/

https://reviews.llvm.org/D56965



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56965: [X86] Remove the cvtuqq2ps256/cvtqq2ps256 mask builtins. Replace with uitofp/sitofp and select.

2019-01-20 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

OK - LGTM, thanks


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56965/new/

https://reviews.llvm.org/D56965



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56998: [X86] Custom codegen 512-bit cvt(u)qq2tops, cvt(u)qqtopd, and cvt(u)dqtops intrinsics.

2019-01-25 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56998/new/

https://reviews.llvm.org/D56998



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58675: [clang] Adds `-ftime-trace` option to clang that produces Chrome `chrome://tracing` compatible JSON profiling output dumps

2019-02-28 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Ideally I think you need more clang/frontend experts reviewers - many of us on 
the reviewers list tend to work mainly in llvm.

Test cases would be good as well - even if its just basic sanity tests for 
command line args etc.




Comment at: clang/lib/Sema/Sema.cpp:113
+  llvm::TimeTraceProfilerEnd();
+}
 S->DiagnoseNonDefaultPragmaPack(

remove braces



Comment at: clang/lib/Sema/SemaTemplateInstantiate.cpp:24
 #include "clang/Sema/Lookup.h"
+#include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"

nfc change?



Comment at: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp:23
 #include "clang/Sema/Lookup.h"
+#include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"

nfc change?



Comment at: clang/tools/driver/cc1_main.cpp:201
+llvm::TimeTraceProfilerInitialize();
+  }
+

remove braces


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58675/new/

https://reviews.llvm.org/D58675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58675: [clang] Adds `-ftime-trace` option to clang that produces Chrome `chrome://tracing` compatible JSON profiling output dumps

2019-02-28 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp:23
 #include "clang/Sema/Lookup.h"
+#include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"

anton-afanasyev wrote:
> RKSimon wrote:
> > nfc change?
> What do you mean? This change is not NFC.
Moving the "clang/Sema/SemaInternal.h" include?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58675/new/

https://reviews.llvm.org/D58675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58675: [clang] Adds `-ftime-trace` option to clang that produces Chrome `chrome://tracing` compatible JSON profiling output dumps

2019-03-13 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

@rsmith Any suggestions for good reviewers for this please?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58675/new/

https://reviews.llvm.org/D58675



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59287: [X86] Only define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 in 64-bit mode.

2019-03-13 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

I agree that its not great, but I don't think we're going to do much better - 
and matching gcc behaviours does make sense here.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59287/new/

https://reviews.llvm.org/D59287



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59557: Fix CodeGen/arm64-microsoft-status-reg.cpp test

2019-03-19 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

I thought we weren't supposed to reference asm codegen in clang tests? (PR24580)


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59557/new/

https://reviews.llvm.org/D59557



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59567: [X86] Add __popcntd and __popcntq to ia32intrin.h to match gcc and icc. Remove popcnt feature flag from _popcnt32/_popcnt64 and move to ia32intrin.h to match gcc

2019-03-20 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

IIRC we don't use libcalls for popcnt - we just expand




Comment at: clang/lib/Headers/ia32intrin.h:35
+///
+/// This intrinsic corresponds to the  POPCNT  instruction.
+///

Change this as we don't require POPCNT any more?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59567/new/

https://reviews.llvm.org/D59567



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59533: [X86] Add __crc32b/__crc32w/__crc32d/__crc32q intrinsics to match gcc and icc.

2019-03-20 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM with one minor (doxygen desciptions)




Comment at: lib/Headers/ia32intrin.h:58
 
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, 
__target__("sse4.2")))
+__crc32b(unsigned int __C, unsigned char __D)

Please can you copy the doxygen comments from smmintrin.h (_mm_crc32_u8 etc.)?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59533/new/

https://reviews.llvm.org/D59533



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59567: [X86] Add __popcntd and __popcntq to ia32intrin.h to match gcc and icc. Remove popcnt feature flag from _popcnt32/_popcnt64 and move to ia32intrin.h to match gcc

2019-03-21 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM with some minor typos




Comment at: lib/Headers/ia32intrin.h:36
+ *  This intrinsic corresponds to the  POPCNT  instruction or a
+ *  a sequence of arithmetic and logic ops to calcute it.
+ *

calculate



Comment at: lib/Headers/ia32intrin.h:57
+ *  This intrinsic corresponds to the  POPCNT  instruction or a
+ *  a sequence of arithmetic and logic ops to calcute it.
+ *

calculate


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59567/new/

https://reviews.llvm.org/D59567



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59682: [X86] Add BSR/BSF/BSWAP intrinsics to ia32intrin.h to match gcc.

2019-03-22 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: lib/Headers/ia32intrin.h:31
 
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+__bsfd(int __A) {

Ideally we'd have doxygen comments.



Comment at: test/CodeGen/bitscan-builtins.c:24
+// CHECK: @test__bsfd
+// CHECK: %[[call:.*]] = call i32 @llvm.cttz.i32(
+  return __bsfd(X);

Should we be testing the is_zero_undef flag is true?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59682/new/

https://reviews.llvm.org/D59682



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59682: [X86] Add BSR/BSF/BSWAP intrinsics to ia32intrin.h to match gcc.

2019-03-23 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - cheers


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59682/new/

https://reviews.llvm.org/D59682



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59744: Fix i386 ABI "__m64" type bug

2019-04-08 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: test/CodeGen/x86_32-mmx-linux.c:2
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-pc-linux-gnu -target-cpu 
pentium4 -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+

Test on more triples and add the test file to trunk with current codegen so 
this patch shows the delta


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59744/new/

https://reviews.llvm.org/D59744



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60674: [X86] Restore the pavg intrinsics.

2019-04-15 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - there's too many different optimizations and canonicalizations that can 
occur on such a pattern to be able to match all of the permutations.

We can probably add some InstCombine optimizations (e.g. where the avg can't 
overflow etc.) and keep the existing DAG matching.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60674/new/

https://reviews.llvm.org/D60674



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59744: Fix i386 ABI "__m64" type bug

2019-04-15 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: test/CodeGen/x86_32-m64-darwin.c:1
+// RUN: %clang_cc1 -w -fblocks -triple i386-apple-darwin9 -target-cpu yonah 
-target-feature +mmx -emit-llvm -O2 -o - %s | FileCheck %s
+

You should be able to merge all of these triples into the same test file, each 
with their own RUN: line, you will need to add a FileCheck prefix, something 
like:
```
| FileCheck %s --check-prefixes=CHECK,DARWIN
| FileCheck %s --check-prefixes=CHECK,IAMCU
| FileCheck %s --check-prefixes=CHECK,LINUX
| FileCheck %s --check-prefixes=CHECK,WIN32
```


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59744/new/

https://reviews.llvm.org/D59744



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59744: Fix i386 ABI "__m64" type bug

2019-04-17 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

One last style comment from me but we need somebody better with the different 
ABIs to finally approve this.




Comment at: lib/CodeGen/TargetInfo.cpp:1416
+  return ABIArgInfo::getDirect(llvm::Type::getX86_MMXTy(getVMContext()));
+}
+

superfluous braces?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59744/new/

https://reviews.llvm.org/D59744



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60552: [X86] Enable intrinsics of AVX512_BF16, which are supported for BFLOAT16 in Cooper Lake

2019-04-17 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: lib/Headers/avx512bf16intrin.h:23
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS512
+_mm512_cvtne2ps_pbh(__m512 __A, __m512 __B) {

All of these need proper doxygen based descriptions - see xmmintrin.h etc. for 
examples.



Comment at: test/CodeGen/avx512bf16-builtins.c:9
+  // CHECK-LABEL: @test_mm512_cvtne2ps2bf16
+  // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512
+  return _mm512_cvtne2ps_pbh(A, B);

add ret/arg types?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60552/new/

https://reviews.llvm.org/D60552



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58404: [clang-format] Add basic support for formatting C# files

2019-04-29 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: lib/Format/FormatTokenLexer.cpp:249
+  Identifier->ColumnWidth += Question->ColumnWidth;
+  Identifier->Type = Identifier->Type;
+  Tokens.erase(Tokens.end() - 1);

@MyDeveloperDay Should this be
```
Identifier->Type = Question->Type;
```
Reported in https://www.viva64.com/en/b/0629/


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58404/new/

https://reviews.llvm.org/D58404



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60937: [clangd] Fix code completion of macros defined in the preamble region of the main file.

2019-05-02 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

@sammccall I'm sorry but I had to revert this to fix the buildbots: 
http://lab.llvm.org:8011/builders/llvm-clang-lld-x86_64-scei-ps4-ubuntu-fast/builds/47684/


Repository:
  rCTE Clang Tools Extra

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60937/new/

https://reviews.llvm.org/D60937



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61281: [clang-format] Fixed self assignment

2019-05-02 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a reviewer: RKSimon.
RKSimon added a comment.

In D61281#1485833 , @MyDeveloperDay 
wrote:

> Did this cause some issue? Does this fix something if so can we add a test, 
> because maybe the line isn't needed
>
> I would think we'd want to keep this as an identifier. we are just treating 
> arg? the same as we would arg


@MyDeveloperDay didn't you write this code in D58404 
?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61281/new/

https://reviews.llvm.org/D61281



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61472: [X86FixupLEAs] Turn optIncDec into a generic two address LEA optimizer. Support LEA64_32r properly.

2019-05-06 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Where did all the clang diffs come from?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61472/new/

https://reviews.llvm.org/D61472



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60934: [clang] adding explicit(bool) from c++2a

2019-05-09 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: cfe/trunk/lib/Parse/ParseDecl.cpp:3939
+ "both or neither of isAlreadyConsumed and 
"
+ "RangeEnd needs to be set");
+DS.SetRangeEnd(isAlreadyConsumed ? RangeEnd : Tok.getLocation());

@Tyker @rsmith We're seeing -Wparentheses warnings because of this - please can 
you take a look? The logic looks a little dodgy for a both/neither assertion as 
well.

http://lab.llvm.org:8011/builders/clang-ppc64be-linux-lnt/builds/27322/steps/build%20stage%201/logs/warnings%20%281%29


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60934/new/

https://reviews.llvm.org/D60934



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D52392: [X86] For lzcnt/tzcnt intrinsics use cttz/ctlz intrinsics with zero_undef flag set to false.

2018-09-23 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Are there other targets that would benefit from this and if so should we 
provide a more generic intrinsic?


Repository:
  rC Clang

https://reviews.llvm.org/D52392



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D52392: [X86] For lzcnt/tzcnt intrinsics use cttz/ctlz intrinsics with zero_undef flag set to false.

2018-09-26 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM, thanks for checking.

Please update the *-intrinsics-fast-isel.ll llvm test cases to match the 
*-builtins.c changes.


Repository:
  rC Clang

https://reviews.llvm.org/D52392



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D52665: [X86] Add more of the icc unaligned load/store to/from 128 bit vector intrinsics

2018-09-29 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Just to confirm - does icc provide the si64 intrinsics on 32-bit builds?


https://reviews.llvm.org/D52665



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D52665: [X86] Add more of the icc unaligned load/store to/from 128 bit vector intrinsics

2018-09-29 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

Thanks for checking - LGTM with the equivalent updates to 
sse2-intrinsics-fast-isel.ll


https://reviews.llvm.org/D52665



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37448: Fix cast assertion on MS inline assembly with vector spills (PR34021)

2017-09-04 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon created this revision.

I can add the other test cases from PR34021 if required?


Repository:
  rL LLVM

https://reviews.llvm.org/D37448

Files:
  lib/CodeGen/CGStmt.cpp
  test/CodeGen/pr34021.c


Index: test/CodeGen/pr34021.c
===
--- test/CodeGen/pr34021.c
+++ test/CodeGen/pr34021.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -fms-extensions %s -triple=i686-unknown-unknown -emit-llvm 
-o -
+// RUN: %clang_cc1 -fms-extensions %s -triple=x86_64-unknown-unknown 
-emit-llvm -o -
+// REQUIRES: asserts
+
+typedef int v4si __attribute__ ((vector_size (16)));
+v4si rep() {
+v4si res;
+__asm {}
+return res;
+}
Index: lib/CodeGen/CGStmt.cpp
===
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -2210,7 +2210,7 @@
llvm::IntegerType::get(getLLVMContext(), 
(unsigned)TmpSize));
 Tmp = Builder.CreateTrunc(Tmp, TruncTy);
   } else if (TruncTy->isIntegerTy()) {
-Tmp = Builder.CreateTrunc(Tmp, TruncTy);
+Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
   } else if (TruncTy->isVectorTy()) {
 Tmp = Builder.CreateBitCast(Tmp, TruncTy);
   }


Index: test/CodeGen/pr34021.c
===
--- test/CodeGen/pr34021.c
+++ test/CodeGen/pr34021.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -fms-extensions %s -triple=i686-unknown-unknown -emit-llvm -o -
+// RUN: %clang_cc1 -fms-extensions %s -triple=x86_64-unknown-unknown -emit-llvm -o -
+// REQUIRES: asserts
+
+typedef int v4si __attribute__ ((vector_size (16)));
+v4si rep() {
+v4si res;
+__asm {}
+return res;
+}
Index: lib/CodeGen/CGStmt.cpp
===
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -2210,7 +2210,7 @@
llvm::IntegerType::get(getLLVMContext(), (unsigned)TmpSize));
 Tmp = Builder.CreateTrunc(Tmp, TruncTy);
   } else if (TruncTy->isIntegerTy()) {
-Tmp = Builder.CreateTrunc(Tmp, TruncTy);
+Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
   } else if (TruncTy->isVectorTy()) {
 Tmp = Builder.CreateBitCast(Tmp, TruncTy);
   }
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37449: [X86][AVX512] _mm512_stream_load_si512 should take a void const* argument (PR33977)

2017-09-04 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon created this revision.

Based off the Intel Intrinsics guide, we should expect a void const* argument.

Prevents 'passing 'const void *' to parameter of type 'void *' discards 
qualifiers' warnings.


Repository:
  rL LLVM

https://reviews.llvm.org/D37449

Files:
  lib/Headers/avx512fintrin.h
  test/CodeGen/avx512f-builtins.c


Index: test/CodeGen/avx512f-builtins.c
===
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -6258,6 +6258,12 @@
   return _mm512_stream_load_si512(__P); 
 }
 
+__m512i test_mm512_stream_load_si512_const(void const *__P) {
+  // CHECK-LABEL: @test_mm512_stream_load_si512_const
+  // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 64, !nontemporal
+  return _mm512_stream_load_si512(__P); 
+}
+
 void test_mm512_stream_pd(double *__P, __m512d __A) {
   // CHECK-LABEL: @test_mm512_stream_pd
   // CHECK: store <8 x double> %{{.*}}, <8 x double>* %{{.*}}, align 64, 
!nontemporal
Index: lib/Headers/avx512fintrin.h
===
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -9040,7 +9040,7 @@
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_stream_load_si512 (void *__P)
+_mm512_stream_load_si512 (void const *__P)
 {
   typedef __v8di __v8di_aligned __attribute__((aligned(64)));
   return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);


Index: test/CodeGen/avx512f-builtins.c
===
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -6258,6 +6258,12 @@
   return _mm512_stream_load_si512(__P); 
 }
 
+__m512i test_mm512_stream_load_si512_const(void const *__P) {
+  // CHECK-LABEL: @test_mm512_stream_load_si512_const
+  // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 64, !nontemporal
+  return _mm512_stream_load_si512(__P); 
+}
+
 void test_mm512_stream_pd(double *__P, __m512d __A) {
   // CHECK-LABEL: @test_mm512_stream_pd
   // CHECK: store <8 x double> %{{.*}}, <8 x double>* %{{.*}}, align 64, !nontemporal
Index: lib/Headers/avx512fintrin.h
===
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -9040,7 +9040,7 @@
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_stream_load_si512 (void *__P)
+_mm512_stream_load_si512 (void const *__P)
 {
   typedef __v8di __v8di_aligned __attribute__((aligned(64)));
   return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37449: [X86][AVX512] _mm512_stream_load_si512 should take a void const* argument (PR33977)

2017-09-05 Thread Simon Pilgrim via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL312523: [X86][AVX512] _mm512_stream_load_si512 should take a 
void const* argument… (authored by RKSimon).

Changed prior to commit:
  https://reviews.llvm.org/D37449?vs=113786&id=113819#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D37449

Files:
  cfe/trunk/lib/Headers/avx512fintrin.h
  cfe/trunk/test/CodeGen/avx512f-builtins.c


Index: cfe/trunk/test/CodeGen/avx512f-builtins.c
===
--- cfe/trunk/test/CodeGen/avx512f-builtins.c
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c
@@ -6258,6 +6258,12 @@
   return _mm512_stream_load_si512(__P); 
 }
 
+__m512i test_mm512_stream_load_si512_const(void const *__P) {
+  // CHECK-LABEL: @test_mm512_stream_load_si512_const
+  // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 64, !nontemporal
+  return _mm512_stream_load_si512(__P); 
+}
+
 void test_mm512_stream_pd(double *__P, __m512d __A) {
   // CHECK-LABEL: @test_mm512_stream_pd
   // CHECK: store <8 x double> %{{.*}}, <8 x double>* %{{.*}}, align 64, 
!nontemporal
Index: cfe/trunk/lib/Headers/avx512fintrin.h
===
--- cfe/trunk/lib/Headers/avx512fintrin.h
+++ cfe/trunk/lib/Headers/avx512fintrin.h
@@ -9040,7 +9040,7 @@
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_stream_load_si512 (void *__P)
+_mm512_stream_load_si512 (void const *__P)
 {
   typedef __v8di __v8di_aligned __attribute__((aligned(64)));
   return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);


Index: cfe/trunk/test/CodeGen/avx512f-builtins.c
===
--- cfe/trunk/test/CodeGen/avx512f-builtins.c
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c
@@ -6258,6 +6258,12 @@
   return _mm512_stream_load_si512(__P); 
 }
 
+__m512i test_mm512_stream_load_si512_const(void const *__P) {
+  // CHECK-LABEL: @test_mm512_stream_load_si512_const
+  // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 64, !nontemporal
+  return _mm512_stream_load_si512(__P); 
+}
+
 void test_mm512_stream_pd(double *__P, __m512d __A) {
   // CHECK-LABEL: @test_mm512_stream_pd
   // CHECK: store <8 x double> %{{.*}}, <8 x double>* %{{.*}}, align 64, !nontemporal
Index: cfe/trunk/lib/Headers/avx512fintrin.h
===
--- cfe/trunk/lib/Headers/avx512fintrin.h
+++ cfe/trunk/lib/Headers/avx512fintrin.h
@@ -9040,7 +9040,7 @@
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_stream_load_si512 (void *__P)
+_mm512_stream_load_si512 (void const *__P)
 {
   typedef __v8di __v8di_aligned __attribute__((aligned(64)));
   return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37448: Fix cast assertion on MS inline assembly with vector spills (PR34021)

2017-09-05 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

In https://reviews.llvm.org/D37448#861019, @erichkeane wrote:

> I apologize for the lack of detail for this comment, but I didn't seem to 
> capture this at the time.  When I investigated this initially, I came up with 
> this solution and it was insufficient.  I don't remember WHAT this ends up 
> not fixing however.


@erichkeane OK, are you in a position to take on this bug? Either commandeering 
this patch or I abandon it and you start your own?


Repository:
  rL LLVM

https://reviews.llvm.org/D37448



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37413: [X86][MS-InlineAsm] Extended support for variables / identifiers on memory / immediate expressions

2017-09-08 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a reviewer: RKSimon.
RKSimon added a comment.

Tests?




Comment at: lib/Sema/SemaStmtAsm.cpp:71
+if (!Piece.isOperand())
+  continue;
 

This all looks like a clang-format NFC change - just commit it?



Comment at: lib/Sema/SemaStmtAsm.cpp:617
+return;
+  } else if (Res->isRValue()) {
+bool Enum = isa(T) && Res->EvaluateAsRValue(Eval, 
Context);

(style) Split these instead of an if-elseif chain


Repository:
  rL LLVM

https://reviews.llvm.org/D37413



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-10 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

As with https://reviews.llvm.org/D37562, strip the builtins from 
include/clang/Basic/BuiltinsX86.def


https://reviews.llvm.org/D37668



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37448: Fix cast assertion on MS inline assembly with vector spills (PR34021)

2017-09-11 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon updated this revision to Diff 114569.
RKSimon added a comment.

Added checks to test case


Repository:
  rL LLVM

https://reviews.llvm.org/D37448

Files:
  lib/CodeGen/CGStmt.cpp
  test/CodeGen/pr34021.c


Index: test/CodeGen/pr34021.c
===
--- test/CodeGen/pr34021.c
+++ test/CodeGen/pr34021.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -fms-extensions %s -triple=i686-unknown-unknown -emit-llvm 
-o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 -fms-extensions %s -triple=x86_64-unknown-unknown 
-emit-llvm -o - | FileCheck %s --check-prefix=X64
+// REQUIRES: asserts
+
+typedef int v4si __attribute__ ((vector_size (16)));
+v4si rep() {
+// X86-LABEL: define <4 x i32> @rep
+// X86:  %retval = alloca <4 x i32>, align 16
+// X86-NEXT: %res = alloca <4 x i32>, align 16
+// X86-NEXT: %0 = bitcast <4 x i32>* %retval to i128*
+// X86-NEXT: %1 = call i64 asm sideeffect inteldialect "", 
"=A,~{dirflag},~{fpsr},~{flags}"()
+// X86-NEXT: %2 = zext i64 %1 to i128
+// X86-NEXT: store i128 %2, i128* %0, align 16
+// X86-NEXT: %3 = load <4 x i32>, <4 x i32>* %res, align 16
+// X86-NEXT: ret <4 x i32> %3
+//
+// X64-LABEL: define <4 x i32> @rep
+// X64:  %res = alloca <4 x i32>, align 16
+// X64-NEXT: call void asm sideeffect inteldialect "", 
"~{dirflag},~{fpsr},~{flags}"()
+// X64-NEXT: %0 = load <4 x i32>, <4 x i32>* %res, align 16
+// X64-NEXT: ret <4 x i32> %0
+  v4si res;
+  __asm {}
+  return res;
+}
Index: lib/CodeGen/CGStmt.cpp
===
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -2194,7 +2194,7 @@
llvm::IntegerType::get(getLLVMContext(), 
(unsigned)TmpSize));
 Tmp = Builder.CreateTrunc(Tmp, TruncTy);
   } else if (TruncTy->isIntegerTy()) {
-Tmp = Builder.CreateTrunc(Tmp, TruncTy);
+Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
   } else if (TruncTy->isVectorTy()) {
 Tmp = Builder.CreateBitCast(Tmp, TruncTy);
   }


Index: test/CodeGen/pr34021.c
===
--- test/CodeGen/pr34021.c
+++ test/CodeGen/pr34021.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -fms-extensions %s -triple=i686-unknown-unknown -emit-llvm -o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 -fms-extensions %s -triple=x86_64-unknown-unknown -emit-llvm -o - | FileCheck %s --check-prefix=X64
+// REQUIRES: asserts
+
+typedef int v4si __attribute__ ((vector_size (16)));
+v4si rep() {
+// X86-LABEL: define <4 x i32> @rep
+// X86:  %retval = alloca <4 x i32>, align 16
+// X86-NEXT: %res = alloca <4 x i32>, align 16
+// X86-NEXT: %0 = bitcast <4 x i32>* %retval to i128*
+// X86-NEXT: %1 = call i64 asm sideeffect inteldialect "", "=A,~{dirflag},~{fpsr},~{flags}"()
+// X86-NEXT: %2 = zext i64 %1 to i128
+// X86-NEXT: store i128 %2, i128* %0, align 16
+// X86-NEXT: %3 = load <4 x i32>, <4 x i32>* %res, align 16
+// X86-NEXT: ret <4 x i32> %3
+//
+// X64-LABEL: define <4 x i32> @rep
+// X64:  %res = alloca <4 x i32>, align 16
+// X64-NEXT: call void asm sideeffect inteldialect "", "~{dirflag},~{fpsr},~{flags}"()
+// X64-NEXT: %0 = load <4 x i32>, <4 x i32>* %res, align 16
+// X64-NEXT: ret <4 x i32> %0
+  v4si res;
+  __asm {}
+  return res;
+}
Index: lib/CodeGen/CGStmt.cpp
===
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -2194,7 +2194,7 @@
llvm::IntegerType::get(getLLVMContext(), (unsigned)TmpSize));
 Tmp = Builder.CreateTrunc(Tmp, TruncTy);
   } else if (TruncTy->isIntegerTy()) {
-Tmp = Builder.CreateTrunc(Tmp, TruncTy);
+Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
   } else if (TruncTy->isVectorTy()) {
 Tmp = Builder.CreateBitCast(Tmp, TruncTy);
   }
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37448: Fix cast assertion on MS inline assembly with vector spills (PR34021)

2017-09-11 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

In https://reviews.llvm.org/D37448#866700, @rnk wrote:

> lgtm
>
> I was hoping for a test case that didn't require assertions, but this is 
> enough to land the fix.


My mistake, I should be able to remove that as well - I'll do it as part of the 
commit. Thanks.


Repository:
  rL LLVM

https://reviews.llvm.org/D37448



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37448: Fix cast assertion on MS inline assembly with vector spills (PR34021)

2017-09-12 Thread Simon Pilgrim via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL313019: [MS-InlineAsm] Fix cast assertion with vector spills 
(PR34021) (authored by RKSimon).

Changed prior to commit:
  https://reviews.llvm.org/D37448?vs=114569&id=114794#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D37448

Files:
  cfe/trunk/lib/CodeGen/CGStmt.cpp
  cfe/trunk/test/CodeGen/pr34021.c


Index: cfe/trunk/lib/CodeGen/CGStmt.cpp
===
--- cfe/trunk/lib/CodeGen/CGStmt.cpp
+++ cfe/trunk/lib/CodeGen/CGStmt.cpp
@@ -2194,7 +2194,7 @@
llvm::IntegerType::get(getLLVMContext(), 
(unsigned)TmpSize));
 Tmp = Builder.CreateTrunc(Tmp, TruncTy);
   } else if (TruncTy->isIntegerTy()) {
-Tmp = Builder.CreateTrunc(Tmp, TruncTy);
+Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
   } else if (TruncTy->isVectorTy()) {
 Tmp = Builder.CreateBitCast(Tmp, TruncTy);
   }
Index: cfe/trunk/test/CodeGen/pr34021.c
===
--- cfe/trunk/test/CodeGen/pr34021.c
+++ cfe/trunk/test/CodeGen/pr34021.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fms-extensions %s -triple=i686-unknown-unknown -emit-llvm 
-o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 -fms-extensions %s -triple=x86_64-unknown-unknown 
-emit-llvm -o - | FileCheck %s --check-prefix=X64
+
+typedef int v4si __attribute__ ((vector_size (16)));
+v4si rep() {
+// X86-LABEL: define <4 x i32> @rep
+// X86:  %retval = alloca <4 x i32>, align 16
+// X86-NEXT: %res = alloca <4 x i32>, align 16
+// X86-NEXT: %0 = bitcast <4 x i32>* %retval to i128*
+// X86-NEXT: %1 = call i64 asm sideeffect inteldialect "", 
"=A,~{dirflag},~{fpsr},~{flags}"()
+// X86-NEXT: %2 = zext i64 %1 to i128
+// X86-NEXT: store i128 %2, i128* %0, align 16
+// X86-NEXT: %3 = load <4 x i32>, <4 x i32>* %res, align 16
+// X86-NEXT: ret <4 x i32> %3
+//
+// X64-LABEL: define <4 x i32> @rep
+// X64:  %res = alloca <4 x i32>, align 16
+// X64-NEXT: call void asm sideeffect inteldialect "", 
"~{dirflag},~{fpsr},~{flags}"()
+// X64-NEXT: %0 = load <4 x i32>, <4 x i32>* %res, align 16
+// X64-NEXT: ret <4 x i32> %0
+  v4si res;
+  __asm {}
+  return res;
+}


Index: cfe/trunk/lib/CodeGen/CGStmt.cpp
===
--- cfe/trunk/lib/CodeGen/CGStmt.cpp
+++ cfe/trunk/lib/CodeGen/CGStmt.cpp
@@ -2194,7 +2194,7 @@
llvm::IntegerType::get(getLLVMContext(), (unsigned)TmpSize));
 Tmp = Builder.CreateTrunc(Tmp, TruncTy);
   } else if (TruncTy->isIntegerTy()) {
-Tmp = Builder.CreateTrunc(Tmp, TruncTy);
+Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
   } else if (TruncTy->isVectorTy()) {
 Tmp = Builder.CreateBitCast(Tmp, TruncTy);
   }
Index: cfe/trunk/test/CodeGen/pr34021.c
===
--- cfe/trunk/test/CodeGen/pr34021.c
+++ cfe/trunk/test/CodeGen/pr34021.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fms-extensions %s -triple=i686-unknown-unknown -emit-llvm -o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 -fms-extensions %s -triple=x86_64-unknown-unknown -emit-llvm -o - | FileCheck %s --check-prefix=X64
+
+typedef int v4si __attribute__ ((vector_size (16)));
+v4si rep() {
+// X86-LABEL: define <4 x i32> @rep
+// X86:  %retval = alloca <4 x i32>, align 16
+// X86-NEXT: %res = alloca <4 x i32>, align 16
+// X86-NEXT: %0 = bitcast <4 x i32>* %retval to i128*
+// X86-NEXT: %1 = call i64 asm sideeffect inteldialect "", "=A,~{dirflag},~{fpsr},~{flags}"()
+// X86-NEXT: %2 = zext i64 %1 to i128
+// X86-NEXT: store i128 %2, i128* %0, align 16
+// X86-NEXT: %3 = load <4 x i32>, <4 x i32>* %res, align 16
+// X86-NEXT: ret <4 x i32> %3
+//
+// X64-LABEL: define <4 x i32> @rep
+// X64:  %res = alloca <4 x i32>, align 16
+// X64-NEXT: call void asm sideeffect inteldialect "", "~{dirflag},~{fpsr},~{flags}"()
+// X64-NEXT: %0 = load <4 x i32>, <4 x i32>* %res, align 16
+// X64-NEXT: ret <4 x i32> %0
+  v4si res;
+  __asm {}
+  return res;
+}
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-12 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

some very minor whitespace/indentation issues

please can you confirm  @craig.topper's query about 
__builtin_ia32_pbroadcastq512_mem_mask




Comment at: lib/Headers/avx512vlintrin.h:5727
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+ _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) {
+   return (__m128i)__builtin_ia32_selectd_128(__M,

weird indentation?



Comment at: lib/Headers/avx512vlintrin.h:5734
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+ _mm_maskz_set1_epi32( __mmask8 __M, int __A) {
+   return (__m128i)__builtin_ia32_selectd_128(__M,

weird indentation?



Comment at: lib/Headers/avx512vlintrin.h:5741
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+ _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) {
+   return (__m256i)__builtin_ia32_selectd_256(__M,

weird indentation?



Comment at: lib/Headers/avx512vlintrin.h:5748
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+ _mm256_maskz_set1_epi32( __mmask8 __M, int __A) {
+   return (__m256i)__builtin_ia32_selectd_256(__M,

weird indentation?



Comment at: test/CodeGen/avx512f-builtins.c:7732
+   // CHECK-LABEL: @test_mm512_mask_set1_epi32
+// CHECK: insertelement <16 x i32> undef, i32 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 1

weird indentation?



Comment at: test/CodeGen/avx512f-builtins.c:7755
+  // CHECK-LABEL: @test_mm512_maskz_set1_epi32
+// CHECK: insertelement <16 x i32> undef, i32 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 1

weird indentation?



Comment at: test/CodeGen/avx512f-builtins.c:7966
 //CHECK: insertelement{{.*}}i32 14
 //CHECK: insertelement{{.*}}i32 15
  return _mm512_setr_epi32( __A, __B, __C, __D,__E, __F, __G, __H,

weird indentation?



Comment at: test/CodeGen/avx512vl-builtins.c:4597
   // CHECK-LABEL: @test_mm256_maskz_set1_epi64
-  // CHECK: @llvm.x86.avx512.mask.pbroadcast.q.gpr.256
+// CHECK: insertelement <4 x i64> undef, i64 %{{.*}}, i32 0
+  // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 1

weird indentation?


https://reviews.llvm.org/D37668



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37892: [X86] Use native shuffle vector for the perm2f128 intrinsics

2017-09-15 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

_mm256_permute2x128_si256 ?


https://reviews.llvm.org/D37892



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37892: [X86] Use native shuffle vector for the perm2f128 intrinsics

2017-09-15 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Also, there currently isn't any testing of the zero vector case.


https://reviews.llvm.org/D37892



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37892: [X86] Use native shuffle vector for the perm2f128 intrinsics

2017-09-15 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - please can you update the 
avx-intrinsics-fast-isel.ll/avxs-intrinsics-fast-isel.ll cases to match the 
*-builtins.c as well (either now or if/when you add the intrinsics to 
autoupgrade).


https://reviews.llvm.org/D37892



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37941: [X86] Move even more of our CPU to feature mapping switch to use fallthroughs

2017-09-16 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM, there are a couple of other fall-throughs that could still be done.




Comment at: lib/Basic/Targets/X86.cpp:141
 setFeatureEnabledImpl(Features, "sse2", true);
 setFeatureEnabledImpl(Features, "fxsr", true);
 break;

CK_Pentium3/CK_C3_2/CK_PentiumM/CK_Pentium4/CK_x86_64 can be put below Yonah et 
al. (unless you're worried about including non-Intel enums)?


https://reviews.llvm.org/D37941



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37938: [X86] Remove unnecessary extra encodings from the CPU name enum in clang

2017-09-16 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM


https://reviews.llvm.org/D37938



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62282: [X86] Add ENQCMD intrinsics.

2019-06-04 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.

In D62282#1528631 , @tianqing wrote:

> Added doxygen comments of intrinsics in enqcmdintrin.h.


Thanks, LGTM as well.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62282/new/

https://reviews.llvm.org/D62282



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62363: [X86] Enable intrinsics that convert float and bf16 data to each other

2019-06-04 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.

Yup, doxygen LGTM - cheers




Comment at: lib/Headers/avx512bf16intrin.h:33
+
+/// Convert One BF16 Data to One Single Data.
+///

Single Float Data


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62363/new/

https://reviews.llvm.org/D62363



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63018: [X86] Attempt to make the Intel core CPU inheritance a little more readable and maintainable

2019-06-10 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - the goto is unfortunate but more maintainable and less confusing then 
the current state of things




Comment at: clang/lib/Basic/Targets/X86.cpp:157
+setFeatureEnabledImpl(Features, "pku", true);
+goto SkylakeCommon;
+

Worth adding a comment explaining the AVX512 ISA divergence?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63018/new/

https://reviews.llvm.org/D63018



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63473: Support -fclang-abi-compat=8.0 to keep old ABI behavior

2019-06-18 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: test/CodeGen/x86_32-m64.c:6
 // RUN: %clang_cc1 -w -O2 -fblocks -triple i386-pc-win32 -emit-llvm -o - %s | 
FileCheck %s --check-prefixes=CHECK,WIN32
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-pc-linux-gnu -target-cpu 
pentium4 -emit-llvm -fclang-abi-compat=8.0 -o - %s | FileCheck %s 
--check-prefixes=CHECK,OLDABI
 

Use ABI80 instead of OLDABI?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63473/new/

https://reviews.llvm.org/D63473



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59744: Fix i386 ABI "__m64" type bug

2019-06-19 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

I've raised https://bugs.llvm.org/show_bug.cgi?id=42319 which suggests the 
creation of a EMMS insertion pass.


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59744/new/

https://reviews.llvm.org/D59744



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64416: [AArch64] Add support for Transactional Memory Extension (TME)

2019-07-17 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

@chill This is failing on buildbots with EXPENSIVE_CHECKS enabled: 
http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/18684/steps/test-check-all/logs/stdio


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64416/new/

https://reviews.llvm.org/D64416



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64416: [AArch64] Add support for Transactional Memory Extension (TME)

2019-07-19 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: llvm/test/CodeGen/AArch64/tme-tcancel.ll:1
+; RUN: llc %s -o - | FileCheck %s
+

Would it make sense to add -verify-machineinstrs to all these 
Codegen/AArch64/tme-*.ll tests?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64416/new/

https://reviews.llvm.org/D64416



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D65110: [NewPM] Run avx*-builtins.c tests under the new pass manager only

2019-07-23 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: clang/test/CodeGen/avx512vlbw-builtins.c:904
+  // CHECK: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]]
+  // CHECK: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]]
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}}

Can you release the DSTTY/SRCTY patterns with the actual types to improve 
readability?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D65110/new/

https://reviews.llvm.org/D65110



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64638: [CrossTU] Fix plist macro expansion if macro in other file.

2019-07-25 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

@balazske This is causing buildbot failures - revert?


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64638/new/

https://reviews.llvm.org/D64638



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25845: [CUDA] Ignore implicit target attributes during function template instantiation.

2019-08-11 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.
Herald added subscribers: llvm-commits, sanjoy.google, bixia.
Herald added a project: LLVM.



Comment at: cfe/trunk/lib/Sema/SemaDecl.cpp:8416
+// in the CheckFunctionTemplateSpecialization() call below.
+if (getLangOpts().CUDA & !isFunctionTemplateSpecialization)
+  maybeAddCUDAHostDeviceAttrs(NewFD, Previous);

@tra Sorry for touching such an old ticket - but cppcheck is warning that we 
are using a boolean result in a bitwise expression - I'm assuming this should 
be:
```
if (getLangOpts().CUDA && !isFunctionTemplateSpecialization)
```


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D25845/new/

https://reviews.llvm.org/D25845



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63538: [CFG] Add a new function to get the proper condition of a CFGBlock

2019-07-05 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

@Szelethus This is causing problems on windows buildbots 
http://lab.llvm.org:8011/builders/llvm-clang-lld-x86_64-scei-ps4-windows10pro-fast
 - revert?


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63538/new/

https://reviews.llvm.org/D63538



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63538: [CFG] Add a new function to get the proper condition of a CFGBlock

2019-07-05 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

In D63538#1571454 , @Szelethus wrote:

> In D63538#1571451 , @Szelethus wrote:
>
> > Since the followup patches test this roughly anyways, and the fact that the 
> > AST's lifetime ends right after the CFG's construction makes the remaining 
> > tests pretty much pointless, if I can't resolve this, I'll just remove the 
> > testfile.
>
>
> I mean, this is a good enough reason to just go through with it. Removed in 
> rL365209 . Sorry for the inconvenience!


Thanks!


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63538/new/

https://reviews.llvm.org/D63538



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61637: [Syntax] Introduce syntax trees

2019-07-09 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

@ilya-biryukov We're seeing buildbot failures in SyntaxTests.exe : 
http://lab.llvm.org:8011/builders/llvm-clang-lld-x86_64-scei-ps4-ubuntu-fast/builds/50927
http://lab.llvm.org:8011/builders/llvm-clang-lld-x86_64-scei-ps4-windows10pro-fast/builds/26822

Failing Tests (1):

  Clang-Unit :: Tooling/Syntax/./SyntaxTests.exe/SyntaxTreeTest.Basic


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61637/new/

https://reviews.llvm.org/D61637



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D61637: [Syntax] Introduce syntax trees

2019-07-09 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon reopened this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

@ilya-biryukov I'm sorry but I've reverted this at rL365465 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61637/new/

https://reviews.llvm.org/D61637



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64389: [NFC] [X86] Fix scan-build complaining

2019-07-09 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

LGTM - cheers


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64389/new/

https://reviews.llvm.org/D64389



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62232: [Clang][Driver] recheck for nullptr

2019-05-22 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

According to the coverage report we don't have any coverage of TC == null at 
all - not sure if its (a) possible to add tests or (b) if we can ever have a 
non-null TC - so should we just assert?

http://lab.llvm.org:8080/coverage/coverage-reports/clang/coverage/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/clang/lib/Driver/Driver.cpp.html


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62232/new/

https://reviews.llvm.org/D62232



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62282: [X86] Add ENQCMD intrinsics.

2019-05-23 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: lib/Headers/enqcmdintrin.h:21
+
+static __inline__ int _DEFAULT_FN_ATTRS
+_enqcmd (void *__dst, const void *__src)

Please add doxygen comments describing the intrinsics


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62282/new/

https://reviews.llvm.org/D62282



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62363: [X86] Enable intrinsics that convert float and bf16 data to each other

2019-05-26 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: lib/Headers/avx512bf16intrin.h:52
+///
+/// \headerfile
+///

\headerfile 



Comment at: lib/Headers/avx512bf16intrin.h:54
+///
+/// This intrinsic is lowered to  LLVM IR.
+///

Please can you write a better description?



Comment at: lib/Headers/avx512bf16intrin.h:66
+///
+/// \headerfile
+///

\headerfile 



Comment at: lib/Headers/avx512bf16intrin.h:98
+  return (__m512bh)__builtin_ia32_selectw_512(
+  (__mmask32)__U, (__v32hi)_mm512_cvtne2ps_pbh(__A, __B), (__v32hi)__W);
 }

style/clang-format changes like these should be separated into their own patch


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62363/new/

https://reviews.llvm.org/D62363



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62367: [X86] VP2INTERSECT clang

2019-05-28 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added inline comments.



Comment at: lib/Headers/avx512vlvp2intersectintrin.h:39
+
+static __inline__ void __DEFAULT_FN_ATTRS256
+_mm256_2intersect_epi32(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 
*__m1) {

craig.topper wrote:
> xiangzhangllvm wrote:
> > craig.topper wrote:
> > > Can you add doxygen comments for the new intrinsics? @RKSimon has been 
> > > asking for it on other reviews. I forgot to say something in our internal 
> > > review.
> > OK! But I really find many ntrinsicxxx.h have no doxygen comments, Is it 
> > like this format: ?
> > 
> > 
> > ```
> > /// Rounds up each element of the 128-bit vector of [4 x float] to an
> > ///integer and returns the rounded values in a 128-bit vector of
> > ///[4 x float].
> > ///
> > /// \headerfile 
> > ///
> > /// \code
> > /// __m128 _mm_ceil_ps(__m128 X);
> > /// \endcode
> > ///
> > /// This intrinsic corresponds to the  VROUNDPS / ROUNDPS  
> > instruction.
> > ///
> > /// \param X
> > ///A 128-bit vector of [4 x float] values to be rounded up.
> > /// \returns A 128-bit vector of [4 x float] containing the rounded values.
> > #define _mm_ceil_ps(X)   _mm_round_ps((X), _MM_FROUND_CEIL)
> > ```
> > 
> Yeah that looks right. Someone was kind enough to provide comments for some 
> of the header files a few years ago, but it wasn't complete. I know avx512* 
> especially are missing comments.
The hope is that these will get added as time goes on - usually it just 
requires a copy + paste of existing comments and tweak for ymm/zmm/whatever.

New intrinsics should always include documentation since the person adding it 
is the most likely to be able to correct describe it.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62367/new/

https://reviews.llvm.org/D62367



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D55879: [X86][SSE] Auto upgrade PADDUS/PSUBUS intrinsics to UADD_SAT/USUB_SAT generic intrinsics (clang)

2018-12-19 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: craig.topper, spatel, nikic, tkrupa.
Herald added a subscriber: llvm-commits.

Sibling patch to D55855 , this emits 
UADD_SAT/USUB_SAT generic intrinsics for the SSE saturated math intrinsics 
instead of expanding to a IR code sequence that could be difficult to 
reassemble.


Repository:
  rL LLVM

https://reviews.llvm.org/D55879

Files:
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGen/avx2-builtins.c
  test/CodeGen/avx512bw-builtins.c
  test/CodeGen/avx512vlbw-builtins.c
  test/CodeGen/sse2-builtins.c

Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -60,18 +60,14 @@
 __m128i test_mm_adds_epu8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epu8
   // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  // CHECK: add <16 x i8> %{{.*}}, %{{.*}}
-  // CHECK: icmp ugt <16 x i8> %{{.*}}, %{{.*}}
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> , <16 x i8> {{.*}}
+  // CHECK: call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_adds_epu8(A, B);
 }
 
 __m128i test_mm_adds_epu16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epu16
   // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
-  // CHECK: add <8 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ugt <8 x i16> %{{.*}}, %{{.*}}
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> , <8 x i16> {{.*}}
+  // CHECK: call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_adds_epu16(A, B);
 }
 
@@ -1477,18 +1473,14 @@
 __m128i test_mm_subs_epu8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epu8
   // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  // CHECK: icmp ugt <16 x i8> {{.*}}, {{.*}}
-  // CHECK: select <16 x i1> {{.*}}, <16 x i8> {{.*}}, <16 x i8> {{.*}}
-  // CHECK: sub <16 x i8> {{.*}}, {{.*}}
+  // CHECK: call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_subs_epu8(A, B);
 }
 
 __m128i test_mm_subs_epu16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epu16
   // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
-  // CHECK: icmp ugt <8 x i16> {{.*}}, {{.*}}
-  // CHECK: select <8 x i1> {{.*}}, <8 x i16> {{.*}}, <8 x i16> {{.*}}
-  // CHECK: sub <8 x i16> {{.*}}, {{.*}}
+  // CHECK: call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_subs_epu16(A, B);
 }
 
Index: test/CodeGen/avx512vlbw-builtins.c
===
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -1124,72 +1124,56 @@
 __m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epu8
   // CHECK-NOT: @llvm.x86.sse2.paddus.b
-  // CHECK: add <16 x i8> %{{.*}}, %{{.*}}
-  // CHECK: icmp ugt <16 x i8> %{{.*}}, %{{.*}}
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> , <16 x i8> {{.*}}
+  // CHECK: call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_adds_epu8(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epu8
   // CHECK-NOT: @llvm.x86.sse2.paddus.b
-  // CHECK: add <16 x i8> %{{.*}}, %{{.*}}
-  // CHECK: icmp ugt <16 x i8> %{{.*}}, %{{.*}}
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> , <16 x i8> {{.*}}
+  // CHECK: call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_adds_epu8(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epu8
   // CHECK-NOT: @llvm.x86.avx2.paddus.b
-  // CHECK: add <32 x i8> %{{.*}}, %{{.*}}
-  // CHECK: icmp ugt <32 x i8> %{{.*}}, %{{.*}}
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> , <32 x i8> {{.*}}
+  // CHECK: call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_adds_epu8(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epu8
   // CHECK-NOT: @llvm.x86.avx2.paddus.b
-  // CHECK: add <32 x i8> %{{.*}}, %{{.*}}
-  // CHECK: icmp ugt <32 x i8> %{{.*}}, %{{.*}}
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> , <32 x i8> {{.*}}
+  // CHECK: call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <

[PATCH] D55879: [X86][SSE] Auto upgrade PADDUS/PSUBUS intrinsics to UADD_SAT/USUB_SAT generic intrinsics (clang)

2018-12-19 Thread Simon Pilgrim via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC349631: [X86][SSE] Auto upgrade PADDUS/PSUBUS intrinsics to 
UADD_SAT/USUB_SAT generic… (authored by RKSimon, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D55879?vs=178839&id=178880#toc

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55879/new/

https://reviews.llvm.org/D55879

Files:
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGen/avx2-builtins.c
  test/CodeGen/avx512bw-builtins.c
  test/CodeGen/avx512vlbw-builtins.c
  test/CodeGen/sse2-builtins.c

Index: lib/CodeGen/CGBuiltin.cpp
===
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -9487,31 +9487,14 @@
   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
 }
 
-// Emit addition or subtraction with saturation.
-// Handles both signed and unsigned intrinsics.
-static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, const CallExpr *E,
+// Emit addition or subtraction with unsigned saturation.
+// TODO: Handle signed intrinsics.
+static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF,
SmallVectorImpl &Ops,
bool IsAddition) {
-
-  // Collect vector elements and type data.
-  llvm::Type *ResultType = CGF.ConvertType(E->getType());
-
-  Value *Res;
-  if (IsAddition) {
-// ADDUS: a > (a+b) ? ~0 : (a+b)
-// If Ops[0] > Add, overflow occurred.
-Value *Add = CGF.Builder.CreateAdd(Ops[0], Ops[1]);
-Value *ICmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Add);
-Value *Max = llvm::Constant::getAllOnesValue(ResultType);
-Res = CGF.Builder.CreateSelect(ICmp, Max, Add);
-  } else {
-// SUBUS: max(a, b) - b
-Value *ICmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
-Value *Select = CGF.Builder.CreateSelect(ICmp, Ops[0], Ops[1]);
-Res = CGF.Builder.CreateSub(Select, Ops[1]);
-  }
-
-  return Res;
+  Intrinsic::ID IID = IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat;
+  llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
+  return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
 }
 
 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
@@ -11382,14 +11365,14 @@
   case X86::BI__builtin_ia32_paddusw256:
   case X86::BI__builtin_ia32_paddusb128:
   case X86::BI__builtin_ia32_paddusw128:
-return EmitX86AddSubSatExpr(*this, E, Ops, true /* IsAddition */);
+return EmitX86AddSubSatExpr(*this, Ops, true /* IsAddition */);
   case X86::BI__builtin_ia32_psubusb512:
   case X86::BI__builtin_ia32_psubusw512:
   case X86::BI__builtin_ia32_psubusb256:
   case X86::BI__builtin_ia32_psubusw256:
   case X86::BI__builtin_ia32_psubusb128:
   case X86::BI__builtin_ia32_psubusw128:
-return EmitX86AddSubSatExpr(*this, E, Ops, false /* IsAddition */);
+return EmitX86AddSubSatExpr(*this, Ops, false /* IsAddition */);
   }
 }
 
Index: test/CodeGen/avx2-builtins.c
===
--- test/CodeGen/avx2-builtins.c
+++ test/CodeGen/avx2-builtins.c
@@ -69,18 +69,14 @@
 __m256i test_mm256_adds_epu8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_adds_epu8
   // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
-  // CHECK: add <32 x i8> %{{.*}}, %{{.*}}
-  // CHECK: icmp ugt <32 x i8> %{{.*}}, %{{.*}}
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> , <32 x i8> {{.*}}
+  // CHECK: call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_adds_epu8(a, b);
 }
 
 __m256i test_mm256_adds_epu16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_adds_epu16
   // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
-  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ugt <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> , <16 x i16> {{.*}}
+  // CHECK: call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_adds_epu16(a, b);
 }
 
@@ -1188,18 +1184,14 @@
 __m256i test_mm256_subs_epu8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_subs_epu8
   // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
-  // CHECK: icmp ugt <32 x i8> {{.*}}, {{.*}}
-  // CHECK: select <32 x i1> {{.*}}, <32 x i8> {{.*}}, <32 x i8> {{.*}}
-  // CHECK: sub <32 x i8> {{.*}}, {{.*}}
+  // CHECK: call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_subs_epu8(a, b);
 }
 
 __m256i test_mm256_subs_epu16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_subs_epu16
   // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
-  // CHECK: icmp ugt <16 x i16> {{.*}}, {{.*}}
-  // CHECK: select <16 x i1> {{.*}}, <16 x i16> {{.*}}, <16 x i16> {{.*}}
-  // CHECK: sub <16 x i16> {{.*}}, {{.*}

[PATCH] D55890: [X86][SSE] Auto upgrade PADDS/PSUBS intrinsics to SADD_SAT/SSUB_SAT generic intrinsics (clang)

2018-12-19 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: craig.topper, spatel, nikic, tkrupa.
Herald added a subscriber: llvm-commits.

This emits SADD_SAT/SSUB_SAT generic intrinsics for the SSE saturated math 
intrinsics.

I'm still working on the llvm sibling patch (auto upgrade etc.) as there's a 
number of things that need unravelling. If people are interested in getting 
this committed quicker I could include minimal llvm side commit changes 
(GCCBuiltin and fast-isel tests) in the meantime.


Repository:
  rL LLVM

https://reviews.llvm.org/D55890

Files:
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGen/avx2-builtins.c
  test/CodeGen/avx512bw-builtins.c
  test/CodeGen/avx512vlbw-builtins.c
  test/CodeGen/sse2-builtins.c

Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -47,13 +47,13 @@
 
 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epi8
-  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_adds_epi8(A, B);
 }
 
 __m128i test_mm_adds_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epi16
-  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_adds_epi16(A, B);
 }
 
@@ -1460,13 +1460,13 @@
 
 __m128i test_mm_subs_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epi8
-  // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_subs_epi8(A, B);
 }
 
 __m128i test_mm_subs_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epi16
-  // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_subs_epi16(A, B);
 }
 
Index: test/CodeGen/avx512vlbw-builtins.c
===
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -1075,49 +1075,49 @@
 
 __m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epi8
-  // CHECK: @llvm.x86.sse2.padds.b
+  // CHECK: @llvm.sadd.sat.v16i8
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_adds_epi8(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epi8
-  // CHECK: @llvm.x86.sse2.padds.b
+  // CHECK: @llvm.sadd.sat.v16i8
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_adds_epi8(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epi8
-  // CHECK: @llvm.x86.avx2.padds.b
+  // CHECK: @llvm.sadd.sat.v32i8
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_adds_epi8(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epi8
-  // CHECK: @llvm.x86.avx2.padds.b
+  // CHECK: @llvm.sadd.sat.v32i8
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_adds_epi8(__U,__A,__B); 
 }
 __m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epi16
-  // CHECK: @llvm.x86.sse2.padds.w
+  // CHECK: @llvm.sadd.sat.v8i16
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_adds_epi16(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epi16
-  // CHECK: @llvm.x86.sse2.padds.w
+  // CHECK: @llvm.sadd.sat.v8i16
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_adds_epi16(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epi16
-  // CHECK: @llvm.x86.avx2.padds.w
+  // CHECK: @llvm.sadd.sat.v16i16
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_adds_epi16(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epi16
-  // CHECK: @llvm.x86.avx2.padds.w
+  // CHECK: @llvm.sadd.sat.v16i16
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_adds_epi16(__U,__A,__B); 
 }
@@ -1527,49 +

[PATCH] D55890: [X86][SSE] Auto upgrade PADDS/PSUBS intrinsics to SADD_SAT/SSUB_SAT generic intrinsics (clang)

2018-12-19 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon updated this revision to Diff 178946.
RKSimon added a comment.

Use ArrayRef


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55890/new/

https://reviews.llvm.org/D55890

Files:
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGen/avx2-builtins.c
  test/CodeGen/avx512bw-builtins.c
  test/CodeGen/avx512vlbw-builtins.c
  test/CodeGen/sse2-builtins.c

Index: test/CodeGen/sse2-builtins.c
===
--- test/CodeGen/sse2-builtins.c
+++ test/CodeGen/sse2-builtins.c
@@ -47,13 +47,13 @@
 
 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epi8
-  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_adds_epi8(A, B);
 }
 
 __m128i test_mm_adds_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epi16
-  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_adds_epi16(A, B);
 }
 
@@ -1460,13 +1460,13 @@
 
 __m128i test_mm_subs_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epi8
-  // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_subs_epi8(A, B);
 }
 
 __m128i test_mm_subs_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epi16
-  // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_subs_epi16(A, B);
 }
 
Index: test/CodeGen/avx512vlbw-builtins.c
===
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -1075,49 +1075,49 @@
 
 __m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epi8
-  // CHECK: @llvm.x86.sse2.padds.b
+  // CHECK: @llvm.sadd.sat.v16i8
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_adds_epi8(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epi8
-  // CHECK: @llvm.x86.sse2.padds.b
+  // CHECK: @llvm.sadd.sat.v16i8
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_adds_epi8(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epi8
-  // CHECK: @llvm.x86.avx2.padds.b
+  // CHECK: @llvm.sadd.sat.v32i8
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_adds_epi8(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epi8
-  // CHECK: @llvm.x86.avx2.padds.b
+  // CHECK: @llvm.sadd.sat.v32i8
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_adds_epi8(__U,__A,__B); 
 }
 __m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epi16
-  // CHECK: @llvm.x86.sse2.padds.w
+  // CHECK: @llvm.sadd.sat.v8i16
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_adds_epi16(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epi16
-  // CHECK: @llvm.x86.sse2.padds.w
+  // CHECK: @llvm.sadd.sat.v8i16
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_adds_epi16(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epi16
-  // CHECK: @llvm.x86.avx2.padds.w
+  // CHECK: @llvm.sadd.sat.v16i16
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_adds_epi16(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epi16
-  // CHECK: @llvm.x86.avx2.padds.w
+  // CHECK: @llvm.sadd.sat.v16i16
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_adds_epi16(__U,__A,__B); 
 }
@@ -1527,49 +1527,49 @@
 }
 __m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_subs_epi8
-  // CHECK: @llvm.x86.sse2.psubs.b
+  // CHECK: @llvm.ssub.sat.v16i8
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_subs_epi8(__W,__U,__A,__B); 
 }
 __m128i 

[PATCH] D55890: [X86][SSE] Auto upgrade PADDS/PSUBS intrinsics to SADD_SAT/SSUB_SAT generic intrinsics (clang)

2018-12-20 Thread Simon Pilgrim via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL349743: [X86][SSE] Auto upgrade PADDS/PSUBS intrinsics to 
SADD_SAT/SSUB_SAT generic… (authored by RKSimon, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D55890?vs=178946&id=179036#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D55890/new/

https://reviews.llvm.org/D55890

Files:
  cfe/trunk/lib/CodeGen/CGBuiltin.cpp
  cfe/trunk/test/CodeGen/avx2-builtins.c
  cfe/trunk/test/CodeGen/avx512bw-builtins.c
  cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
  cfe/trunk/test/CodeGen/sse2-builtins.c

Index: cfe/trunk/test/CodeGen/avx2-builtins.c
===
--- cfe/trunk/test/CodeGen/avx2-builtins.c
+++ cfe/trunk/test/CodeGen/avx2-builtins.c
@@ -56,13 +56,13 @@
 
 __m256i test_mm256_adds_epi8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_adds_epi8
-  // CHECK: call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
+  // CHECK: call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_adds_epi8(a, b);
 }
 
 __m256i test_mm256_adds_epi16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_adds_epi16
-  // CHECK: call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
+  // CHECK: call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_adds_epi16(a, b);
 }
 
@@ -1171,13 +1171,13 @@
 
 __m256i test_mm256_subs_epi8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_subs_epi8
-  // CHECK: call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
+  // CHECK: call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_subs_epi8(a, b);
 }
 
 __m256i test_mm256_subs_epi16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_subs_epi16
-  // CHECK: call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
+  // CHECK: call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_subs_epi16(a, b);
 }
 
Index: cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
===
--- cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
+++ cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
@@ -1075,49 +1075,49 @@
 
 __m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epi8
-  // CHECK: @llvm.x86.sse2.padds.b
+  // CHECK: @llvm.sadd.sat.v16i8
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_adds_epi8(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epi8
-  // CHECK: @llvm.x86.sse2.padds.b
+  // CHECK: @llvm.sadd.sat.v16i8
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_adds_epi8(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epi8
-  // CHECK: @llvm.x86.avx2.padds.b
+  // CHECK: @llvm.sadd.sat.v32i8
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_adds_epi8(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epi8
-  // CHECK: @llvm.x86.avx2.padds.b
+  // CHECK: @llvm.sadd.sat.v32i8
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_adds_epi8(__U,__A,__B); 
 }
 __m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epi16
-  // CHECK: @llvm.x86.sse2.padds.w
+  // CHECK: @llvm.sadd.sat.v8i16
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_adds_epi16(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epi16
-  // CHECK: @llvm.x86.sse2.padds.w
+  // CHECK: @llvm.sadd.sat.v8i16
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_adds_epi16(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epi16
-  // CHECK: @llvm.x86.avx2.padds.w
+  // CHECK: @llvm.sadd.sat.v16i16
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_adds_epi16(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epi16
-  // CHECK: @llvm.x86.avx2.padds.w
+  // CHECK: @llvm.sadd.sat.v16i16
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_adds_epi16(__U,__A

[PATCH] D55937: [X86] Auto upgrade XOP/AVX512 rotation intrinsics to generic funnel shift intrinsics (clang)

2018-12-20 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon created this revision.
RKSimon added reviewers: spatel, craig.topper.
Herald added a subscriber: llvm-commits.

This emits FSHL/FSHR generic intrinsics for the XOP VPROT and AVX512 
VPROL/VPROR rotation intrinsics.


Repository:
  rL LLVM

https://reviews.llvm.org/D55937

Files:
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGen/avx512f-builtins.c
  test/CodeGen/avx512vl-builtins.c
  test/CodeGen/xop-builtins.c

Index: test/CodeGen/xop-builtins.c
===
--- test/CodeGen/xop-builtins.c
+++ test/CodeGen/xop-builtins.c
@@ -194,49 +194,49 @@
 
 __m128i test_mm_rot_epi8(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_rot_epi8
-  // CHECK: call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_rot_epi8(a, b);
 }
 
 __m128i test_mm_rot_epi16(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_rot_epi16
-  // CHECK: call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_rot_epi16(a, b);
 }
 
 __m128i test_mm_rot_epi32(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_rot_epi32
-  // CHECK: call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
   return _mm_rot_epi32(a, b);
 }
 
 __m128i test_mm_rot_epi64(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_rot_epi64
-  // CHECK: call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_rot_epi64(a, b);
 }
 
 __m128i test_mm_roti_epi8(__m128i a) {
   // CHECK-LABEL: test_mm_roti_epi8
-  // CHECK: call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %{{.*}}, i8 1)
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> )
   return _mm_roti_epi8(a, 1);
 }
 
 __m128i test_mm_roti_epi16(__m128i a) {
   // CHECK-LABEL: test_mm_roti_epi16
-  // CHECK: call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %{{.*}}, i8 50)
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> )
   return _mm_roti_epi16(a, 50);
 }
 
 __m128i test_mm_roti_epi32(__m128i a) {
   // CHECK-LABEL: test_mm_roti_epi32
-  // CHECK: call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %{{.*}}, i8 -30)
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> )
   return _mm_roti_epi32(a, -30);
 }
 
 __m128i test_mm_roti_epi64(__m128i a) {
   // CHECK-LABEL: test_mm_roti_epi64
-  // CHECK: call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %{{.*}}, i8 100)
+  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> )
   return _mm_roti_epi64(a, 100);
 }
 
Index: test/CodeGen/avx512vl-builtins.c
===
--- test/CodeGen/avx512vl-builtins.c
+++ test/CodeGen/avx512vl-builtins.c
@@ -5779,240 +5779,240 @@
 
 __m128i test_mm_rol_epi32(__m128i __A) {
   // CHECK-LABEL: @test_mm_rol_epi32
-  // CHECK: @llvm.x86.avx512.prol.d.128
+  // CHECK: @llvm.fshl.v4i32
   return _mm_rol_epi32(__A, 5); 
 }
 
 __m128i test_mm_mask_rol_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_mask_rol_epi32
-  // CHECK: @llvm.x86.avx512.prol.d.128
+  // CHECK: @llvm.fshl.v4i32
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_mask_rol_epi32(__W, __U, __A, 5); 
 }
 
 __m128i test_mm_maskz_rol_epi32(__mmask8 __U, __m128i __A) {
   // CHECK-LABEL: @test_mm_maskz_rol_epi32
-  // CHECK: @llvm.x86.avx512.prol.d.128
+  // CHECK: @llvm.fshl.v4i32
   // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm_maskz_rol_epi32(__U, __A, 5); 
 }
 
 __m256i test_mm256_rol_epi32(__m256i __A) {
   // CHECK-LABEL: @test_mm256_rol_epi32
-  // CHECK: @llvm.x86.avx512.prol.d.256
+  // CHECK: @llvm.fshl.v8i32
   return _mm256_rol_epi32(__A, 5); 
 }
 
 __m256i test_mm256_mask_rol_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_rol_epi32
-  // CHECK: @llvm.x86.avx512.prol.d.256
+  // CHECK: @llvm.fshl.v8i32
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_mask_rol_epi32(__W, __U, __A, 5); 
 }
 
 __m256i test_mm256_maskz_rol_epi32(__mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_rol_epi32
-  // CHECK: @llvm.x86.avx512.prol.d.256
+  // CHECK: @llvm.fshl.v8i32
   // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm256_maskz_rol_epi32(__U, __A, 5); 
 }
 
 __m128i test_mm_rol_epi64(__m128i __A) {
   // CHECK-LABEL: @test_mm_rol_epi64
-  // CHECK: @llvm.x86.avx512.prol.q.128
+  // CHECK: @ll

  1   2   3   4   5   6   7   >