[PATCH] D104790: [x86] fix mm*_undefined* intrinsics to use arbitrary frozen bit pattern

Juneyoung Lee via Phabricator via cfe-commits Sat, 26 Jun 2021 07:32:02 -0700

aqjune updated this revision to Diff 354678.
aqjune added a comment.

Minor fixes



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104790/new/

https://reviews.llvm.org/D104790

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/X86/avx-builtins.c
  clang/test/CodeGen/X86/avx2-builtins.c
  clang/test/CodeGen/X86/avx512f-builtins.c
  clang/test/CodeGen/X86/sse-builtins.c
  clang/test/CodeGen/X86/sse2-builtins.c
  llvm/include/llvm/CodeGen/SelectionDAGNodes.h
  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
  llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

Index: llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
===================================================================
--- llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -6386,18 +6386,30 @@
 }
 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
 
+define <4 x float> @test_mm_undefined_ps() {
+; CHECK-LABEL: test_mm_undefined_ps:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %v = freeze <2 x double> poison
+  %w = bitcast <2 x double> %v to <4 x float>
+  ret <4 x float> %w
+}
+
 define <2 x double> @test_mm_undefined_pd() {
 ; CHECK-LABEL: test_mm_undefined_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  ret <2 x double> undef
+  %v = freeze <2 x double> poison
+  ret <2 x double> %v
 }
 
 define <2 x i64> @test_mm_undefined_si128() {
 ; CHECK-LABEL: test_mm_undefined_si128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  ret <2 x i64> undef
+  %v = freeze <2 x double> poison
+  %w = bitcast <2 x double> %v to <2 x i64>
+  ret <2 x i64> %w
 }
 
 define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
Index: llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
===================================================================
--- llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -2965,32 +2965,55 @@
 }
 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
 
-define <2 x double> @test_mm_undefined_pd() nounwind {
-; CHECK-LABEL: test_mm_undefined_pd:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    ret{{[l|q]}}
-  ret <2 x double> undef
-}
-
 define <4 x double> @test_mm256_undefined_pd() nounwind {
 ; CHECK-LABEL: test_mm256_undefined_pd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}}
-  ret <4 x double> undef
+  %v = freeze <4 x double> poison
+  ret <4 x double> %v
 }
 
 define <8 x float> @test_mm256_undefined_ps() nounwind {
 ; CHECK-LABEL: test_mm256_undefined_ps:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}}
-  ret <8 x float> undef
+  %v = freeze <4 x double> poison
+  %w = bitcast <4 x double> %v to <8 x float>
+  ret <8 x float> %w
 }
 
 define <4 x i64> @test_mm256_undefined_si256() nounwind {
 ; CHECK-LABEL: test_mm256_undefined_si256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret{{[l|q]}}
-  ret <4 x i64> undef
+  %v = freeze <4 x double> poison
+  %w = bitcast <4 x double> %v to <4 x i64>
+  ret <4 x i64> %w
+}
+
+define <16 x float> @test_mm512_undefined() nounwind {
+; CHECK-LABEL: test_mm512_undefined:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret{{[l|q]}}
+  %v = freeze <8 x double> poison
+  %w = bitcast <8 x double> %v to <16 x float>
+  ret <16 x float> %w
+}
+
+define <8 x double> @test_mm512_undefined_pd() nounwind {
+; CHECK-LABEL: test_mm512_undefined_pd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret{{[l|q]}}
+  %v = freeze <8 x double> poison
+  ret <8 x double> %v
+}
+
+define <8 x i64> @test_mm512_undefined_epi32() nounwind {
+; CHECK-LABEL: test_mm512_undefined_epi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret{{[l|q]}}
+  %v = freeze <8 x i64> poison
+  ret <8 x i64> %v
 }
 
 define <4 x double> @test_mm256_unpackhi_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26011,10 +26011,11 @@
                                         TLI.getPointerTy(DAG.getDataLayout()));
   EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other);
-  // If source is undef or we know it won't be used, use a zero vector
-  // to break register dependency.
+  // If source is undef, frozen undef with one use only, or we
+  // know it won't be used, use a zero vector to break register dependency.
   // TODO: use undef instead and let BreakFalseDeps deal with it?
-  if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
+  if (Src.isUndef() || (Src.isFreezeUndef() && Src.hasOneUse()) ||
+      ISD::isBuildVectorAllOnes(Mask.getNode()))
     Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
 
   // Cast mask to an integer type.
@@ -26052,10 +26053,11 @@
     Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
 
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other);
-  // If source is undef or we know it won't be used, use a zero vector
-  // to break register dependency.
+  // If source is undef, frozen undef with one use only, or we
+  // know it won't be used, use a zero vector to break register dependency.
   // TODO: use undef instead and let BreakFalseDeps deal with it?
+  if (Src.isUndef() || (Src.isFreezeUndef() && Src.hasOneUse()) ||
+      ISD::isBuildVectorAllOnes(Mask.getNode()))
     Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
 
   MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12497,6 +12497,10 @@
   if (N0.isUndef())
     return DAG.getUNDEF(VT);
 
+  // bitcast (freeze undef) -> freeze undef
+  if (N0.isFreezeUndef() && N0.hasOneUse())
+    return DAG.getFreeze(DAG.getUNDEF(VT));
+
   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
   // Only do this before legalize types, unless both types are integer and the
   // scalar type is legal. Only do this before legalize ops, since the target
Index: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -207,6 +207,7 @@
   inline bool isTargetOpcode() const;
   inline bool isMachineOpcode() const;
   inline bool isUndef() const;
+  inline bool isFreezeUndef() const;
   inline unsigned getMachineOpcode() const;
   inline const DebugLoc &getDebugLoc() const;
   inline void dump() const;
@@ -1150,6 +1151,10 @@
   return Node->isUndef();
 }
 
+inline bool SDValue::isFreezeUndef() const {
+  return Node->getOpcode() == ISD::FREEZE && Node->getOperand(0).isUndef();
+}
+
 inline bool SDValue::use_empty() const {
   return !Node->hasAnyUseOfValue(ResNo);
 }
Index: clang/test/CodeGen/X86/sse2-builtins.c
===================================================================
--- clang/test/CodeGen/X86/sse2-builtins.c
+++ clang/test/CodeGen/X86/sse2-builtins.c
@@ -1630,13 +1630,16 @@
 
 __m128d test_mm_undefined_pd() {
   // CHECK-LABEL: test_mm_undefined_pd
-  // CHECK: ret <2 x double> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: ret <2 x double> %[[FR]]
   return _mm_undefined_pd();
 }
 
 __m128i test_mm_undefined_si128() {
   // CHECK-LABEL: test_mm_undefined_si128
-  // CHECK: ret <2 x i64> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+  // CHECK: ret <2 x i64> %[[FR_BC]]
   return _mm_undefined_si128();
 }
 
Index: clang/test/CodeGen/X86/sse-builtins.c
===================================================================
--- clang/test/CodeGen/X86/sse-builtins.c
+++ clang/test/CodeGen/X86/sse-builtins.c
@@ -786,7 +786,9 @@
 
 __m128 test_mm_undefined_ps() {
   // CHECK-LABEL: test_mm_undefined_ps
-  // CHECK: ret <4 x float> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
+  // CHECK: ret <4 x float> %[[FR_BC]]
   return _mm_undefined_ps();
 }
 
Index: clang/test/CodeGen/X86/avx512f-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3780,25 +3780,32 @@
 
 __m512 test_mm512_undefined() {
   // CHECK-LABEL: @test_mm512_undefined
-  // CHECK: ret <16 x float> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <16 x float>
+  // CHECK: ret <16 x float> %[[FR_BC]]
   return _mm512_undefined();
 }
 
 __m512 test_mm512_undefined_ps() {
   // CHECK-LABEL: @test_mm512_undefined_ps
-  // CHECK: ret <16 x float> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <16 x float>
+  // CHECK: ret <16 x float> %[[FR_BC]]
   return _mm512_undefined_ps();
 }
 
 __m512d test_mm512_undefined_pd() {
   // CHECK-LABEL: @test_mm512_undefined_pd
-  // CHECK: ret <8 x double> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: ret <8 x double> %[[FR]]
   return _mm512_undefined_pd();
 }
 
 __m512i test_mm512_undefined_epi32() {
   // CHECK-LABEL: @test_mm512_undefined_epi32
-  // CHECK: ret <8 x i64> zeroinitializer
+  // CHECK: %[[FR:.*]] = freeze <8 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <8 x double> %[[FR]] to <8 x i64>
+  // CHECK: ret <8 x i64> %[[FR_BC]]
   return _mm512_undefined_epi32();
 }
 
Index: clang/test/CodeGen/X86/avx2-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx2-builtins.c
+++ clang/test/CodeGen/X86/avx2-builtins.c
@@ -455,7 +455,9 @@
 
 __m128i test_mm_i32gather_epi64(long long const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_epi64
-  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
   return _mm_i32gather_epi64(b, c, 2);
 }
 
@@ -467,7 +469,9 @@
 
 __m256i test_mm256_i32gather_epi64(long long const *b, __m128i c) {
   // CHECK-LABEL: test_mm256_i32gather_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64>
+  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
   return _mm256_i32gather_epi64(b, c, 2);
 }
 
@@ -479,10 +483,11 @@
 
 __m128d test_mm_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_pd
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
   // CHECK:         [[CMP:%.*]] = fcmp oeq <2 x double>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
-  // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %[[FR]], i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
   return _mm_i32gather_pd(b, c, 2);
 }
 
@@ -494,10 +499,11 @@
 
 __m256d test_mm256_i32gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm256_i32gather_pd
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
   // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x double>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
-  // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
+  // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %[[FR]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i32gather_pd(b, c, 2);
 }
 
@@ -509,10 +515,12 @@
 
 __m128 test_mm_i32gather_ps(float const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i32gather_ps
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
   // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x float>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-  // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %[[FR_BC]], i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm_i32gather_ps(b, c, 2);
 }
 
@@ -524,10 +532,12 @@
 
 __m256 test_mm256_i32gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i32gather_ps
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <8 x float>
   // CHECK:         [[CMP:%.*]] = fcmp oeq <8 x float>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
-  // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
+  // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %[[FR_BC]], i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
   return _mm256_i32gather_ps(b, c, 2);
 }
 
@@ -563,7 +573,9 @@
 
 __m128i test_mm_i64gather_epi64(long long const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_epi64
-  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <2 x i64>
+  // CHECK: call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %[[FR_BC]], i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 2)
   return _mm_i64gather_epi64(b, c, 2);
 }
 
@@ -575,7 +587,9 @@
 
 __m256i test_mm256_i64gather_epi64(long long const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_epi64
-  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <4 x double> %[[FR]] to <4 x i64>
+  // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %[[FR_BC]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
   return _mm256_i64gather_epi64(b, c, 2);
 }
 
@@ -587,10 +601,11 @@
 
 __m128d test_mm_i64gather_pd(double const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_pd
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
   // CHECK:         [[CMP:%.*]] = fcmp oeq <2 x double>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
-  // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
+  // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %[[FR]], i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
   return _mm_i64gather_pd(b, c, 2);
 }
 
@@ -602,8 +617,9 @@
 
 __m256d test_mm256_i64gather_pd(double const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_pd
+  // CHECK: %[[FR:.*]] = freeze <4 x double> poison
   // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
-  // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
+  // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %[[FR]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
   return _mm256_i64gather_pd(b, c, 2);
 }
 
@@ -615,10 +631,12 @@
 
 __m128 test_mm_i64gather_ps(float const *b, __m128i c) {
   // CHECK-LABEL: test_mm_i64gather_ps
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
   // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x float>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %[[FR_BC]], i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm_i64gather_ps(b, c, 2);
 }
 
@@ -630,10 +648,12 @@
 
 __m128 test_mm256_i64gather_ps(float const *b, __m256i c) {
   // CHECK-LABEL: test_mm256_i64gather_ps
+  // CHECK: %[[FR:.*]] = freeze <2 x double> poison
+  // CHECK: %[[FR_BC:.*]] = bitcast <2 x double> %[[FR]] to <4 x float>
   // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x float>
   // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
   // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
+  // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %[[FR_BC]], i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
   return _mm256_i64gather_ps(b, c, 2);
 }
 
Index: clang/test/CodeGen/X86/avx-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx-builtins.c
+++ clang/test/CodeGen/X86/avx-builtins.c
@@ -2063,19 +2063,24 @@
 
 __m256 test_mm256_undefined_ps() {
   // CHECK-LABEL: test_mm256_undefined_ps
-  // CHECK: ret <8 x float> zeroinitializer
+  // CHECK: freeze <4 x double> poison
+  // CHECK: bitcast <4 x double> %{{.*}} to <8 x float>
+  // CHECK: ret <8 x float> %{{.*}}
   return _mm256_undefined_ps();
 }
 
 __m256d test_mm256_undefined_pd() {
   // CHECK-LABEL: test_mm256_undefined_pd
-  // CHECK: ret <4 x double> zeroinitializer
+  // CHECK: freeze <4 x double> poison
+  // CHECK: ret <4 x double> %{{.*}}
   return _mm256_undefined_pd();
 }
 
 __m256i test_mm256_undefined_si256() {
   // CHECK-LABEL: test_mm256_undefined_si256
-  // CHECK: ret <4 x i64> zeroinitializer
+  // CHECK: freeze <4 x double> poison
+  // CHECK: bitcast <4 x double> %{{.*}} to <4 x i64>
+  // CHECK: ret <4 x i64> %{{.*}}
   return _mm256_undefined_si256();
 }
 
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -12491,12 +12491,9 @@
   case X86::BI__builtin_ia32_undef128:
   case X86::BI__builtin_ia32_undef256:
   case X86::BI__builtin_ia32_undef512:
-    // The x86 definition of "undef" is not the same as the LLVM definition
-    // (PR32176). We leave optimizing away an unnecessary zero constant to the
-    // IR optimizer and backend.
-    // TODO: If we had a "freeze" IR instruction to generate a fixed undef
-    // value, we should use that here instead of a zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
+    // The x86 definition of "undef" is equivalent to "freeze poison" in LLVM
+    // (PR32176).
+    return Builder.CreateFreeze(PoisonValue::get(ConvertType(E->getType())));
   case X86::BI__builtin_ia32_vec_init_v8qi:
   case X86::BI__builtin_ia32_vec_init_v4hi:
   case X86::BI__builtin_ia32_vec_init_v2si:

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D104790: [x86] fix mm*_undefined* intrinsics to use arbitrary frozen bit pattern

Reply via email to

[PATCH] D104790: [x86] fix mm*_undefined* intrinsics to use arbitrary frozen bit pattern