kpn updated this revision to Diff 318567.
kpn added a comment.

Update for review comments: Move uses of CGFPOptionsRAII lower and closer to 
where they are needed. This should be less error prone as well.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94614/new/

https://reviews.llvm.org/D94614

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
  clang/test/CodeGen/X86/avx512dq-builtins-constrained.c
  clang/test/CodeGen/X86/avx512f-builtins-constrained.c
  clang/test/CodeGen/X86/fma-builtins-constrained.c
  clang/test/CodeGen/X86/sse-builtins-constrained.c

Index: clang/test/CodeGen/X86/sse-builtins-constrained.c
===================================================================
--- clang/test/CodeGen/X86/sse-builtins-constrained.c
+++ clang/test/CodeGen/X86/sse-builtins-constrained.c
@@ -1,8 +1,15 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=strict -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=maytrap -DSTRICT=1 -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
 
 
 #include <immintrin.h>
Index: clang/test/CodeGen/X86/fma-builtins-constrained.c
===================================================================
--- clang/test/CodeGen/X86/fma-builtins-constrained.c
+++ clang/test/CodeGen/X86/fma-builtins-constrained.c
@@ -1,8 +1,15 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,UNCONSTRAINED
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=strict -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=maytrap -DSTRICT=1 -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=strict -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
 
 #include <immintrin.h>
 
Index: clang/test/CodeGen/X86/avx512f-builtins-constrained.c
===================================================================
--- clang/test/CodeGen/X86/avx512f-builtins-constrained.c
+++ clang/test/CodeGen/X86/avx512f-builtins-constrained.c
@@ -1,10 +1,17 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
 // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
-// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
-// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-compatibility -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-compatibility -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
 // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
-// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
 
 #include <immintrin.h>
 
Index: clang/test/CodeGen/X86/avx512dq-builtins-constrained.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/X86/avx512dq-builtins-constrained.c
@@ -0,0 +1,269 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+
+// FIXME: Every instance of "fpexcept.maytrap" is wrong.
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
+
+
+#include <immintrin.h>
+
+__m512d test_mm512_cvtepi64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepi64_pd
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_cvtepi64_pd(__A);
+}
+
+__m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepi64_pd
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_mask_cvtepi64_pd(__W, __U, __A);
+}
+
+__m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepi64_pd
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_maskz_cvtepi64_pd(__U, __A);
+}
+
+__m512d test_mm512_cvt_roundepi64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvt_roundepi64_pd
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_cvt_roundepi64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_mask_cvt_roundepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvt_roundepi64_pd
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_mask_cvt_roundepi64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_maskz_cvt_roundepi64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvt_roundepi64_pd
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_maskz_cvt_roundepi64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_cvtepi64_ps(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepi64_ps
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_cvtepi64_ps(__A);
+}
+
+__m256 test_mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepi64_ps
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_mask_cvtepi64_ps(__W, __U, __A);
+}
+
+__m256 test_mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepi64_ps
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_maskz_cvtepi64_ps(__U, __A);
+}
+
+__m256 test_mm512_cvt_roundepi64_ps(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvt_roundepi64_ps
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_mask_cvt_roundepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvt_roundepi64_ps
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_mask_cvt_roundepi64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_maskz_cvt_roundepi64_ps(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvt_roundepi64_ps
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_maskz_cvt_roundepi64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_cvtepu64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepu64_pd
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_cvtepu64_pd(__A);
+}
+
+__m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepu64_pd
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_mask_cvtepu64_pd(__W, __U, __A);
+}
+
+__m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepu64_pd
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_maskz_cvtepu64_pd(__U, __A);
+}
+
+__m512d test_mm512_cvt_roundepu64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvt_roundepu64_pd
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_cvt_roundepu64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_mask_cvt_roundepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvt_roundepu64_pd
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_mask_cvt_roundepu64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_maskz_cvt_roundepu64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvt_roundepu64_pd
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_maskz_cvt_roundepu64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_cvtepu64_ps(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepu64_ps
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_cvtepu64_ps(__A);
+}
+
+__m256 test_mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepu64_ps
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_mask_cvtepu64_ps(__W, __U, __A);
+}
+
+__m256 test_mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepu64_ps
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_maskz_cvtepu64_ps(__U, __A);
+}
+
+__m256 test_mm512_cvt_roundepu64_ps(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvt_roundepu64_ps
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_cvt_roundepu64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_mask_cvt_roundepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvt_roundepu64_ps
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_mask_cvt_roundepu64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_maskz_cvt_roundepu64_ps(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvt_roundepu64_ps
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_maskz_cvt_roundepu64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
Index: clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
===================================================================
--- clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
+++ clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
@@ -1,4 +1,9 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s
+
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
 
 
 #include <immintrin.h>
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -11544,7 +11544,7 @@
   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
 }
 
-static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
+static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
                                     ArrayRef<Value *> Ops, bool IsSigned) {
   unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
   llvm::Type *Ty = Ops[1]->getType();
@@ -11556,6 +11556,7 @@
     Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
     Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
   } else {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
                    : CGF.Builder.CreateUIToFP(Ops[0], Ty);
   }
@@ -11564,8 +11565,9 @@
 }
 
 // Lowers X86 FMA intrinsics to IR.
-static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
-                             unsigned BuiltinID, bool IsAddSub) {
+static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
+                             ArrayRef<Value *> Ops, unsigned BuiltinID,
+                             bool IsAddSub) {
 
   bool Subtract = false;
   Intrinsic::ID IID = Intrinsic::not_intrinsic;
@@ -11622,6 +11624,7 @@
     llvm::Type *Ty = A->getType();
     Function *FMA;
     if (CGF.Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
       FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
       Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
     } else {
@@ -11663,10 +11666,10 @@
   return Res;
 }
 
-static Value *
-EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
-                  Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
-                  bool NegAcc = false) {
+static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
+                                MutableArrayRef<Value *> Ops, Value *Upper,
+                                bool ZeroMask = false, unsigned PTIdx = 0,
+                                bool NegAcc = false) {
   unsigned Rnd = 4;
   if (Ops.size() > 4)
     Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
@@ -11685,6 +11688,7 @@
     Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
                                  {Ops[0], Ops[1], Ops[2], Ops[4]});
   } else if (CGF.Builder.getIsFPConstrained()) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *FMA = CGF.CGM.getIntrinsic(
         Intrinsic::experimental_constrained_fma, Ops[0]->getType());
     Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
@@ -12022,8 +12026,9 @@
   // TODO: The builtins could be removed if the SSE header files used vector
   // extension comparisons directly (vector ordered/unordered may need
   // additional support via __builtin_isnan()).
-  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred,
-                                      bool IsSignaling) {
+  auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
+                                         bool IsSignaling) {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     Value *Cmp;
     if (IsSignaling)
       Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
@@ -12265,31 +12270,31 @@
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:
-    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true);
+    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
-    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false);
+    return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
 
   case X86::BI__builtin_ia32_vfmaddss3:
   case X86::BI__builtin_ia32_vfmaddsd3:
   case X86::BI__builtin_ia32_vfmaddss3_mask:
   case X86::BI__builtin_ia32_vfmaddsd3_mask:
-    return EmitScalarFMAExpr(*this, Ops, Ops[0]);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
   case X86::BI__builtin_ia32_vfmaddss:
   case X86::BI__builtin_ia32_vfmaddsd:
-    return EmitScalarFMAExpr(*this, Ops,
+    return EmitScalarFMAExpr(*this, E, Ops,
                              Constant::getNullValue(Ops[0]->getType()));
   case X86::BI__builtin_ia32_vfmaddss3_maskz:
   case X86::BI__builtin_ia32_vfmaddsd3_maskz:
-    return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
   case X86::BI__builtin_ia32_vfmaddss3_mask3:
   case X86::BI__builtin_ia32_vfmaddsd3_mask3:
-    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
   case X86::BI__builtin_ia32_vfmsubss3_mask3:
   case X86::BI__builtin_ia32_vfmsubsd3_mask3:
-    return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
-                             /*NegAcc*/true);
+    return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
+                             /*NegAcc*/ true);
   case X86::BI__builtin_ia32_vfmaddps:
   case X86::BI__builtin_ia32_vfmaddpd:
   case X86::BI__builtin_ia32_vfmaddps256:
@@ -12302,7 +12307,7 @@
   case X86::BI__builtin_ia32_vfmaddpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubpd512_mask3:
-    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
+    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
   case X86::BI__builtin_ia32_vfmaddsubps512_mask:
   case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
@@ -12311,7 +12316,7 @@
   case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
   case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
-    return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
+    return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
 
   case X86::BI__builtin_ia32_movdqa32store128_mask:
   case X86::BI__builtin_ia32_movdqa64store128_mask:
@@ -13457,6 +13462,7 @@
     Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
     Function *F;
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                            A->getType());
       A = Builder.CreateConstrainedFPCall(F, {A});
@@ -13480,6 +13486,7 @@
     Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
     Function *F;
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                            A->getType());
       A = Builder.CreateConstrainedFPCall(F, A);
@@ -13509,6 +13516,7 @@
       }
     }
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                                      Ops[0]->getType());
       return Builder.CreateConstrainedFPCall(F, Ops[0]);
@@ -14053,6 +14061,8 @@
     if (IsMaskFCmp) {
       // We ignore SAE if strict FP is disabled. We only keep precise
       // exception behavior under strict FP.
+      // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
+      // object will be required.
       unsigned NumElts =
           cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
       Value *Cmp;
@@ -14105,8 +14115,10 @@
   case X86::BI__builtin_ia32_vcvtph2ps256:
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
+  }
 
 // AVX512 bf16 intrinsics
   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to