kpn created this revision.
kpn added reviewers: craig.topper, andrew.w.kaylor.
kpn added a project: clang.
Herald added a subscriber: pengfei.
kpn requested review of this revision.
Herald added a subscriber: cfe-commits.
Currently clang does not correctly retrieve from the AST the metadata for constrained FP builtins; it falls back to the global default set on the command line. This patch fixes that for the X86-specific builtins. For previous work in this vein, see for example D92122 <https://reviews.llvm.org/D92122>.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D94614

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
  clang/test/CodeGen/X86/avx512dq-builtins-constrained.c
  clang/test/CodeGen/X86/avx512f-builtins-constrained.c
  clang/test/CodeGen/X86/fma-builtins-constrained.c
  clang/test/CodeGen/X86/sse-builtins-constrained.c
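To make the bug concrete before the diff: with -ffp-exception-behavior=maytrap on the command line but #pragma float_control(except, on) in effect at the call site, the constrained intrinsic emitted for a builtin should carry the strict exception metadata recorded in the AST, not the global maytrap default. A minimal reduction (not part of the patch; the expected-IR comment assumes the SSE sqrt lowering exercised by sse-builtins-constrained.c):

    // Compile with:
    //   clang -cc1 -triple x86_64-unknown-linux-gnu -target-feature +sse \
    //     -ffp-exception-behavior=maytrap -emit-llvm reduced.c
    #include <immintrin.h>

    #pragma float_control(except, on)

    __m128 f(__m128 a) {
      // With the pragma honored, the emitted call should be:
      //   call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
      //       <4 x float> %{{.*}}, metadata !"round.tonearest",
      //       metadata !"fpexcept.strict")
      // Before this patch it was emitted with the command-line default,
      // !"fpexcept.maytrap".
      return _mm_sqrt_ps(a);
    }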
Index: clang/test/CodeGen/X86/sse-builtins-constrained.c
===================================================================
--- clang/test/CodeGen/X86/sse-builtins-constrained.c
+++ clang/test/CodeGen/X86/sse-builtins-constrained.c
@@ -1,8 +1,15 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=strict -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +sse -ffp-exception-behavior=maytrap -DSTRICT=1 -S %s -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
 
 #include <immintrin.h>
Index: clang/test/CodeGen/X86/fma-builtins-constrained.c
===================================================================
--- clang/test/CodeGen/X86/fma-builtins-constrained.c
+++ clang/test/CodeGen/X86/fma-builtins-constrained.c
@@ -1,8 +1,15 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,UNCONSTRAINED
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=strict -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -ffp-exception-behavior=maytrap -DSTRICT=1 -O2 -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,COMMONIR,CONSTRAINED
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=strict -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +fma -O2 -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - | FileCheck %s --check-prefixes=COMMON,CHECK-ASM
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
 
 #include <immintrin.h>
Index: clang/test/CodeGen/X86/avx512f-builtins-constrained.c
===================================================================
--- clang/test/CodeGen/X86/avx512f-builtins-constrained.c
+++ clang/test/CodeGen/X86/avx512f-builtins-constrained.c
@@ -1,10 +1,17 @@
 // REQUIRES: x86-registered-target
 // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
 // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
-// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
-// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-compatibility -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -fms-compatibility -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
 // RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
-// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=strict -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux-gnu -target-feature +avx512f -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
+
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
 
 #include <immintrin.h>
Index: clang/test/CodeGen/X86/avx512dq-builtins-constrained.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/X86/avx512dq-builtins-constrained.c
@@ -0,0 +1,269 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=UNCONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -ffp-exception-behavior=maytrap -DSTRICT=1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CONSTRAINED --check-prefix=COMMON --check-prefix=COMMONIR
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON
+
+// FIXME: Every instance of "fpexcept.maytrap" is wrong.
+#ifdef STRICT
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
+#endif
+
+#include <immintrin.h>
+
+__m512d test_mm512_cvtepi64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepi64_pd
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_cvtepi64_pd(__A);
+}
+
+__m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepi64_pd
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_mask_cvtepi64_pd(__W, __U, __A);
+}
+
+__m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepi64_pd
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_maskz_cvtepi64_pd(__U, __A);
+}
+
+__m512d test_mm512_cvt_roundepi64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvt_roundepi64_pd
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_cvt_roundepi64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_mask_cvt_roundepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvt_roundepi64_pd
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_mask_cvt_roundepi64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_maskz_cvt_roundepi64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvt_roundepi64_pd
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f64.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtqq2pd
+  return _mm512_maskz_cvt_roundepi64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_cvtepi64_ps(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepi64_ps
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_cvtepi64_ps(__A);
+}
+
+__m256 test_mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepi64_ps
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_mask_cvtepi64_ps(__W, __U, __A);
+}
+
+__m256 test_mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepi64_ps
+  // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_maskz_cvtepi64_ps(__U, __A);
+}
+
+__m256 test_mm512_cvt_roundepi64_ps(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvt_roundepi64_ps
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_mask_cvt_roundepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvt_roundepi64_ps
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_mask_cvt_roundepi64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_maskz_cvt_roundepi64_ps(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvt_roundepi64_ps
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtqq2ps
+  return _mm512_maskz_cvt_roundepi64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_cvtepu64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepu64_pd
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_cvtepu64_pd(__A);
+}
+
+__m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepu64_pd
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_mask_cvtepu64_pd(__W, __U, __A);
+}
+
+__m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepu64_pd
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double>
+  // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_maskz_cvtepu64_pd(__U, __A);
+}
+
+__m512d test_mm512_cvt_roundepu64_pd(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvt_roundepu64_pd
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_cvt_roundepu64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_mask_cvt_roundepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvt_roundepu64_pd
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_mask_cvt_roundepu64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m512d test_mm512_maskz_cvt_roundepu64_pd(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvt_roundepu64_pd
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f64.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  // CHECK-ASM: vcvtuqq2pd
+  return _mm512_maskz_cvt_roundepu64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_cvtepu64_ps(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvtepu64_ps
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_cvtepu64_ps(__A);
+}
+
+__m256 test_mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvtepu64_ps
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_mask_cvtepu64_ps(__W, __U, __A);
+}
+
+__m256 test_mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvtepu64_ps
+  // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float>
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_maskz_cvtepu64_ps(__U, __A);
+}
+
+__m256 test_mm512_cvt_roundepu64_ps(__m512i __A) {
+  // COMMON-LABEL: test_mm512_cvt_roundepu64_ps
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_cvt_roundepu64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_mask_cvt_roundepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_mask_cvt_roundepu64_ps
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_mask_cvt_roundepu64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
+__m256 test_mm512_maskz_cvt_roundepu64_ps(__mmask8 __U, __m512i __A) {
+  // COMMON-LABEL: test_mm512_maskz_cvt_roundepu64_ps
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
+  // COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // CHECK-ASM: vcvtuqq2ps
+  return _mm512_maskz_cvt_roundepu64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
Index: clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
===================================================================
--- clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
+++ clang/test/CodeGen/X86/avx-builtins-constrained-cmp.c
@@ -1,4 +1,9 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=maytrap -o - -Wall -Werror | FileCheck %s
+
+// Test that the constrained intrinsics are picking up the exception
+// metadata from the AST instead of the global default from the command line.
+
+#pragma float_control(except, on)
 
 #include <immintrin.h>
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -12264,32 +12264,46 @@
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
-  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
+  case X86::BI__builtin_ia32_cvtqq2pd512_mask: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true);
+  }
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
-  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
+  case X86::BI__builtin_ia32_cvtuqq2pd512_mask: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false);
+  }
   case X86::BI__builtin_ia32_vfmaddss3:
   case X86::BI__builtin_ia32_vfmaddsd3:
   case X86::BI__builtin_ia32_vfmaddss3_mask:
-  case X86::BI__builtin_ia32_vfmaddsd3_mask:
+  case X86::BI__builtin_ia32_vfmaddsd3_mask: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitScalarFMAExpr(*this, Ops, Ops[0]);
+  }
   case X86::BI__builtin_ia32_vfmaddss:
-  case X86::BI__builtin_ia32_vfmaddsd:
+  case X86::BI__builtin_ia32_vfmaddsd: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitScalarFMAExpr(*this, Ops,
                              Constant::getNullValue(Ops[0]->getType()));
+  }
   case X86::BI__builtin_ia32_vfmaddss3_maskz:
-  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
+  case X86::BI__builtin_ia32_vfmaddsd3_maskz: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
+  }
   case X86::BI__builtin_ia32_vfmaddss3_mask3:
-  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
+  case X86::BI__builtin_ia32_vfmaddsd3_mask3: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
+  }
   case X86::BI__builtin_ia32_vfmsubss3_mask3:
-  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
+  case X86::BI__builtin_ia32_vfmsubsd3_mask3: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
                              /*NegAcc*/true);
+  }
   case X86::BI__builtin_ia32_vfmaddps:
   case X86::BI__builtin_ia32_vfmaddpd:
   case X86::BI__builtin_ia32_vfmaddps256:
@@ -12301,8 +12315,10 @@
   case X86::BI__builtin_ia32_vfmaddpd512_mask:
   case X86::BI__builtin_ia32_vfmaddpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddpd512_mask3:
-  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+  case X86::BI__builtin_ia32_vfmsubpd512_mask3: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
+  }
   case X86::BI__builtin_ia32_vfmaddsubps512_mask:
   case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
@@ -12310,8 +12326,10 @@
   case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
   case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
   case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
-  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
+  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
+  }
 
   case X86::BI__builtin_ia32_movdqa32store128_mask:
   case X86::BI__builtin_ia32_movdqa64store128_mask:
@@ -13457,6 +13475,7 @@
     Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
     Function *F;
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                            A->getType());
       A = Builder.CreateConstrainedFPCall(F, {A});
@@ -13480,6 +13499,7 @@
     Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
     Function *F;
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                            A->getType());
       A = Builder.CreateConstrainedFPCall(F, A);
@@ -13509,6 +13529,7 @@
       }
     }
     if (Builder.getIsFPConstrained()) {
+      CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
       Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
                                      Ops[0]->getType());
       return Builder.CreateConstrainedFPCall(F, Ops[0]);
@@ -13917,29 +13938,45 @@
   // packed comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqps:
-  case X86::BI__builtin_ia32_cmpeqpd:
+  case X86::BI__builtin_ia32_cmpeqpd: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
+  }
   case X86::BI__builtin_ia32_cmpltps:
-  case X86::BI__builtin_ia32_cmpltpd:
+  case X86::BI__builtin_ia32_cmpltpd: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
+  }
   case X86::BI__builtin_ia32_cmpleps:
-  case X86::BI__builtin_ia32_cmplepd:
+  case X86::BI__builtin_ia32_cmplepd: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
+  }
   case X86::BI__builtin_ia32_cmpunordps:
-  case X86::BI__builtin_ia32_cmpunordpd:
+  case X86::BI__builtin_ia32_cmpunordpd: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
+  }
   case X86::BI__builtin_ia32_cmpneqps:
-  case X86::BI__builtin_ia32_cmpneqpd:
+  case X86::BI__builtin_ia32_cmpneqpd: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
+  }
   case X86::BI__builtin_ia32_cmpnltps:
-  case X86::BI__builtin_ia32_cmpnltpd:
+  case X86::BI__builtin_ia32_cmpnltpd: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
+  }
   case X86::BI__builtin_ia32_cmpnleps:
-  case X86::BI__builtin_ia32_cmpnlepd:
+  case X86::BI__builtin_ia32_cmpnlepd: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
+  }
   case X86::BI__builtin_ia32_cmpordps:
-  case X86::BI__builtin_ia32_cmpordpd:
+  case X86::BI__builtin_ia32_cmpordpd: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
+  }
   case X86::BI__builtin_ia32_cmpps128_mask:
   case X86::BI__builtin_ia32_cmpps256_mask:
   case X86::BI__builtin_ia32_cmpps512_mask:
@@ -14053,6 +14090,8 @@
     if (IsMaskFCmp) {
       // We ignore SAE if strict FP is disabled. We only keep precise
      // exception behavior under strict FP.
+      // NOTE: If strict FP ever does go through here, a CGFPOptionsRAII
+      // object will be required.
       unsigned NumElts =
           cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
       Value *Cmp;
@@ -14063,6 +14102,7 @@
       return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
     }
 
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return getVectorFCmpIR(Pred, IsSignaling);
   }
 
@@ -14105,8 +14145,10 @@
   case X86::BI__builtin_ia32_vcvtph2ps256:
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
     return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
+  }
 
   // AVX512 bf16 intrinsics
   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
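For context on the pattern repeated throughout the CGBuiltin.cpp changes: CodeGenFunction::CGFPOptionsRAII is a scope guard that installs the FP options attached to the call expression E for the duration of the IR emission and restores the previous builder state on destruction. A simplified, self-contained sketch of that save/restore idea follows; the names FPState, Builder, and FPOptionsGuard are illustrative stand-ins, not clang's actual types:

    #include <string>
    #include <utility>

    // Illustrative stand-in for the builder's floating-point state.
    struct FPState {
      std::string ExceptionBehavior; // e.g. "fpexcept.strict" or "fpexcept.maytrap"
      std::string RoundingMode;      // e.g. "round.tonearest"
    };

    struct Builder {
      FPState FP{"fpexcept.maytrap", "round.tonearest"}; // command-line default
    };

    // Scope guard: install per-expression FP options, restore on destruction.
    class FPOptionsGuard {
      Builder &B;
      FPState Saved;

    public:
      FPOptionsGuard(Builder &B, FPState PerExprOpts)
          : B(B), Saved(std::move(B.FP)) {
        // Options taken from the AST, e.g. from #pragma float_control(except, on).
        B.FP = std::move(PerExprOpts);
      }
      ~FPOptionsGuard() { B.FP = std::move(Saved); } // back to the global default
    };

    int main() {
      Builder B;
      {
        FPOptionsGuard Guard(B, {"fpexcept.strict", "round.tonearest"});
        // Any constrained intrinsic emitted here would pick up "fpexcept.strict".
      }
      // Here B.FP.ExceptionBehavior is "fpexcept.maytrap" again.
    }

This is why the patch wraps each affected case label in braces: the guard's lifetime then covers exactly the Emit* call for that builtin.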