leonardchan created this revision. leonardchan added reviewers: chandlerc, echristo, phosek, serge-sans-paille. leonardchan added a project: clang. Herald added subscribers: dmgreen, Anastasia.
With the new pass manager enabled by default, some tests produce slightly different IR from the legacy PM. This patch just adds separate new PM runs and checks for the new PM IR. This fixes: Clang :: CodeGen/avx512-reduceMinMaxIntrin.c Clang :: CodeGen/avx512f-builtins.c Clang :: CodeGen/avx512vl-builtins.c Clang :: CodeGen/avx512vlbw-builtins.c Clang :: CodeGenOpenCL/convergent.cl For `CodeGenOpenCL/convergent.cl`, the new PM produced a slightly different for loop, but this still checks for no loop unrolling as intended. For the avx-builtins tests, the new PM would produce extra bitcasts that did not affect the overall logic of the function, but I do not know a simple way to get rid of these bitcasts. For `CodeGen/avx512-reduceMinMaxIntrin.c`, temporary local variables are produced in the codegen, but the allocas are out of order and the tests were pretty strict on order even though the logic/use of these variables was the same. Nearly all checks in this large file were broken, so I was hoping we could run it with legacy only for now and come back to it later to make this easier. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D63174 Files: clang/test/CodeGen/avx512-reduceMinMaxIntrin.c clang/test/CodeGen/avx512f-builtins.c clang/test/CodeGen/avx512vl-builtins.c clang/test/CodeGen/avx512vlbw-builtins.c clang/test/CodeGenOpenCL/convergent.cl
Index: clang/test/CodeGenOpenCL/convergent.cl =================================================================== --- clang/test/CodeGenOpenCL/convergent.cl +++ clang/test/CodeGenOpenCL/convergent.cl @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fno-experimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-LEGACY +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - -fexperimental-new-pass-manager | opt -instnamer -S | FileCheck -enable-var-scope %s --check-prefixes=CHECK,CHECK-NEWPM // This is initially assumed convergent, but can be deduced to not require it. @@ -117,7 +118,12 @@ // CHECK: [[for_body]]: // CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]] // CHECK-NOT: call spir_func void @nodupfun() -// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] + +// The new PM produces a slightly different IR for the loop from the legacy PM, +// but the test still checks that the loop is not unrolled. +// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] +// CHECK-NEW: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]] +// CHECK-NEW: [[for_body_crit_edge]]: void test_not_unroll() { for (int i = 0; i < 10; i++) Index: clang/test/CodeGen/avx512vlbw-builtins.c =================================================================== --- clang/test/CodeGen/avx512vlbw-builtins.c +++ clang/test/CodeGen/avx512vlbw-builtins.c @@ -1,6 +1,10 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s +// There are a few cases where instead accpeting the result of an instruction +// directly as an argument to a select, it instead goes through some bitcasts. +// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM +// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include <immintrin.h> @@ -901,6 +905,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} return _mm_mask_abs_epi8(__W,__U,__A); } @@ -910,6 +916,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[A]], <16 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i8> [[SEL]], <16 x i8> %{{.*}} return _mm_maskz_abs_epi8(__U,__A); } @@ -919,6 +927,8 @@ // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} return _mm256_mask_abs_epi8(__W,__U,__A); } @@ -928,6 +938,8 @@ // CHECK: [[SUB:%.*]] = sub <32 x i8> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[A]], <32 x i8> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> %{{.*}}, <32 x i8> [[SEL]], <32 x i8> %{{.*}} return _mm256_maskz_abs_epi8(__U,__A); } @@ -937,6 +949,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} return _mm_mask_abs_epi16(__W,__U,__A); } @@ -946,6 +960,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[A]], <8 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> %{{.*}}, <8 x i16> [[SEL]], <8 x i16> %{{.*}} return _mm_maskz_abs_epi16(__U,__A); } @@ -955,6 +971,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} return _mm256_mask_abs_epi16(__W,__U,__A); } @@ -964,6 +982,8 @@ // CHECK: [[SUB:%.*]] = sub <16 x i16> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[A]], <16 x i16> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[SEL]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[SEL:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> %{{.*}}, <16 x i16> [[SEL]], <16 x i16> %{{.*}} return _mm256_maskz_abs_epi16(__U,__A); } @@ -1229,6 +1249,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_max_epi8(__M,__A,__B); } @@ -1236,6 +1258,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_max_epi8(__W,__M,__A,__B); } @@ -1243,6 +1267,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_max_epi8(__M,__A,__B); } @@ -1250,6 +1276,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi8 // CHECK: [[CMP:%.*]] = icmp sgt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_max_epi8(__W,__M,__A,__B); } @@ -1257,6 +1285,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_max_epi16(__M,__A,__B); } @@ -1264,6 +1294,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_max_epi16(__W,__M,__A,__B); } @@ -1271,6 +1303,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_max_epi16(__M,__A,__B); } @@ -1278,6 +1312,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi16 // CHECK: [[CMP:%.*]] = icmp sgt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_max_epi16(__W,__M,__A,__B); } @@ -1285,6 +1321,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_max_epu8(__M,__A,__B); } @@ -1292,6 +1330,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_max_epu8(__W,__M,__A,__B); } @@ -1299,6 +1339,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_max_epu8(__M,__A,__B); } @@ -1306,6 +1348,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu8 // CHECK: [[CMP:%.*]] = icmp ugt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_max_epu8(__W,__M,__A,__B); } @@ -1313,6 +1357,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_max_epu16(__M,__A,__B); } @@ -1320,6 +1366,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_max_epu16(__W,__M,__A,__B); } @@ -1327,6 +1375,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_max_epu16(__M,__A,__B); } @@ -1334,6 +1384,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu16 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_max_epu16(__W,__M,__A,__B); } @@ -1341,6 +1393,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_min_epi8(__M,__A,__B); } @@ -1348,6 +1402,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_min_epi8(__W,__M,__A,__B); } @@ -1355,6 +1411,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_min_epi8(__M,__A,__B); } @@ -1362,6 +1420,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi8 // CHECK: [[CMP:%.*]] = icmp slt <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_min_epi8(__W,__M,__A,__B); } @@ -1369,6 +1429,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_min_epi16(__M,__A,__B); } @@ -1376,6 +1438,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_min_epi16(__W,__M,__A,__B); } @@ -1383,6 +1447,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_min_epi16(__M,__A,__B); } @@ -1390,6 +1456,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi16 // CHECK: [[CMP:%.*]] = icmp slt <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_min_epi16(__W,__M,__A,__B); } @@ -1397,6 +1465,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_maskz_min_epu8(__M,__A,__B); } @@ -1404,6 +1474,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i8> [[RES]], <16 x i8> {{.*}} return _mm_mask_min_epu8(__W,__M,__A,__B); } @@ -1411,6 +1483,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_maskz_min_epu8(__M,__A,__B); } @@ -1418,6 +1492,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu8 // CHECK: [[CMP:%.*]] = icmp ult <32 x i8> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <32 x i1> [[CMP]], <32 x i8> [[X]], <32 x i8> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <32 x i1> {{.*}}, <32 x i8> [[RES]], <32 x i8> {{.*}} return _mm256_mask_min_epu8(__W,__M,__A,__B); } @@ -1425,6 +1501,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_maskz_min_epu16(__M,__A,__B); } @@ -1432,6 +1510,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <8 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <8 x i1> {{.*}}, <8 x i16> [[RES]], <8 x i16> {{.*}} return _mm_mask_min_epu16(__W,__M,__A,__B); } @@ -1439,6 +1519,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_maskz_min_epu16(__M,__A,__B); } @@ -1446,6 +1528,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu16 // CHECK: [[CMP:%.*]] = icmp ult <16 x i16> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[X]], <16 x i16> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast [[SRCTY:<.*>]] [[RES]] to [[DSTTY:<.*>]] + // CHECK-NEWPM: [[RES:%.*]] = bitcast [[DSTTY]] [[TMP]] to [[SRCTY]] // CHECK: select <16 x i1> {{.*}}, <16 x i16> [[RES]], <16 x i16> {{.*}} return _mm256_mask_min_epu16(__W,__M,__A,__B); } Index: clang/test/CodeGen/avx512vl-builtins.c =================================================================== --- clang/test/CodeGen/avx512vl-builtins.c +++ clang/test/CodeGen/avx512vl-builtins.c @@ -1,5 +1,8 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -fno-experimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s +// There are a few cases where instead accpeting the result of an instruction +// directly as an argument to a select, it instead goes through some bitcasts. +// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include <immintrin.h> @@ -4589,6 +4592,8 @@ // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} return _mm_mask_abs_epi32(__W,__U,__A); } @@ -4597,6 +4602,8 @@ // CHECK: [[SUB:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[A]], <4 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[SEL]] to <2 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> [[SEL]], <4 x i32> %{{.*}} return _mm_maskz_abs_epi32(__U,__A); } @@ -4605,6 +4612,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} return _mm256_mask_abs_epi32(__W,__U,__A); } @@ -4613,6 +4622,8 @@ // CHECK: [[SUB:%.*]] = sub <8 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[A]], <8 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[SEL]] to <4 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> [[SEL]], <8 x i32> %{{.*}} return _mm256_maskz_abs_epi32(__U,__A); } @@ -4668,6 +4679,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_max_epi32(__M,__A,__B); } @@ -4675,6 +4688,8 @@ // CHECK-LABEL: @test_mm_mask_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_max_epi32(__W,__M,__A,__B); } @@ -4682,6 +4697,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_max_epi32(__M,__A,__B); } @@ -4689,6 +4706,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epi32 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_max_epi32(__W,__M,__A,__B); } @@ -4736,6 +4755,8 @@ // CHECK-LABEL: @test_mm_maskz_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_max_epu32(__M,__A,__B); } @@ -4743,6 +4764,8 @@ // CHECK-LABEL: @test_mm_mask_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_max_epu32(__W,__M,__A,__B); } @@ -4750,6 +4773,8 @@ // CHECK-LABEL: @test_mm256_maskz_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_max_epu32(__M,__A,__B); } @@ -4757,6 +4782,8 @@ // CHECK-LABEL: @test_mm256_mask_max_epu32 // CHECK: [[CMP:%.*]] = icmp ugt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_max_epu32(__W,__M,__A,__B); } @@ -4804,6 +4831,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_min_epi32(__M,__A,__B); } @@ -4811,6 +4840,8 @@ // CHECK-LABEL: @test_mm_mask_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_min_epi32(__W,__M,__A,__B); } @@ -4818,6 +4849,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_min_epi32(__M,__A,__B); } @@ -4825,6 +4858,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epi32 // CHECK: [[CMP:%.*]] = icmp slt <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_min_epi32(__W,__M,__A,__B); } @@ -4872,6 +4907,8 @@ // CHECK-LABEL: @test_mm_maskz_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_maskz_min_epu32(__M,__A,__B); } @@ -4879,6 +4916,8 @@ // CHECK-LABEL: @test_mm_mask_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <4 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[X]], <4 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <2 x i64> [[TMP]] to <4 x i32> // CHECK: select <4 x i1> {{.*}}, <4 x i32> [[RES]], <4 x i32> {{.*}} return _mm_mask_min_epu32(__W,__M,__A,__B); } @@ -4886,6 +4925,8 @@ // CHECK-LABEL: @test_mm256_maskz_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_maskz_min_epu32(__M,__A,__B); } @@ -4893,6 +4934,8 @@ // CHECK-LABEL: @test_mm256_mask_min_epu32 // CHECK: [[CMP:%.*]] = icmp ult <8 x i32> [[X:%.*]], [[Y:%.*]] // CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[X]], <8 x i32> [[Y]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <8 x i32> [[RES]] to <4 x i64> + // CHECK-NEWPM: [[RES:%.*]] = bitcast <4 x i64> [[TMP]] to <8 x i32> // CHECK: select <8 x i1> {{.*}}, <8 x i32> [[RES]], <8 x i32> {{.*}} return _mm256_mask_min_epu32(__W,__M,__A,__B); } Index: clang/test/CodeGen/avx512f-builtins.c =================================================================== --- clang/test/CodeGen/avx512f-builtins.c +++ clang/test/CodeGen/avx512f-builtins.c @@ -1,5 +1,10 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s -// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -fno-experimental-new-pass-manager -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -fno-experimental-new-pass-manager -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s + +// There are a few cases where instead accpeting the result of an instruction +// directly as an argument to a select, it instead goes through some bitcasts. +// RUN: %clang_cc1 -fexperimental-new-pass-manager -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM +// RUN: %clang_cc1 -fexperimental-new-pass-manager -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,CHECK-NEWPM #include <immintrin.h> @@ -10480,20 +10485,24 @@ __m512i test_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_mask_abs_epi32 + // CHECK-LABEL: @test_mm512_mask_abs_epi32 // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} return _mm512_mask_abs_epi32 (__W,__U,__A); } __m512i test_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { - // CHECK-LABEL: @test_mm512_maskz_abs_epi32 + // CHECK-LABEL: @test_mm512_maskz_abs_epi32 // CHECK: [[SUB:%.*]] = sub <16 x i32> zeroinitializer, [[A:%.*]] // CHECK: [[CMP:%.*]] = icmp sgt <16 x i32> [[A]], zeroinitializer // CHECK: [[SEL:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[A]], <16 x i32> [[SUB]] + // CHECK-NEWPM: [[TMP:%.*]] = bitcast <16 x i32> [[SEL]] to <8 x i64> + // CHECK-NEWPM: [[SEL:%.*]] = bitcast <8 x i64> [[TMP]] to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> [[SEL]], <16 x i32> %{{.*}} return _mm512_maskz_abs_epi32 (__U,__A); } Index: clang/test/CodeGen/avx512-reduceMinMaxIntrin.c =================================================================== --- clang/test/CodeGen/avx512-reduceMinMaxIntrin.c +++ clang/test/CodeGen/avx512-reduceMinMaxIntrin.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -fno-experimental-new-pass-manager -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s #include <immintrin.h>
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits