saghir created this revision. Herald added subscribers: shchenz, kbarton, nemanjai. saghir requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
This patch adds Big-Endian checks for the existing MMA test cases. It also updates the RUN lines in these tests to use -target-cpu pwr10 instead of -target-cpu future. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D109126 Files: clang/test/CodeGen/builtins-ppc-pair-mma.c clang/test/Sema/ppc-pair-mma-types.c clang/test/SemaCXX/ppc-pair-mma-types.cpp
Index: clang/test/SemaCXX/ppc-pair-mma-types.cpp =================================================================== --- clang/test/SemaCXX/ppc-pair-mma-types.cpp +++ clang/test/SemaCXX/ppc-pair-mma-types.cpp @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \ -// RUN: -fcxx-exceptions -target-cpu future %s -verify +// RUN: -fcxx-exceptions -target-cpu pwr10 %s -verify +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \ +// RUN: -fcxx-exceptions -target-cpu pwr10 %s -verify // vector quad Index: clang/test/Sema/ppc-pair-mma-types.c =================================================================== --- clang/test/Sema/ppc-pair-mma-types.c +++ clang/test/Sema/ppc-pair-mma-types.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -fsyntax-only \ -// RUN: -target-cpu future %s -verify +// RUN: -target-cpu pwr10 %s -verify +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown -fsyntax-only \ +// RUN: -target-cpu pwr10 %s -verify // The use of PPC MMA types is strongly restricted. 
Non-pointer MMA variables // can only be declared in functions and a limited number of operations are Index: clang/test/CodeGen/builtins-ppc-pair-mma.c =================================================================== --- clang/test/CodeGen/builtins-ppc-pair-mma.c +++ clang/test/CodeGen/builtins-ppc-pair-mma.c @@ -1,5 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \ +// RUN: -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-BE // CHECK-LABEL: @test1( // CHECK-NEXT: entry: @@ -8,6 +11,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test1( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2:![0-9]+]] +// CHECK-BE-NEXT: ret void +// void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -38,6 +48,28 @@ // CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16 // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test2( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x 
i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>* +// CHECK-BE-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 0 +// CHECK-BE-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16 +// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 1 +// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16 +// CHECK-BE-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>* +// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16 +// CHECK-BE-NEXT: [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 2 +// CHECK-BE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 32 +// CHECK-BE-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>* +// CHECK-BE-NEXT: store <16 x i8> [[TMP8]], <16 x i8>* [[TMP10]], align 16 +// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 3 +// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 48 +// CHECK-BE-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <16 x i8>* +// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16 +// CHECK-BE-NEXT: ret void +// void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp); } @@ -49,6 +81,13 @@ // CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6:![0-9]+]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test3( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa 
[[TBAA6:![0-9]+]] +// CHECK-BE-NEXT: ret void +// void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -71,6 +110,20 @@ // CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16 // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test4( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>* +// CHECK-BE-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0 +// CHECK-BE-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16 +// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1 +// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16 +// CHECK-BE-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>* +// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16 +// CHECK-BE-NEXT: ret void +// void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp); } @@ -84,6 +137,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test5( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], 
<512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -100,6 +162,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test6( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -114,6 +185,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test7( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -128,6 +206,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test8( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] 
= tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -142,6 +227,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test9( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -156,6 +248,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test10( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -170,6 +269,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret 
void // +// CHECK-BE-LABEL: @test11( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -184,6 +290,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test12( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -198,6 +311,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test13( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -214,6 +334,15 @@ // 
CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test14( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -228,6 +357,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test15( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -242,6 +378,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test16( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: 
store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -256,6 +399,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test17( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -270,6 +420,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test18( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -284,6 +441,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test19( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> 
@llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -298,6 +462,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test20( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -314,6 +485,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test21( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { 
__vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -330,6 +510,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test22( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -346,6 +535,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test23( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -362,6 +560,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // 
CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test24( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -378,6 +585,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test25( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -394,6 +610,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test26( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x 
i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -410,6 +635,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test27( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -426,6 +660,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test28( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], 
i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -442,6 +685,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test29( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -458,6 +710,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test30( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], 
align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -474,6 +735,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test31( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -490,6 +760,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test32( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = 
*((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -506,6 +785,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test33( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -522,6 +810,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test34( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -538,6 +835,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // 
+// CHECK-BE-LABEL: @test35( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -554,6 +860,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test36( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -570,6 +885,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test37( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x 
i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -586,6 +910,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test38( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -602,6 +935,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test39( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> 
[[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -618,6 +960,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test40( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -634,6 +985,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test41( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa 
[[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -650,6 +1010,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test42( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -666,6 +1035,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test43( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = 
*((__vector_pair *)vpp); @@ -682,6 +1060,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test44( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -698,6 +1085,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test45( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -714,6 +1110,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// 
CHECK-BE-LABEL: @test46( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -730,6 +1135,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test47( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -748,6 +1162,17 @@ // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test48( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, 
<512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) +// CHECK-BE-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -766,6 +1191,17 @@ // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test49( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) +// CHECK-BE-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -784,6 +1220,17 @@ // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test50( +// 
CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) +// CHECK-BE-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -802,6 +1249,17 @@ // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test51( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) +// CHECK-BE-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -820,6 +1278,17 @@ // 
CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test52( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -838,6 +1307,17 @@ // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test53( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test53(unsigned char *vqp, unsigned 
char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -856,6 +1336,17 @@ // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test54( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -874,6 +1365,17 @@ // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test55( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x 
i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -888,6 +1390,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test56( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -902,6 +1411,13 @@ // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test57( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -918,6 +1434,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test58( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] 
to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -934,6 +1459,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test59( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -950,6 +1484,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test60( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x 
i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -966,6 +1509,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test61( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -982,6 +1534,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test62( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x 
i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -998,6 +1559,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test63( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -1014,6 +1584,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test64( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned 
char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -1030,6 +1609,15 @@ // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test65( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -1045,6 +1633,14 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test66( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP0]]) +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]]) +// CHECK-BE-NEXT: ret void +// void test66(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_vsx_lxvp(0LL, vpp); __builtin_vsx_stxvp(vp, 0LL, vp2); @@ -1060,6 +1656,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test67( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: 
[[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[OFFSET:%.*]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[OFFSET]] +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test67(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) { __vector_pair vp = __builtin_vsx_lxvp(offset, vpp); __builtin_vsx_stxvp(vp, offset, vp2); @@ -1075,6 +1681,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test68( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 18 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 18 +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test68(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_vsx_lxvp(18LL, vpp); __builtin_vsx_stxvp(vp, 18LL, vp2); @@ -1090,6 +1706,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test69( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 1 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP4:%.*]] = getelementptr 
i8, i8* [[TMP3]], i64 1 +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test69(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_vsx_lxvp(1LL, vpp); __builtin_vsx_stxvp(vp, 1LL, vp2); @@ -1105,6 +1731,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test70( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 42 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 42 +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test70(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_vsx_lxvp(42LL, vpp); __builtin_vsx_stxvp(vp, 42LL, vp2); @@ -1120,6 +1756,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test71( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VPP:%.*]], i64 1024 +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast <256 x i1>* [[TMP0]] to i8* +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VP2:%.*]], i64 1024 +// CHECK-BE-NEXT: [[TMP4:%.*]] = bitcast <256 x i1>* [[TMP3]] to i8* +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test71(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_vsx_lxvp(32768LL, vpp); 
__builtin_vsx_stxvp(vp, 32768LL, vp2); @@ -1135,6 +1781,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test72( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 32799 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 32799 +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test72(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_vsx_lxvp(32799LL, vpp); __builtin_vsx_stxvp(vp, 32799LL, vp2); @@ -1152,6 +1808,18 @@ // CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test73( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8 +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]]) +// CHECK-BE-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad 
*)vqp); __vector_pair vp = __builtin_vsx_lxvp(8LL, vpp); @@ -1170,6 +1838,17 @@ // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test74( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP2]]) +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) +// CHECK-BE-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = __builtin_vsx_lxvp(0LL, vpp); @@ -1189,6 +1868,18 @@ // CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test75( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]] +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]]) +// CHECK-BE-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]]) +// CHECK-BE-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> 
[[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test75(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = __builtin_vsx_lxvp(offs, vpp); @@ -1203,6 +1894,13 @@ // CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test76( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]]) +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6]] +// CHECK-BE-NEXT: ret void +// void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = *((__vector_pair *)vpp); @@ -1225,6 +1923,20 @@ // CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16 // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test77( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>* +// CHECK-BE-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0 +// CHECK-BE-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16 +// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1 +// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16 +// CHECK-BE-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>* +// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* 
[[TMP7]], align 16 +// CHECK-BE-NEXT: ret void +// void test77(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) { __builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp); } @@ -1237,6 +1949,14 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test78( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP0]]) +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]]) +// CHECK-BE-NEXT: ret void +// void test78(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_mma_lxvp(0LL, vpp); __builtin_mma_stxvp(vp, 0LL, vp2); @@ -1252,6 +1972,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test79( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[OFFSET:%.*]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[OFFSET]] +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test79(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) { __vector_pair vp = __builtin_mma_lxvp(offset, vpp); __builtin_mma_stxvp(vp, offset, vp2); @@ -1267,6 +1997,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test80( +// CHECK-BE-NEXT: entry: +// 
CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 18 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 18 +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test80(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_mma_lxvp(18LL, vpp); __builtin_mma_stxvp(vp, 18LL, vp2); @@ -1282,6 +2022,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test81( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 1 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 1 +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test81(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_mma_lxvp(1LL, vpp); __builtin_mma_stxvp(vp, 1LL, vp2); @@ -1297,6 +2047,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test82( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 42 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// 
CHECK-BE-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 42 +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test82(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_mma_lxvp(42LL, vpp); __builtin_mma_stxvp(vp, 42LL, vp2); @@ -1312,6 +2072,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test83( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VPP:%.*]], i64 1024 +// CHECK-BE-NEXT: [[TMP1:%.*]] = bitcast <256 x i1>* [[TMP0]] to i8* +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VP2:%.*]], i64 1024 +// CHECK-BE-NEXT: [[TMP4:%.*]] = bitcast <256 x i1>* [[TMP3]] to i8* +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test83(const __vector_pair *vpp, const __vector_pair *vp2) { __vector_pair vp = __builtin_mma_lxvp(32768LL, vpp); __builtin_mma_stxvp(vp, 32768LL, vp2); @@ -1327,6 +2097,16 @@ // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test84( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 32799 +// CHECK-BE-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]]) +// CHECK-BE-NEXT: [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 32799 +// CHECK-BE-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]]) +// CHECK-BE-NEXT: ret void +// void test84(const __vector_pair *vpp, const __vector_pair *vp2) { 
__vector_pair vp = __builtin_mma_lxvp(32799LL, vpp); __builtin_mma_stxvp(vp, 32799LL, vp2); @@ -1344,6 +2124,18 @@ // CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test85( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8 +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]]) +// CHECK-BE-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0) +// CHECK-BE-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = __builtin_mma_lxvp(8LL, vpp); @@ -1362,6 +2154,17 @@ // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test86( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP2]]) +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]]) +// CHECK-BE-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* 
+// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = __builtin_mma_lxvp(0LL, vpp); @@ -1381,6 +2184,18 @@ // CHECK-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]] // CHECK-NEXT: ret void // +// CHECK-BE-LABEL: @test87( +// CHECK-BE-NEXT: entry: +// CHECK-BE-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8* +// CHECK-BE-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]] +// CHECK-BE-NEXT: [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]]) +// CHECK-BE-NEXT: [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]]) +// CHECK-BE-NEXT: [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>* +// CHECK-BE-NEXT: store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]] +// CHECK-BE-NEXT: ret void +// void test87(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); __vector_pair vp = __builtin_mma_lxvp(offs, vpp);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits