david-arm created this revision.
david-arm added reviewers: sdesmalen, paulwalker-arm, kmclaughlin.
Herald added subscribers: ctetreau, psnobl, hiraditya, tschuett.
Herald added a reviewer: efriedma.
Herald added a project: All.
david-arm requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.
Almost all of the other SVE LLVM IR intrinsics take i32 values for lane indices or other immediates. We should bring the bfloat intrinsics in line with that. It will also make it easier to add support for the SVE2.1 float intrinsics in future, since they reuse the same underlying instruction classes.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D138788

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
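For reference, a minimal sketch of the net effect at the C and IR level, based on the updated CHECK lines in acle_sve_bfdot.c below (the svbfdot_lane_f32 spelling is the usual ACLE name and is assumed here rather than taken verbatim from the patch):

  // C source: the lane index must be a compile-time constant in [0, 3].
  svfloat32_t test_bfdot_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
    return svbfdot_lane_f32(x, y, z, 0);
  }

  ; IR after this patch: the lane index is emitted as an i32 immediate
  ; (previously i64), matching the other SVE lane intrinsics.
  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %x, <vscale x 8 x bfloat> %y, <vscale x 8 x bfloat> %z, i32 0)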
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
@@ -19,7 +19,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[0]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
   ret <vscale x 4 x float> %out
 }
 
@@ -28,7 +28,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 1)
   ret <vscale x 4 x float> %out
 }
 
@@ -37,7 +37,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[2]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 2)
   ret <vscale x 4 x float> %out
 }
 
@@ -46,7 +46,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[3]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 3)
   ret <vscale x 4 x float> %out
 }
 
@@ -68,7 +68,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[0]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
   ret <vscale x 4 x float> %out
 }
 
@@ -77,7 +77,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 1)
   ret <vscale x 4 x float> %out
 }
 
@@ -86,7 +86,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[2]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 2)
   ret <vscale x 4 x float> %out
 }
 
@@ -95,7 +95,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[3]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 3)
   ret <vscale x 4 x float> %out
 }
 
@@ -104,7 +104,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[4]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 4)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 4)
   ret <vscale x 4 x float> %out
 }
 
@@ -113,7 +113,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[5]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 5)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 5)
   ret <vscale x 4 x float> %out
 }
 
@@ -122,7 +122,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[6]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 6)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 6)
   ret <vscale x 4 x float> %out
 }
 
@@ -131,7 +131,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 7)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 7)
   ret <vscale x 4 x float> %out
 }
 
@@ -153,7 +153,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[0]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
   ret <vscale x 4 x float> %out
 }
 
@@ -162,7 +162,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 1)
   ret <vscale x 4 x float> %out
 }
 
@@ -171,7 +171,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[2]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 2)
   ret <vscale x 4 x float> %out
 }
 
@@ -180,7 +180,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[3]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 3)
   ret <vscale x 4 x float> %out
 }
 
@@ -189,7 +189,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[4]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 4)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 4)
   ret <vscale x 4 x float> %out
 }
 
@@ -198,7 +198,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[5]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 5)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 5)
   ret <vscale x 4 x float> %out
 }
 
@@ -207,7 +207,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[6]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 6)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 6)
   ret <vscale x 4 x float> %out
 }
 
@@ -216,7 +216,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 7)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 7)
   ret <vscale x 4 x float> %out
 }
 
@@ -260,11 +260,11 @@
 }
 
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmmla(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -8315,13 +8315,13 @@
   let DestructiveInstType = DestructiveOther;
 }
 
-multiclass sve_bfloat_dot<string asm, SDPatternOperator op> {
-  def NAME : sve_float_dot<0b1, asm>;
-  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
+multiclass sve_float_dot<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
+  def NAME : sve_float_dot<bf, asm>;
+  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, InVT, InVT, !cast<Instruction>(NAME)>;
 }
 
 class sve_float_dot_indexed<bit bf, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop),
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$iop),
   asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
@@ -8340,9 +8340,9 @@
   let DestructiveInstType = DestructiveOther;
 }
 
-multiclass sve_bfloat_dot_indexed<string asm, SDPatternOperator op> {
-  def NAME : sve_float_dot_indexed<0b1, asm>;
-  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexS_timm, !cast<Instruction>(NAME)>;
+multiclass sve_float_dot_indexed<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
+  def NAME : sve_float_dot_indexed<bf, asm>;
+  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
 }
 
 class sve_bfloat_matmul<string asm>
@@ -8381,7 +8381,7 @@
 }
 
 class sve_bfloat_matmul_longvecl_idx<bit BT, bit sub, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop),
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH32b:$iop),
   asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
@@ -8405,7 +8405,7 @@
 multiclass sve_bfloat_matmul_longvecl_idx<bit BT, bit sub, string asm,
                                           SDPatternOperator op> {
   def NAME : sve_bfloat_matmul_longvecl_idx<BT, sub, asm>;
-  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexH_timm, !cast<Instruction>(NAME)>;
+  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME)>;
 }
 
 class sve_bfloat_convert<bit N, string asm>
@@ -8782,7 +8782,7 @@
 
 // SVE two-way dot product (indexed)
 class sve2p1_two_way_dot_vvi<string mnemonic, bit u>
-  : I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$i2),
+  : I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$i2),
     mnemonic, "\t$Zda, $Zn, $Zm$i2",
     "", []>, Sched<[]> {
   bits<5> Zda;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2159,8 +2159,8 @@
 } // End HasSVEorSME
 
 let Predicates = [HasBF16, HasSVEorSME] in {
-  defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
-  defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
+  defm BFDOT_ZZZ : sve_float_dot<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>;
+  defm BFDOT_ZZI : sve_float_dot_indexed<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane>;
 } // End HasBF16, HasSVEorSME
 
 let Predicates = [HasBF16, HasSVE] in {
Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1516,7 +1516,7 @@
 
 class SVE_4Vec_BF16_Indexed
     : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
-                            [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i64_ty],
+                            [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty],
                             [IntrNoMem, ImmArg<ArgIndex<3>>]>;
 
 //
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
@@ -31,12 +31,12 @@
 
 // CHECK-LABEL: @test_bfmlalt_lane_0_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalt_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalt_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
@@ -45,12 +45,12 @@
 
 // CHECK-LABEL: @test_bfmlalt_lane_7_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalt_lane_7_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalt_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
@@ -31,12 +31,12 @@
 
 // CHECK-LABEL: @test_bfmlalb_lane_0_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalb_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalb_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
@@ -45,12 +45,12 @@
 
 // CHECK-LABEL: @test_bfmlalb_lane_7_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalb_lane_7_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalb_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
@@ -31,12 +31,12 @@
 
 // CHECK-LABEL: @test_bfdot_lane_0_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_bfdot_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfdot_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
@@ -45,12 +45,12 @@
 
 // CHECK-LABEL: @test_bfdot_lane_3_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 3)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 3)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_bfdot_lane_3_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 3)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 3)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfdot_lane_3_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -537,9 +537,9 @@
   def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone]>;
   def SVBFMLAL_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone]>;
   def SVBFMLALT_N : SInst<"svbfmlalt[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone]>;
-  def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfdot_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
-  def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
-  def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfdot_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////