This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGbfb6f47e9ea4: [SVE] Change some bfloat lane intrinsics to use i32 immediates (authored by david-arm).
Changed prior to commit:
  https://reviews.llvm.org/D138788?vs=480451&id=480808#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D138788/new/

https://reviews.llvm.org/D138788

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
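In short: the indexed bfloat intrinsics llvm.aarch64.sve.bfdot.lane, llvm.aarch64.sve.bfmlalb.lane and llvm.aarch64.sve.bfmlalt.lane are renamed to ".v2" variants that take their lane index as an i32 immediate instead of i64, and AutoUpgrade rewrites old-style calls found in existing bitcode. A minimal before/after IR sketch (the value names are illustrative, not taken from the patch):

  ; Old-style call as it may appear in pre-existing bitcode: i64 lane index.
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)

  ; After auto-upgrade: the call targets the renamed intrinsic and the lane
  ; index is rebuilt as an i32 constant with the same (zero-extended) value.
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)

The full patch follows.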
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
@@ -19,7 +19,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[0]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
   ret <vscale x 4 x float> %out
 }
@@ -28,7 +28,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 1)
   ret <vscale x 4 x float> %out
 }
@@ -37,7 +37,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[2]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 2)
   ret <vscale x 4 x float> %out
 }
@@ -46,7 +46,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[3]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 3)
   ret <vscale x 4 x float> %out
 }
@@ -68,7 +68,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[0]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
   ret <vscale x 4 x float> %out
 }
@@ -77,7 +77,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 1)
   ret <vscale x 4 x float> %out
 }
@@ -86,7 +86,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[2]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 2)
   ret <vscale x 4 x float> %out
 }
@@ -95,7 +95,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[3]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 3)
   ret <vscale x 4 x float> %out
 }
@@ -104,7 +104,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[4]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 4)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 4)
   ret <vscale x 4 x float> %out
 }
@@ -113,7 +113,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[5]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 5)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 5)
   ret <vscale x 4 x float> %out
 }
@@ -122,7 +122,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[6]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 6)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 6)
   ret <vscale x 4 x float> %out
 }
@@ -131,7 +131,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 7)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 7)
   ret <vscale x 4 x float> %out
 }
@@ -153,7 +153,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[0]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
   ret <vscale x 4 x float> %out
 }
@@ -162,7 +162,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 1)
   ret <vscale x 4 x float> %out
 }
@@ -171,7 +171,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[2]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 2)
   ret <vscale x 4 x float> %out
 }
@@ -180,7 +180,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[3]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 3)
   ret <vscale x 4 x float> %out
 }
@@ -189,7 +189,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[4]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 4)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 4)
   ret <vscale x 4 x float> %out
 }
@@ -198,7 +198,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[5]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 5)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 5)
   ret <vscale x 4 x float> %out
 }
@@ -207,7 +207,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[6]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 6)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 6)
   ret <vscale x 4 x float> %out
 }
@@ -216,7 +216,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 7)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 7)
   ret <vscale x 4 x float> %out
 }
@@ -260,11 +260,11 @@
 }

 declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmmla(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)

Index: llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
===================================================================
--- llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
+++ llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
@@ -160,6 +160,33 @@
   ret <vscale x 4 x i32> %ext
 }

+; bfdot
+define <vscale x 4 x float> @bfdot_lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
+; CHECK-LABEL: @bfdot_lane
+; CHECK: %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
+; CHECK-NEXT: ret <vscale x 4 x float> %out
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  ret <vscale x 4 x float> %out
+}
+
+; bfmlalb
+define <vscale x 4 x float> @bfmlalb_lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
+; CHECK-LABEL: @bfmlalb_lane
+; CHECK: %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
+; CHECK-NEXT: ret <vscale x 4 x float> %out
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  ret <vscale x 4 x float> %out
+}
+
+; bfmlalt
+define <vscale x 4 x float> @bfmlalt_lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
+; CHECK-LABEL: @bfmlalt_lane
+; CHECK: %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
+; CHECK-NEXT: ret <vscale x 4 x float> %out
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  ret <vscale x 4 x float> %out
+}
+
 declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2(<vscale x 16 x i8>, <vscale x 16 x i8>)
@@ -168,3 +195,6 @@
 declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8(<vscale x 16 x i8>)
 declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)

Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -8317,13 +8317,13 @@
   let DestructiveInstType = DestructiveOther;
 }

-multiclass sve_bfloat_dot<string asm, SDPatternOperator op> {
-  def NAME : sve_float_dot<0b1, asm>;
-  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
+multiclass sve_float_dot<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
+  def NAME : sve_float_dot<bf, asm>;
+  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, InVT, InVT, !cast<Instruction>(NAME)>;
 }

 class sve_float_dot_indexed<bit bf, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop),
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$iop),
   asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
@@ -8342,9 +8342,9 @@
   let DestructiveInstType = DestructiveOther;
 }

-multiclass sve_bfloat_dot_indexed<string asm, SDPatternOperator op> {
-  def NAME : sve_float_dot_indexed<0b1, asm>;
-  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexS_timm, !cast<Instruction>(NAME)>;
+multiclass sve_float_dot_indexed<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
+  def NAME : sve_float_dot_indexed<bf, asm>;
+  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
 }

 class sve_bfloat_matmul<string asm>
@@ -8383,7 +8383,7 @@
 }

 class sve_bfloat_matmul_longvecl_idx<bit BT, bit sub, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop),
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH32b:$iop),
   asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
@@ -8407,7 +8407,7 @@
 multiclass sve_bfloat_matmul_longvecl_idx<bit BT, bit sub, string asm, SDPatternOperator op> {
   def NAME : sve_bfloat_matmul_longvecl_idx<BT, sub, asm>;
-  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexH_timm, !cast<Instruction>(NAME)>;
+  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME)>;
 }

 class sve_bfloat_convert<bit N, string asm>
@@ -8784,7 +8784,7 @@
 // SVE two-way dot product (indexed)
 class sve2p1_two_way_dot_vvi<string mnemonic, bit u>
-  : I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$i2),
+  : I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$i2),
     mnemonic, "\t$Zda, $Zn, $Zm$i2", "", []>, Sched<[]> {
   bits<5> Zda;

Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2162,8 +2162,8 @@
 } // End HasSVEorSME

 let Predicates = [HasBF16, HasSVEorSME] in {
-  defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
-  defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
+  defm BFDOT_ZZZ : sve_float_dot<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>;
+  defm BFDOT_ZZI : sve_float_dot_indexed<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>;
 } // End HasBF16, HasSVEorSME

 let Predicates = [HasBF16, HasSVE] in {
@@ -2173,8 +2173,8 @@
 let Predicates = [HasBF16, HasSVEorSME] in {
   defm BFMLALB_ZZZ : sve_bfloat_matmul_longvecl<0b0, 0b0, "bfmlalb", int_aarch64_sve_bfmlalb>;
   defm BFMLALT_ZZZ : sve_bfloat_matmul_longvecl<0b1, 0b0, "bfmlalt", int_aarch64_sve_bfmlalt>;
-  defm BFMLALB_ZZZI : sve_bfloat_matmul_longvecl_idx<0b0, 0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>;
-  defm BFMLALT_ZZZI : sve_bfloat_matmul_longvecl_idx<0b1, 0b0, "bfmlalt", int_aarch64_sve_bfmlalt_lane>;
+  defm BFMLALB_ZZZI : sve_bfloat_matmul_longvecl_idx<0b0, 0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane_v2>;
+  defm BFMLALT_ZZZI : sve_bfloat_matmul_longvecl_idx<0b1, 0b0, "bfmlalt", int_aarch64_sve_bfmlalt_lane_v2>;
   defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>;
   defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
 } // End HasBF16, HasSVEorSME

Index: llvm/lib/IR/AutoUpgrade.cpp
===================================================================
--- llvm/lib/IR/AutoUpgrade.cpp
+++ llvm/lib/IR/AutoUpgrade.cpp
@@ -605,6 +605,21 @@
           F->arg_begin()->getType());
       return true;
     }
+    if (Name == "aarch64.sve.bfdot.lane") {
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::aarch64_sve_bfdot_lane_v2);
+      return true;
+    }
+    if (Name == "aarch64.sve.bfmlalb.lane") {
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::aarch64_sve_bfmlalb_lane_v2);
+      return true;
+    }
+    if (Name == "aarch64.sve.bfmlalt.lane") {
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::aarch64_sve_bfmlalt_lane_v2);
+      return true;
+    }
     static const Regex LdRegex("^aarch64\\.sve\\.ld[234](.nxv[a-z0-9]+|$)");
     if (LdRegex.match(Name)) {
       Type *ScalarTy =
@@ -3955,6 +3970,16 @@
     NewCall = Builder.CreateCall(NewFn, Args);
     break;
   }
+  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
+  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
+  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
+    LLVMContext &Ctx = F->getParent()->getContext();
+    SmallVector<Value *, 4> Args(CI->args());
+    Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
+                               cast<ConstantInt>(Args[3])->getZExtValue());
+    NewCall = Builder.CreateCall(NewFn, Args);
+    break;
+  }
   case Intrinsic::aarch64_sve_ld3_sret:
   case Intrinsic::aarch64_sve_ld4_sret:
   case Intrinsic::aarch64_sve_ld2_sret: {

Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1516,7 +1516,7 @@
 class SVE_4Vec_BF16_Indexed
     : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
-                            [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i64_ty],
+                            [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty],
                             [IntrNoMem, ImmArg<ArgIndex<3>>]>;

 //
@@ -2519,9 +2519,9 @@
 def int_aarch64_sve_bfmmla : SVE_4Vec_BF16;

-def int_aarch64_sve_bfdot_lane : SVE_4Vec_BF16_Indexed;
-def int_aarch64_sve_bfmlalb_lane : SVE_4Vec_BF16_Indexed;
-def int_aarch64_sve_bfmlalt_lane : SVE_4Vec_BF16_Indexed;
+def int_aarch64_sve_bfdot_lane_v2 : SVE_4Vec_BF16_Indexed;
+def int_aarch64_sve_bfmlalb_lane_v2 : SVE_4Vec_BF16_Indexed;
+def int_aarch64_sve_bfmlalt_lane_v2 : SVE_4Vec_BF16_Indexed;
 }

 //

Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
@@ -31,12 +31,12 @@
 // CHECK-LABEL: @test_bfmlalt_lane_0_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalt_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalt_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
@@ -45,12 +45,12 @@
 // CHECK-LABEL: @test_bfmlalt_lane_7_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalt_lane_7_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalt_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {

Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
@@ -31,12 +31,12 @@
 // CHECK-LABEL: @test_bfmlalb_lane_0_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalb_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalb_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
@@ -45,12 +45,12 @@
 // CHECK-LABEL: @test_bfmlalb_lane_7_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalb_lane_7_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalb_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {

Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
@@ -31,12 +31,12 @@
 // CHECK-LABEL: @test_bfdot_lane_0_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_bfdot_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfdot_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
@@ -45,12 +45,12 @@
 // CHECK-LABEL: @test_bfdot_lane_3_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 3)
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 3)
 // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_bfdot_lane_3_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 3)
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 3)
 // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfdot_lane_3_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {

Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -537,9 +537,9 @@
 def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone]>;
 def SVBFMLAL_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone]>;
 def SVBFMLALT_N : SInst<"svbfmlalt[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone]>;
-def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfdot_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
-def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
-def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfdot_lane_v2", [IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
+def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalb_lane_v2", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalt_lane_v2", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }

////////////////////////////////////////////////////////////////////////////////
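A note for anyone emitting the new intrinsics directly: the lane argument is declared ImmArg<ArgIndex<3>> in IntrinsicsAArch64.td, so it must be a compile-time-constant i32 (lanes 0-3 for bfdot, 0-7 for bfmlalb/bfmlalt, matching the ImmCheck ranges in arm_sve.td). A minimal sketch of a well-formed call; the function name @example is hypothetical:

  define <vscale x 4 x float> @example(<vscale x 4 x float> %acc, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) {
    ; The lane index must be an immediate; 7 is the highest valid lane for bfmlalb.
    %r = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> %acc, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 7)
    ret <vscale x 4 x float> %r
  }

  declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)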