david-arm created this revision.
david-arm added reviewers: sdesmalen, paulwalker-arm, kmclaughlin.
Herald added subscribers: ctetreau, psnobl, hiraditya, tschuett.
Herald added a reviewer: efriedma.
Herald added a project: All.
david-arm requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Almost all other SVE LLVM IR intrinsics take i32 values for
lane indices and other immediates. This patch brings the bfloat
intrinsics in line with them, which will also make it easier to
add support for the SVE2.1 float intrinsics in the future, since
they reuse the same underlying instruction classes.
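
For example, after this change the bfdot lane intrinsic is declared
with an i32 immediate for the lane index (previously i64):

  declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)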


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D138788

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll

Index: llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-bfloat.ll
@@ -19,7 +19,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfdot z0.s, z1.h, z2.h[0]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
   ret <vscale x 4 x float> %out
 }
 
@@ -28,7 +28,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfdot z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 1)
   ret <vscale x 4 x float> %out
 }
 
@@ -37,7 +37,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfdot z0.s, z1.h, z2.h[2]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 2)
   ret <vscale x 4 x float> %out
 }
 
@@ -46,7 +46,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfdot z0.s, z1.h, z2.h[3]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 3)
   ret <vscale x 4 x float> %out
 }
 
@@ -68,7 +68,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[0]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
   ret <vscale x 4 x float> %out
 }
 
@@ -77,7 +77,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 1)
   ret <vscale x 4 x float> %out
 }
 
@@ -86,7 +86,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[2]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 2)
   ret <vscale x 4 x float> %out
 }
 
@@ -95,7 +95,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[3]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 3)
   ret <vscale x 4 x float> %out
 }
 
@@ -104,7 +104,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[4]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 4)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 4)
   ret <vscale x 4 x float> %out
 }
 
@@ -113,7 +113,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[5]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 5)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 5)
   ret <vscale x 4 x float> %out
 }
 
@@ -122,7 +122,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[6]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 6)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 6)
   ret <vscale x 4 x float> %out
 }
 
@@ -131,7 +131,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalb z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 7)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 7)
   ret <vscale x 4 x float> %out
 }
 
@@ -153,7 +153,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[0]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 0)
   ret <vscale x 4 x float> %out
 }
 
@@ -162,7 +162,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[1]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 1)
   ret <vscale x 4 x float> %out
 }
 
@@ -171,7 +171,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[2]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 2)
   ret <vscale x 4 x float> %out
 }
 
@@ -180,7 +180,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[3]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 3)
   ret <vscale x 4 x float> %out
 }
 
@@ -189,7 +189,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[4]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 4)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 4)
   ret <vscale x 4 x float> %out
 }
 
@@ -198,7 +198,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[5]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 5)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 5)
   ret <vscale x 4 x float> %out
 }
 
@@ -207,7 +207,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[6]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 6)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 6)
   ret <vscale x 4 x float> %out
 }
 
@@ -216,7 +216,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bfmlalt z0.s, z1.h, z2.h[7]
 ; CHECK-NEXT:    ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 7)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 7)
   ret <vscale x 4 x float> %out
 }
 
@@ -260,11 +260,11 @@
 }
 
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
 declare <vscale x 4 x float> @llvm.aarch64.sve.bfmmla(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)
 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -8315,13 +8315,13 @@
   let DestructiveInstType = DestructiveOther;
 }
 
-multiclass sve_bfloat_dot<string asm, SDPatternOperator op> {
-  def NAME : sve_float_dot<0b1, asm>;
-  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
+multiclass sve_float_dot<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
+  def NAME : sve_float_dot<bf, asm>;
+  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, InVT, InVT, !cast<Instruction>(NAME)>;
 }
 
 class sve_float_dot_indexed<bit bf, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop),
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$iop),
     asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
@@ -8340,9 +8340,9 @@
   let DestructiveInstType = DestructiveOther;
 }
 
-multiclass sve_bfloat_dot_indexed<string asm, SDPatternOperator op> {
-  def NAME : sve_float_dot_indexed<0b1, asm>;
-  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexS_timm, !cast<Instruction>(NAME)>;
+multiclass sve_float_dot_indexed<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
+  def NAME : sve_float_dot_indexed<bf, asm>;
+  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
 }
 
 class sve_bfloat_matmul<string asm>
@@ -8381,7 +8381,7 @@
 }
 
 class sve_bfloat_matmul_longvecl_idx<bit BT, bit sub, string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop),
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH32b:$iop),
     asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
@@ -8405,7 +8405,7 @@
 
 multiclass sve_bfloat_matmul_longvecl_idx<bit BT, bit sub, string asm, SDPatternOperator op> {
   def NAME : sve_bfloat_matmul_longvecl_idx<BT, sub, asm>;
-  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexH_timm, !cast<Instruction>(NAME)>;
+  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME)>;
 }
 
 class sve_bfloat_convert<bit N, string asm>
@@ -8782,7 +8782,7 @@
 
 // SVE two-way dot product (indexed)
 class sve2p1_two_way_dot_vvi<string mnemonic, bit u>
-    : I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$i2),
+    : I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS32b:$i2),
         mnemonic, "\t$Zda, $Zn, $Zm$i2",
         "", []>, Sched<[]> {
   bits<5> Zda;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2159,8 +2159,8 @@
 } // End HasSVEorSME
 
 let Predicates = [HasBF16, HasSVEorSME] in {
-  defm BFDOT_ZZZ    : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
-  defm BFDOT_ZZI    : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
+  defm BFDOT_ZZZ    : sve_float_dot<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>;
+  defm BFDOT_ZZI    : sve_float_dot_indexed<0b1, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane>;
 } // End HasBF16, HasSVEorSME
 
 let Predicates = [HasBF16, HasSVE] in {
Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1516,7 +1516,7 @@
 
 class SVE_4Vec_BF16_Indexed
     : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
-                [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i64_ty],
+                [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty],
                 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
 
 //
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
@@ -31,12 +31,12 @@
 
 // CHECK-LABEL: @test_bfmlalt_lane_0_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalt_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalt_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
@@ -45,12 +45,12 @@
 
 // CHECK-LABEL: @test_bfmlalt_lane_7_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalt_lane_7_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalt_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
@@ -31,12 +31,12 @@
 
 // CHECK-LABEL: @test_bfmlalb_lane_0_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalb_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalb_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
@@ -45,12 +45,12 @@
 
 // CHECK-LABEL: @test_bfmlalb_lane_7_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z23test_bfmlalb_lane_7_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 7)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfmlalb_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
@@ -31,12 +31,12 @@
 
 // CHECK-LABEL: @test_bfdot_lane_0_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_bfdot_lane_0_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfdot_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
@@ -45,12 +45,12 @@
 
 // CHECK-LABEL: @test_bfdot_lane_3_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 3)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 3)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z21test_bfdot_lane_3_f32u13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i64 3)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_bfdot_lane_3_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -537,9 +537,9 @@
   def SVBFDOT_N      : SInst<"svbfdot[_n_{0}]",      "MMda",  "b", MergeNone, "aarch64_sve_bfdot",        [IsOverloadNone]>;
   def SVBFMLAL_N     : SInst<"svbfmlalb[_n_{0}]",    "MMda",  "b", MergeNone, "aarch64_sve_bfmlalb",      [IsOverloadNone]>;
   def SVBFMLALT_N    : SInst<"svbfmlalt[_n_{0}]",    "MMda",  "b", MergeNone, "aarch64_sve_bfmlalt",      [IsOverloadNone]>;
-  def SVBFDOT_LANE   : SInst<"svbfdot_lane[_{0}]",   "MMddn", "b", MergeNone, "aarch64_sve_bfdot_lane",   [IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
-  def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
-  def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFDOT_LANE   : SInst<"svbfdot_lane[_{0}]",   "MMddi", "b", MergeNone, "aarch64_sve_bfdot_lane",   [IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
+  def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////