[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)

via cfe-commits Mon, 22 Apr 2024 05:42:31 -0700

================
@@ -0,0 +1,457 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1 -verify-machineinstrs < 
%s | FileCheck %s
+
+;MOVAZ (tile to vector, Multi)
+
+
+;;
+; X2 - Horiz
+;;
+
+define {<vscale x 16 x i8>, <vscale x 16 x i8>} @test_readz_hor_z8_i8_x2(i32 
%tile, i32 %slice) {
+; CHECK-LABEL: test_readz_hor_z8_i8_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w12, w1
+; CHECK-NEXT:    movaz { z0.b, z1.b }, za0h.b[w12, 0:1]
+; CHECK-NEXT:    movaz { z0.b, z1.b }, za0h.b[w12, 14:15]
+; CHECK-NEXT:    ret
+  %res = call  {<vscale x 16 x i8>, <vscale x 16 x i8>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 %slice)
+  %slice.max = add i32 %slice, 14
+  %res2 = call {<vscale x 16 x i8>, <vscale x 16 x i8>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 %slice.max)
+  ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %res2
+}
+define {<vscale x 8 x i16>, <vscale x 8 x i16>} @test_readz_hor_z16_i16_x2(i32 
%slice) {
+; CHECK-LABEL: test_readz_hor_z16_i16_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    movaz { z0.h, z1.h }, za0h.h[w12, 0:1]
+; CHECK-NEXT:    movaz { z0.h, z1.h }, za1h.h[w12, 6:7]
+; CHECK-NEXT:    ret
+  %res = call  {<vscale x 8 x i16>, <vscale x 8 x i16>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 %slice)
+  %slice.max = add i32 %slice, 6
+  %res2 = call {<vscale x 8 x i16>, <vscale x 8 x i16>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 %slice.max)
+  ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %res2
+}
+
+define {<vscale x 4 x i32>, <vscale x 4 x i32>} @test_readz_hor_z32_i32_x2(i32 
%slice) {
+; CHECK-LABEL: test_readz_hor_z32_i32_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    movaz { z0.s, z1.s }, za0h.s[w12, 0:1]
+; CHECK-NEXT:    movaz { z0.s, z1.s }, za3h.s[w12, 2:3]
+; CHECK-NEXT:    ret
+  %res = call  {<vscale x 4 x i32>, <vscale x 4 x i32>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 %slice)
+  %slice.max = add i32 %slice, 2
+  %res2 = call {<vscale x 4 x i32>, <vscale x 4 x i32>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 3, i32 %slice.max)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res2
+}
+
+define {<vscale x 2 x i64>, <vscale x 2 x i64>} @test_readz_hor_z64_i64_x2(i32 
%slice) {
+; CHECK-LABEL: test_readz_hor_z64_i64_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    movaz { z0.d, z1.d }, za0h.d[w12, 0:1]
+; CHECK-NEXT:    movaz { z2.d, z3.d }, za7h.d[w12, 0:1]
+; CHECK-NEXT:    ret
+  %res = call  {<vscale x 2 x i64>, <vscale x 2 x i64>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 0, i32 %slice)
+  %res2 = call {<vscale x 2 x i64>, <vscale x 2 x i64>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv2i64(i32 7, i32 %slice)
+  ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %res
+}
+
+define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} 
@test_readz_hor_z16_bf16_x2(i32 %slice) {
+; CHECK-LABEL: test_readz_hor_z16_bf16_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    movaz { z0.h, z1.h }, za0h.h[w12, 0:1]
+; CHECK-NEXT:    movaz { z0.h, z1.h }, za1h.h[w12, 6:7]
+; CHECK-NEXT:    ret
+  %res = call  {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 0, i32 %slice)
+  %slice.max = add i32 %slice, 6
+  %res2 = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 %slice.max)
+  ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %res2
+}
+
+define {<vscale x 8 x half>, <vscale x 8 x half>} 
@test_readz_hor_z16_f16_x2(i32 %slice) {
+; CHECK-LABEL: test_readz_hor_z16_f16_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    movaz { z0.h, z1.h }, za0h.h[w12, 0:1]
+; CHECK-NEXT:    movaz { z0.h, z1.h }, za1h.h[w12, 6:7]
+; CHECK-NEXT:    ret
+  %res = call  {<vscale x 8 x half>, <vscale x 8 x half>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 %slice)
+  %slice.max = add i32 %slice, 6
+  %res2 = call {<vscale x 8 x half>, <vscale x 8 x half>} 
@llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 1, i32 %slice.max)
+  ret {<vscale x 8 x half>, <vscale x 8 x half>} %res2
+}
+
+define {<vscale x 4 x float>, <vscale x 4 x float>} 
@test_readz_hor_z32_f32_x2(i32 %slice) {
+; CHECK-LABEL: test_readz_hor_z32_f32_x2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    movaz { z0.s, z1.s }, za0h.s[w12, 0:1]
----------------
Lukacma wrote:


I am using this as an example, but the comment is more general to whole 
assembly. Looking at latest 
[spec](https://aig.euhpc.arm.com/job/mrs-release-nightly-dpisa/lastSuccessfulBuild/artifact/output/ISA/v9Ap5-A/xml/release/package/ISA_A64_xml_dpisa-nightly/movaz_mz_za2.xml)
 for the instruction it seems assembly should look like this: 

`MOVAZ { <Zd1>.D-<Zd2>.D }, ZA.D[<Wv>, <offs>{, VGx2}]
`
which is not what is produced currently. The `ZA` field has tile identification 
info attached and we have .S instead of .D (Thought from reading spec this is 
fine and only matters for dissasembly?). Is this an error in assembly 
generation or am I missing smth here ?  

https://github.com/llvm/llvm-project/pull/88710
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)

Reply via email to