kmclaughlin created this revision.
kmclaughlin added reviewers: efriedma, sdesmalen, dancgr, cameron.mcinally, c-rhodes.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.
kmclaughlin added a parent revision: D73493: [AArch64][SVE] Add SVE2 intrinsics for uniform DSP operations.
Implements the following intrinsics:
 - @llvm.aarch64.sve.[s|u]qadd
 - @llvm.aarch64.sve.[s|u]qsub
 - @llvm.aarch64.sve.suqadd
 - @llvm.aarch64.sve.usqadd
 - @llvm.aarch64.sve.[s|u]qsubr
 - @llvm.aarch64.sve.[s|u]rshl
 - @llvm.aarch64.sve.[s|u]qshl
 - @llvm.aarch64.sve.[s|u]qrshl
 - @llvm.aarch64.sve.[s|u]rshr
 - @llvm.aarch64.sve.sqshlu
 - @llvm.aarch64.sve.sri
 - @llvm.aarch64.sve.sli
 - @llvm.aarch64.sve.[s|u]sra
 - @llvm.aarch64.sve.[s|u]rsra
 - @llvm.aarch64.sve.[s|u]aba

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73551

Files:
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
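Most of the predicated intrinsics above take a governing predicate followed by two data vectors and lower to the destructive predicated instruction checked in the tests; the predicated immediate shifts ([s|u]rshr, sqshlu) replace the second vector with an i32 immediate, and sri, sli, [s|u]sra, [s|u]rsra and [s|u]aba are unpredicated. A minimal sketch of the common predicated shape, mirroring the sqadd test in the diff below (the function name @example_sqadd is illustrative only):

  define <vscale x 16 x i8> @example_sqadd(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
    ; selected as: sqadd z0.b, p0/m, z0.b, z1.b
    %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
    ret <vscale x 16 x i8> %out
  }

  declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)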
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll +++ llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll @@ -1,6 +1,50 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s ; +; SABA +; + +define <vscale x 16 x i8> @saba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: saba_i8: +; CHECK: saba z0.b, z1.b, z2.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + <vscale x 16 x i8> %c) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @saba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { +; CHECK-LABEL: saba_i16: +; CHECK: saba z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + <vscale x 8 x i16> %c) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @saba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { +; CHECK-LABEL: saba_i32: +; CHECK: saba z0.s, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + <vscale x 4 x i32> %c) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @saba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { +; CHECK-LABEL: saba_i64: +; CHECK: saba z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + <vscale x 2 x i64> %c) + ret <vscale x 2 x i64> %out +} + +; ; SHADD ; @@ -133,6 +177,50 @@ } ; +; SLI +; + +define <vscale x 16 x i8> @sli_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: sli_i8: +; CHECK: sli z0.b, z1.b, #0 +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + i32 0) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @sli_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: sli_i16: +; CHECK: sli z0.h, z1.h, #1 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + i32 1) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @sli_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: sli_i32: +; CHECK: sli z0.s, z1.s, #30 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + i32 30); + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @sli_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: sli_i64: +; CHECK: sli z0.d, z1.d, #63 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + i32 63) + ret <vscale x 2 x i64> %out +} + +; ; SQABS ; @@ -177,6 +265,50 @@ } ; +; SQADD +; + +define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: sqadd_i8: +; CHECK: sqadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> 
@sqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: sqadd_i16: +; CHECK: sqadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: sqadd_i32: +; CHECK: sqadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: sqadd_i64: +; CHECK: sqadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; ; SQDMULH (Vector) ; @@ -531,281 +663,1264 @@ } ; -; SRHADD +; SQRSHL ; -define <vscale x 16 x i8> @srhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { -; CHECK-LABEL: srhadd_i8: -; CHECK: srhadd z0.b, p0/m, z0.b, z1.b +define <vscale x 16 x i8> @sqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: sqrshl_i8: +; CHECK: sqrshl z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret - %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1> %pg, + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) ret <vscale x 16 x i8> %out } -define <vscale x 8 x i16> @srhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { -; CHECK-LABEL: srhadd_i16: -; CHECK: srhadd z0.h, p0/m, z0.h, z1.h +define <vscale x 8 x i16> @sqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: sqrshl_i16: +; CHECK: sqrshl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1> %pg, + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) ret <vscale x 8 x i16> %out } -define <vscale x 4 x i32> @srhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { -; CHECK-LABEL: srhadd_i32: -; CHECK: srhadd z0.s, p0/m, z0.s, z1.s +define <vscale x 4 x i32> @sqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: sqrshl_i32: +; CHECK: sqrshl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1> %pg, + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ret <vscale x 4 x i32> %out } -define <vscale x 2 x i64> @srhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { -; CHECK-LABEL: srhadd_i64: -; CHECK: srhadd z0.d, p0/m, z0.d, z1.d +define <vscale x 2 x i64> @sqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: sqrshl_i64: +; CHECK: sqrshl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1> %pg, + %out = call <vscale x 2 x i64> 
@llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) ret <vscale x 2 x i64> %out } ; -; UHADD +; SQSHL (Vectors) ; -define <vscale x 16 x i8> @uhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { -; CHECK-LABEL: uhadd_i8: -; CHECK: uhadd z0.b, p0/m, z0.b, z1.b +define <vscale x 16 x i8> @sqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: sqshl_i8: +; CHECK: sqshl z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret - %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1> %pg, + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) ret <vscale x 16 x i8> %out } -define <vscale x 8 x i16> @uhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { -; CHECK-LABEL: uhadd_i16: -; CHECK: uhadd z0.h, p0/m, z0.h, z1.h +define <vscale x 8 x i16> @sqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: sqshl_i16: +; CHECK: sqshl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1> %pg, + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) ret <vscale x 8 x i16> %out } -define <vscale x 4 x i32> @uhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { -; CHECK-LABEL: uhadd_i32: -; CHECK: uhadd z0.s, p0/m, z0.s, z1.s +define <vscale x 4 x i32> @sqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: sqshl_i32: +; CHECK: sqshl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1> %pg, + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ret <vscale x 4 x i32> %out } -define <vscale x 2 x i64> @uhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { -; CHECK-LABEL: uhadd_i64: -; CHECK: uhadd z0.d, p0/m, z0.d, z1.d +define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: sqshl_i64: +; CHECK: sqshl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhadd.nxv2i64(<vscale x 2 x i1> %pg, + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) ret <vscale x 2 x i64> %out } ; -; UHSUB +; SQSHLU ; -define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { -; CHECK-LABEL: uhsub_i8: -; CHECK: uhsub z0.b, p0/m, z0.b, z1.b +define <vscale x 16 x i8> @sqshlu_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) { +; CHECK-LABEL: sqshlu_i8: +; CHECK: sqshlu z0.b, p0/m, z0.b, #2 ; CHECK-NEXT: ret - %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> %pg, + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + i32 2) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @sqshlu_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) { +; CHECK-LABEL: sqshlu_i16: +; CHECK: sqshlu z0.h, p0/m, z0.h, #3 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + i32 3) 
+ ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @sqshlu_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) { +; CHECK-LABEL: sqshlu_i32: +; CHECK: sqshlu z0.s, p0/m, z0.s, #29 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + i32 29) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @sqshlu_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) { +; CHECK-LABEL: sqshlu_i64: +; CHECK: sqshlu z0.d, p0/m, z0.d, #62 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + i32 62) + ret <vscale x 2 x i64> %out +} + +; +; SQSUB +; + +define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: sqsub_i8: +; CHECK: sqsub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) ret <vscale x 16 x i8> %out } -define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { -; CHECK-LABEL: uhsub_i16: -; CHECK: uhsub z0.h, p0/m, z0.h, z1.h +define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: sqsub_i16: +; CHECK: sqsub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> %pg, + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) ret <vscale x 8 x i16> %out } -define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { -; CHECK-LABEL: uhsub_i32: -; CHECK: uhsub z0.s, p0/m, z0.s, z1.s +define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: sqsub_i32: +; CHECK: sqsub z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> %pg, + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ret <vscale x 4 x i32> %out } -define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { -; CHECK-LABEL: uhsub_i64: -; CHECK: uhsub z0.d, p0/m, z0.d, z1.d +define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: sqsub_i64: +; CHECK: sqsub z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> %pg, + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) ret <vscale x 2 x i64> %out } ; -; UHSUBR +; SQSUBR ; -define <vscale x 16 x i8> @uhsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { -; CHECK-LABEL: uhsubr_i8: -; CHECK: uhsubr z0.b, p0/m, z0.b, z1.b +define <vscale x 16 x i8> @sqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: sqsubr_i8: +; CHECK: sqsubr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret - %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> %pg, + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> 
%b) ret <vscale x 16 x i8> %out } -define <vscale x 8 x i16> @uhsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { -; CHECK-LABEL: uhsubr_i16: -; CHECK: uhsubr z0.h, p0/m, z0.h, z1.h +define <vscale x 8 x i16> @sqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: sqsubr_i16: +; CHECK: sqsubr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> %pg, + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) ret <vscale x 8 x i16> %out } -define <vscale x 4 x i32> @uhsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { -; CHECK-LABEL: uhsubr_i32: -; CHECK: uhsubr z0.s, p0/m, z0.s, z1.s +define <vscale x 4 x i32> @sqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: sqsubr_i32: +; CHECK: sqsubr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> %pg, + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ret <vscale x 4 x i32> %out } -define <vscale x 2 x i64> @uhsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { -; CHECK-LABEL: uhsubr_i64: -; CHECK: uhsubr z0.d, p0/m, z0.d, z1.d +define <vscale x 2 x i64> @sqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: sqsubr_i64: +; CHECK: sqsubr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> %pg, + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) ret <vscale x 2 x i64> %out } ; -; URECPE -; - -define <vscale x 4 x i32> @urecpe_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) { -; CHECK-LABEL: urecpe_i32: -; CHECK: urecpe z0.s, p0/m, z1.s -; CHECK-NEXT: ret - %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, - <vscale x 4 x i1> %pg, - <vscale x 4 x i32> %b) - ret <vscale x 4 x i32> %out -} - -; -; URHADD +; SRHADD ; -define <vscale x 16 x i8> @urhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { -; CHECK-LABEL: urhadd_i8: -; CHECK: urhadd z0.b, p0/m, z0.b, z1.b +define <vscale x 16 x i8> @srhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: srhadd_i8: +; CHECK: srhadd z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret - %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1> %pg, + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) ret <vscale x 16 x i8> %out } -define <vscale x 8 x i16> @urhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { -; CHECK-LABEL: urhadd_i16: -; CHECK: urhadd z0.h, p0/m, z0.h, z1.h +define <vscale x 8 x i16> @srhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: srhadd_i16: +; CHECK: srhadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1> %pg, + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) ret 
<vscale x 8 x i16> %out } -define <vscale x 4 x i32> @urhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { -; CHECK-LABEL: urhadd_i32: -; CHECK: urhadd z0.s, p0/m, z0.s, z1.s +define <vscale x 4 x i32> @srhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: srhadd_i32: +; CHECK: srhadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1> %pg, + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ret <vscale x 4 x i32> %out } -define <vscale x 2 x i64> @urhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { -; CHECK-LABEL: urhadd_i64: -; CHECK: urhadd z0.d, p0/m, z0.d, z1.d +define <vscale x 2 x i64> @srhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: srhadd_i64: +; CHECK: srhadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1> %pg, - <vscale x 2 x i64> %a, - <vscale x 2 x i64> %b) + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) ret <vscale x 2 x i64> %out } ; -; URSQRTE +; SRI ; -define <vscale x 4 x i32> @ursqrte_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) { -; CHECK-LABEL: ursqrte_i32: -; CHECK: ursqrte z0.s, p0/m, z1.s +define <vscale x 16 x i8> @sri_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: sri_i8: +; CHECK: sri z0.b, z1.b, #1 ; CHECK-NEXT: ret - %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a, - <vscale x 4 x i1> %pg, - <vscale x 4 x i32> %b) - ret <vscale x 4 x i32> %out + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + i32 1) + ret <vscale x 16 x i8> %out } -declare <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) -declare <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) -declare <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) -declare <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +define <vscale x 8 x i16> @sri_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: sri_i16: +; CHECK: sri z0.h, z1.h, #16 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + i32 16) + ret <vscale x 8 x i16> %out +} -declare <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) -declare <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) -declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) -declare <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +define <vscale x 4 x i32> @sri_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: sri_i32: +; CHECK: sri z0.s, z1.s, #32 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + i32 32); 
+ ret <vscale x 4 x i32> %out +} -declare <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) -declare <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) -declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) -declare <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +define <vscale x 2 x i64> @sri_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: sri_i64: +; CHECK: sri z0.d, z1.d, #64 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + i32 64) + ret <vscale x 2 x i64> %out +} -declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>) -declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>) -declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>) -declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>) +; +; SRSHL +; -declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) -declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>) -declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) -declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>) +define <vscale x 16 x i8> @srshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: srshl_i8: +; CHECK: srshl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} -declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32) -declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32) -declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32) +define <vscale x 8 x i16> @srshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: srshl_i16: +; CHECK: srshl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @srshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: srshl_i32: +; CHECK: srshl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @srshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: srshl_i64: +; CHECK: srshl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; SRSHR +; + +define 
<vscale x 16 x i8> @srshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) { +; CHECK-LABEL: srshr_i8: +; CHECK: srshr z0.b, p0/m, z0.b, #8 +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + i32 8) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @srshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) { +; CHECK-LABEL: srshr_i16: +; CHECK: srshr z0.h, p0/m, z0.h, #1 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + i32 1) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @srshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) { +; CHECK-LABEL: srshr_i32: +; CHECK: srshr z0.s, p0/m, z0.s, #22 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + i32 22) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @srshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) { +; CHECK-LABEL: srshr_i64: +; CHECK: srshr z0.d, p0/m, z0.d, #54 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + i32 54) + ret <vscale x 2 x i64> %out +} + +; +; SRSRA +; + +define <vscale x 16 x i8> @srsra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: srsra_i8: +; CHECK: srsra z0.b, z1.b, #2 +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + i32 2) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @srsra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: srsra_i16: +; CHECK: srsra z0.h, z1.h, #15 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + i32 15) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @srsra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: srsra_i32: +; CHECK: srsra z0.s, z1.s, #12 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + i32 12) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @srsra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: srsra_i64: +; CHECK: srsra z0.d, z1.d, #44 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + i32 44) + ret <vscale x 2 x i64> %out +} + +; +; SSRA +; + +define <vscale x 16 x i8> @ssra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: ssra_i8: +; CHECK: ssra z0.b, z1.b, #3 +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + i32 3) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @ssra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: ssra_i16: +; CHECK: ssra z0.h, z1.h, #14 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + i32 14) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @ssra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: ssra_i32: +; CHECK: ssra z0.s, z1.s, #2 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + i32 2) + ret <vscale x 4 x i32> 
%out +} + +define <vscale x 2 x i64> @ssra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: ssra_i64: +; CHECK: ssra z0.d, z1.d, #34 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + i32 34) + ret <vscale x 2 x i64> %out +} + +; +; SUQADD +; + +define <vscale x 16 x i8> @suqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: suqadd_i8: +; CHECK: suqadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.suqadd.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @suqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: suqadd_i16: +; CHECK: suqadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.suqadd.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @suqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: suqadd_i32: +; CHECK: suqadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @suqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: suqadd_i64: +; CHECK: suqadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.suqadd.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; UABA +; + +define <vscale x 16 x i8> @uaba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: uaba_i8: +; CHECK: uaba z0.b, z1.b, z2.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uaba.nxv16i8(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + <vscale x 16 x i8> %c) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uaba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { +; CHECK-LABEL: uaba_i16: +; CHECK: uaba z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaba.nxv8i16(<vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + <vscale x 8 x i16> %c) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uaba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { +; CHECK-LABEL: uaba_i32: +; CHECK: uaba z0.s, z1.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaba.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + <vscale x 4 x i32> %c) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uaba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { +; CHECK-LABEL: uaba_i64: +; CHECK: uaba z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaba.nxv2i64(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + <vscale x 2 x i64> %c) + ret <vscale x 2 x i64> %out +} + +; +; UHADD +; + +define <vscale x 16 x i8> @uhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uhadd_i8: +; CHECK: uhadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale 
x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uhadd_i16: +; CHECK: uhadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uhadd_i32: +; CHECK: uhadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uhadd_i64: +; CHECK: uhadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhadd.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; UHSUB +; + +define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uhsub_i8: +; CHECK: uhsub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uhsub_i16: +; CHECK: uhsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uhsub_i32: +; CHECK: uhsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uhsub_i64: +; CHECK: uhsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; UHSUBR +; + +define <vscale x 16 x i8> @uhsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uhsubr_i8: +; CHECK: uhsubr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uhsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uhsubr_i16: +; CHECK: uhsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uhsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) 
{ +; CHECK-LABEL: uhsubr_i32: +; CHECK: uhsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uhsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uhsubr_i64: +; CHECK: uhsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; UQADD +; + +define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uqadd_i8: +; CHECK: uqadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uqadd_i16: +; CHECK: uqadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uqadd_i32: +; CHECK: uqadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uqadd_i64: +; CHECK: uqadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; UQRSHL +; + +define <vscale x 16 x i8> @uqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uqrshl_i8: +; CHECK: uqrshl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uqrshl_i16: +; CHECK: uqrshl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uqrshl_i32: +; CHECK: uqrshl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uqrshl_i64: +; CHECK: uqrshl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x 
i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; UQSHL (Vectors) +; + +define <vscale x 16 x i8> @uqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uqshl_i8: +; CHECK: uqshl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uqshl_i16: +; CHECK: uqshl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uqshl_i32: +; CHECK: uqshl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uqshl_i64: +; CHECK: uqshl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; UQSUB +; + +define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uqsub_i8: +; CHECK: uqsub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uqsub_i16: +; CHECK: uqsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uqsub_i32: +; CHECK: uqsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uqsub_i64: +; CHECK: uqsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; UQSUBR +; + +define <vscale x 16 x i8> @uqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uqsubr_i8: +; CHECK: uqsubr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; 
CHECK-LABEL: uqsubr_i16: +; CHECK: uqsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uqsubr_i32: +; CHECK: uqsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uqsubr_i64: +; CHECK: uqsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; URECPE +; + +define <vscale x 4 x i32> @urecpe_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) { +; CHECK-LABEL: urecpe_i32: +; CHECK: urecpe z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i1> %pg, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +; +; URHADD +; + +define <vscale x 16 x i8> @urhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: urhadd_i8: +; CHECK: urhadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @urhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: urhadd_i16: +; CHECK: urhadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @urhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: urhadd_i32: +; CHECK: urhadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @urhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: urhadd_i64: +; CHECK: urhadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; URSHL +; + +define <vscale x 16 x i8> @urshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: urshl_i8: +; CHECK: urshl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @urshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: urshl_i16: +; CHECK: urshl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg, + 
<vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @urshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: urshl_i32: +; CHECK: urshl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @urshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: urshl_i64: +; CHECK: urshl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; URSHR +; + +define <vscale x 16 x i8> @urshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) { +; CHECK-LABEL: urshr_i8: +; CHECK: urshr z0.b, p0/m, z0.b, #4 +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + i32 4) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @urshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) { +; CHECK-LABEL: urshr_i16: +; CHECK: urshr z0.h, p0/m, z0.h, #13 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + i32 13) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @urshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) { +; CHECK-LABEL: urshr_i32: +; CHECK: urshr z0.s, p0/m, z0.s, #1 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + i32 1) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @urshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) { +; CHECK-LABEL: urshr_i64: +; CHECK: urshr z0.d, p0/m, z0.d, #24 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + i32 24) + ret <vscale x 2 x i64> %out +} + +; +; URSQRTE +; + +define <vscale x 4 x i32> @ursqrte_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) { +; CHECK-LABEL: ursqrte_i32: +; CHECK: ursqrte z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i1> %pg, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +; +; URSRA +; + +define <vscale x 16 x i8> @ursra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: ursra_i8: +; CHECK: ursra z0.b, z1.b, #5 +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + i32 5) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @ursra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: ursra_i16: +; CHECK: ursra z0.h, z1.h, #12 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + i32 12) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @ursra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: ursra_i32: +; CHECK: ursra z0.s, z1.s, #31 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + i32 31) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @ursra_i64(<vscale 
x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: ursra_i64: +; CHECK: ursra z0.d, z1.d, #14 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + i32 14) + ret <vscale x 2 x i64> %out +} + +; +; USQADD +; + +define <vscale x 16 x i8> @usqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: usqadd_i8: +; CHECK: usqadd z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @usqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: usqadd_i16: +; CHECK: usqadd z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @usqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: usqadd_i32: +; CHECK: usqadd z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @usqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: usqadd_i64: +; CHECK: usqadd z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; USRA +; + +define <vscale x 16 x i8> @usra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: usra_i8: +; CHECK: usra z0.b, z1.b, #6 +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, + i32 6) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @usra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: usra_i16: +; CHECK: usra z0.h, z1.h, #11 +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16> %a, + <vscale x 8 x i16> %b, + i32 11) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @usra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: usra_i32: +; CHECK: usra z0.s, z1.s, #21 +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i32> %b, + i32 21) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @usra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: usra_i64: +; CHECK: usra z0.d, z1.d, #4 +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %b, + i32 4) + ret <vscale x 2 x i64> %out +} + +declare <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale 
x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32) declare 
<vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>) declare <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>) @@ -839,11 +1954,71 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32) declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32) +declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + declare <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) declare <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) declare <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) declare <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32) +declare <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32) +declare <vscale x 4 x i32> 
@llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32) +declare <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32) +declare <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32) +declare <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32) +declare <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32) +declare <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32) +declare <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.suqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.suqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.suqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.uaba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.uaba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.uaba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.uaba.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) + declare <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) declare <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) declare <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) @@ -859,6 +2034,31 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) declare <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare <vscale x 16 x i8> 
@llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>) declare <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) @@ -866,4 +2066,29 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) declare <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) +declare <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32) +declare <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x 
i16>, i32) +declare <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32) + declare <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32) +declare <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32) +declare <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32) +declare <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32) +declare <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32) +declare <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32) Index: llvm/lib/Target/AArch64/SVEInstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/SVEInstrFormats.td +++ llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2985,7 +2985,8 @@ let Constraints = "$Zd = $_Zd"; } -multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> { +multiclass sve2_int_bin_shift_imm_left<bit opc, string asm, + SDPatternOperator op> { def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{19} = imm{3}; @@ -2997,9 +2998,15 @@ let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } + + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>; } -multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> { +multiclass sve2_int_bin_shift_imm_right<bit opc, string asm, + SDPatternOperator op> { def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; @@ -3011,6 +3018,11 @@ let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } + + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, 
!cast<Instruction>(NAME # _D)>; } class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm, @@ -3036,7 +3048,8 @@ let ElementSize = ElementSizeNone; } -multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> { +multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm, + SDPatternOperator op> { def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; @@ -3048,6 +3061,11 @@ let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } + + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>; } class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty> @@ -3098,11 +3116,16 @@ let ElementSize = ElementSizeNone; } -multiclass sve2_int_absdiff_accum<bit opc, string asm> { +multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> { def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>; def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>; def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>; def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>; + + def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; } multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm> { @@ -4388,8 +4411,7 @@ // SVE Bitwise Shift - Predicated Group //===----------------------------------------------------------------------===// class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm, - ZPRRegOp zprty, Operand immtype, - ElementSizeEnum size> + ZPRRegOp zprty, Operand immtype> : I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm), asm, "\t$Zdn, $Pg/m, $_Zdn, $imm", "", @@ -4409,41 +4431,53 @@ let Constraints = "$Zdn = $_Zdn"; let DestructiveInstType = Destructive; - let ElementSize = size; + let ElementSize = zprty.ElementSize; } multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> { - def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8, - ElementSizeB>; - def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16, - ElementSizeH> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{8} = imm{3}; } - def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32, - ElementSizeS> { + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { let Inst{9-8} = imm{4-3}; } - def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64, - ElementSizeD> { + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } } +multiclass sve2_int_bin_pred_shift_imm_left<bits<4> 
opc, string asm, + SDPatternOperator op> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { + let Inst{8} = imm{3}; + } + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + let Inst{9-8} = imm{4-3}; + } + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + let Inst{22} = imm{5}; + let Inst{9-8} = imm{4-3}; + } + + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>; +} + multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, SDPatternOperator op = null_frag> { - def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8, - ElementSizeB>; - def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16, - ElementSizeH> { + def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{8} = imm{3}; } - def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32, - ElementSizeS> { + def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{9-8} = imm{4-3}; } - def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64, - ElementSizeD> { + def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1530,35 +1530,35 @@ defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg", int_aarch64_sve_sqneg>; // SVE2 saturating add/subtract - defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", null_frag>; - defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", null_frag>; - defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", null_frag>; - defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", null_frag>; - defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", null_frag>; - defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", null_frag>; - defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", null_frag>; - defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", null_frag>; + defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", int_aarch64_sve_sqadd>; + defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", int_aarch64_sve_uqadd>; + defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", int_aarch64_sve_sqsub>; + defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", int_aarch64_sve_uqsub>; + defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", int_aarch64_sve_suqadd>; + defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", int_aarch64_sve_usqadd>; + defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", int_aarch64_sve_sqsubr>; + defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", int_aarch64_sve_uqsubr>; // SVE2 saturating/rounding bitwise shift left (predicated) - defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, 
"srshl", null_frag>; - defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", null_frag>; + defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl>; + defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl>; defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag>; defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag>; - defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", null_frag>; - defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", null_frag>; - defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", null_frag>; - defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", null_frag>; + defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl>; + defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl>; + defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl>; + defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl>; defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag>; defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag>; defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>; defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>; // SVE2 predicated shifts - defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; - defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; - defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">; - defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">; - defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">; + defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; + defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; + defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", int_aarch64_sve_srshr>; + defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", int_aarch64_sve_urshr>; + defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", int_aarch64_sve_sqshlu>; // SVE2 integer add/subtract long defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">; @@ -1595,22 +1595,22 @@ defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">; // SVE2 bitwise shift and insert - defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">; - defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">; + defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri", int_aarch64_sve_sri>; + defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", int_aarch64_sve_sli>; // SVE2 bitwise shift right and accumulate - defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">; - defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">; - defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">; - defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">; + defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra", int_aarch64_sve_ssra>; + defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra", int_aarch64_sve_usra>; + defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra>; + defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra>; // SVE2 complex integer add defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">; defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">; // SVE2 integer absolute difference and accumulate - defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba">; - defm UABA_ZZZ : 
sve2_int_absdiff_accum<0b1, "uaba">; + defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", int_aarch64_sve_saba>; + defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", int_aarch64_sve_uaba>; // SVE2 integer absolute difference and accumulate long defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb">; Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -692,6 +692,37 @@ let ParserMatchClass = Imm0_63Operand; } +// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant +// (ImmLeaf) +def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{ + return (((uint32_t)Imm) < 8); +}]> { + let EncoderMethod = "getVecShiftL8OpValue"; + let DecoderMethod = "DecodeVecShiftL8Imm"; + let ParserMatchClass = Imm0_7Operand; +} +def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{ + return (((uint32_t)Imm) < 16); +}]> { + let EncoderMethod = "getVecShiftL16OpValue"; + let DecoderMethod = "DecodeVecShiftL16Imm"; + let ParserMatchClass = Imm0_15Operand; +} +def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{ + return (((uint32_t)Imm) < 32); +}]> { + let EncoderMethod = "getVecShiftL32OpValue"; + let DecoderMethod = "DecodeVecShiftL32Imm"; + let ParserMatchClass = Imm0_31Operand; +} +def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{ + return (((uint32_t)Imm) < 64); +}]> { + let EncoderMethod = "getVecShiftL64OpValue"; + let DecoderMethod = "DecodeVecShiftL64Imm"; + let ParserMatchClass = Imm0_63Operand; +} + // Crazy immediate formats used by 32-bit and 64-bit logical immediate // instructions for splatting repeating bit patterns across the immediate. def logical_imm32_XFORM : SDNodeXForm<imm, [{ Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1659,10 +1659,13 @@ // SVE2 - Uniform DSP operations // +def int_aarch64_sve_saba : AdvSIMD_3VectorArg_Intrinsic; def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_shsubr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sli : AdvSIMD_2VectorArgIndexed_Intrinsic; def int_aarch64_sve_sqabs : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_sqadd : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_sqdmulh : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_sve_sqdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic; def int_aarch64_sve_sqneg : AdvSIMD_Merged1VectorArg_Intrinsic; @@ -1672,13 +1675,35 @@ def int_aarch64_sve_sqrdmlsh_lane : AdvSIMD_3VectorArgIndexed_Intrinsic; def int_aarch64_sve_sqrdmulh : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_sve_sqrdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic; +def int_aarch64_sve_sqrshl : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sqshl : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sqshlu : AdvSIMD_SVE_ShiftByImm_Intrinsic; +def int_aarch64_sve_sqsub : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sqsubr : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_srhadd : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_sri : AdvSIMD_2VectorArgIndexed_Intrinsic; +def int_aarch64_sve_srshl : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_srshr : AdvSIMD_SVE_ShiftByImm_Intrinsic; +def int_aarch64_sve_srsra : 
AdvSIMD_2VectorArgIndexed_Intrinsic; +def int_aarch64_sve_ssra : AdvSIMD_2VectorArgIndexed_Intrinsic; +def int_aarch64_sve_suqadd : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_uaba : AdvSIMD_3VectorArg_Intrinsic; def int_aarch64_sve_uhadd : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_uhsub : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_uhsubr : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_uqadd : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_uqrshl : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_uqshl : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_uqsub : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_uqsubr : AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_sve_urecpe : AdvSIMD_Merged1VectorArg_Intrinsic; def int_aarch64_sve_urhadd : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_urshl : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_urshr : AdvSIMD_SVE_ShiftByImm_Intrinsic; def int_aarch64_sve_ursqrte : AdvSIMD_Merged1VectorArg_Intrinsic; +def int_aarch64_sve_ursra : AdvSIMD_2VectorArgIndexed_Intrinsic; +def int_aarch64_sve_usqadd : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_usra : AdvSIMD_2VectorArgIndexed_Intrinsic; // // SVE2 - Non-widening pairwise arithmetic
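For reviewers, a minimal usage sketch (illustrative only, not part of the patch): the function below is a hypothetical example exercising two of the new intrinsics with the same signatures declared in sve2-intrinsics-uniform-dsp.ll; the function and value names and the in-range shift immediate (3) are placeholders.

; Sketch, assuming the intrinsic signatures declared above; compile with:
;   llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -o - example.ll
define <vscale x 4 x i32> @example(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; Predicated signed saturating add of the active lanes of %a and %b.
  %sum = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ; Unsigned shift right of %b by the immediate, accumulated onto %sum.
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32> %sum, <vscale x 4 x i32> %b, i32 3)
  ret <vscale x 4 x i32> %out
}
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)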