kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, SjoerdMeijer, greened.
Herald added subscribers: psnobl, rkruppe, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
kmclaughlin added a parent revision: D67549: [IntrinsicEmitter] Add overloaded types for SVE intrinsics (Subdivide2 & Subdivide4).
Implements the following SVE integer dot-product intrinsics:

- int_aarch64_sve_sdot
- int_aarch64_sve_sdot_lane
- int_aarch64_sve_udot
- int_aarch64_sve_udot_lane

This patch also includes tests for the Subdivide4Argument type added by D67549 <https://reviews.llvm.org/D67549>.

https://reviews.llvm.org/D67551

Files:
  include/llvm/IR/IntrinsicsAArch64.td
  lib/Target/AArch64/AArch64InstrFormats.td
  lib/Target/AArch64/AArch64SVEInstrInfo.td
  lib/Target/AArch64/SVEInstrFormats.td
  test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
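For context, a minimal IR usage sketch (not part of the patch; the function name @example_sdot is illustrative). It mirrors the new tests in sve-intrinsics-int-arith.ll: the intrinsic is overloaded on the accumulator type, the two multiplicand operands use the Subdivide4 type whose elements are a quarter of the accumulator element width, and the call is expected to lower to a single SDOT when targeting a CPU with +sve:

  ; Accumulate into 32-bit elements from 8-bit multiplicands.
  define <vscale x 4 x i32> @example_sdot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
    ; Expected codegen: sdot z0.s, z1.b, z2.b
    %r = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
    ret <vscale x 4 x i32> %r
  }

  declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)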
Index: test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
===================================================================
--- test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
+++ test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
@@ -88,6 +88,87 @@
   ret <vscale x 2 x i64> %out
 }
 
+; SDOT
+
+define <vscale x 4 x i32> @sdot_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: sdot_i32:
+; CHECK: sdot z0.s, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32> %a,
+                                                                <vscale x 16 x i8> %b,
+                                                                <vscale x 16 x i8> %c)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdot_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: sdot_i64:
+; CHECK: sdot z0.d, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 8 x i16> %b,
+                                                                <vscale x 8 x i16> %c)
+  ret <vscale x 2 x i64> %out
+}
+
+; SDOT (Indexed)
+
+define <vscale x 4 x i32> @sdot_lane_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: sdot_lane_i32:
+; CHECK: sdot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> %a,
+                                                                     <vscale x 16 x i8> %b,
+                                                                     <vscale x 16 x i8> %c,
+                                                                     i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sdot_lane_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: sdot_lane_i64:
+; CHECK: sdot z0.d, z1.h, z2.h[1]
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64> %a,
+                                                                     <vscale x 8 x i16> %b,
+                                                                     <vscale x 8 x i16> %c,
+                                                                     i32 1)
+  ret <vscale x 2 x i64> %out
+}
+
+; UDOT
+
+define <vscale x 4 x i32> @udot_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: udot_i32:
+; CHECK: udot z0.s, z1.b, z2.b
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32> %a,
+                                                                <vscale x 16 x i8> %b,
+                                                                <vscale x 16 x i8> %c)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @udot_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: udot_i64:
+; CHECK: udot z0.d, z1.h, z2.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64> %a,
+                                                                <vscale x 8 x i16> %b,
+                                                                <vscale x 8 x i16> %c)
+  ret <vscale x 2 x i64> %out
+}
+
+; UDOT (Indexed)
+
+define <vscale x 4 x i32> @udot_lane_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: udot_lane_i32:
+; CHECK: udot z0.s, z1.b, z2.b[2]
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32> %a,
+                                                                     <vscale x 16 x i8> %b,
+                                                                     <vscale x 16 x i8> %c,
+                                                                     i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
@@ -97,3 +178,15 @@
 declare <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
Index: lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- lib/Target/AArch64/SVEInstrFormats.td
+++ lib/Target/AArch64/SVEInstrFormats.td
@@ -2017,12 +2017,14 @@
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = Destructive;
-  let ElementSize = zprty1.ElementSize;
 }
 
-multiclass sve_intx_dot<bit opc, string asm> {
+multiclass sve_intx_dot<bit opc, string asm, SDPatternOperator op> {
   def _S : sve_intx_dot<0b0, opc, asm, ZPR32, ZPR8>;
   def _D : sve_intx_dot<0b1, opc, asm, ZPR64, ZPR16>;
+
+  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2047,22 +2049,27 @@
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = Destructive;
-  let ElementSize = ElementSizeNone;
 }
 
-multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
-  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
+                                        SDPatternOperator op> {
+  def _S : sve_intx_dot_by_indexed_elem<0b0, opc, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS32b> {
    bits<2> iop;
    bits<3> Zm;
    let Inst{20-19} = iop;
    let Inst{18-16} = Zm;
  }
-  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+  def _D : sve_intx_dot_by_indexed_elem<0b1, opc, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD32b> {
    bits<1> iop;
    bits<4> Zm;
    let Inst{20} = iop;
    let Inst{19-16} = Zm;
  }
+
+  def : Pat<(nxv4i32 (op nxv4i32:$Op1, nxv16i8:$Op2, nxv16i8:$Op3, (i32 VectorIndexS32b:$idx))),
+            (!cast<Instruction>(NAME # _S) $Op1, $Op2, $Op3, VectorIndexS32b:$idx)>;
+  def : Pat<(nxv2i64 (op nxv2i64:$Op1, nxv8i16:$Op2, nxv8i16:$Op3, (i32 VectorIndexD32b:$idx))),
+            (!cast<Instruction>(NAME # _D) $Op1, $Op2, $Op3, VectorIndexD32b:$idx)>;
 }
 
 //===----------------------------------------------------------------------===//
Index: lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -82,11 +82,11 @@
   defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">;
   defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">;
 
-  defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot">;
-  defm UDOT_ZZZ : sve_intx_dot<0b1, "udot">;
+  defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
+  defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
 
-  defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot">;
-  defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot">;
+  defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
+  defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
 
   defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb">;
   defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb">;
Index: lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- lib/Target/AArch64/AArch64InstrFormats.td
+++ lib/Target/AArch64/AArch64InstrFormats.td
@@ -1011,8 +1011,8 @@
   let RenderMethod = "addVectorIndexOperands";
 }
 
-class AsmVectorIndexOpnd<AsmOperandClass mc, code pred>
-  : Operand<i64>, ImmLeaf<i64, pred> {
+class AsmVectorIndexOpnd<ValueType ty, AsmOperandClass mc, code pred>
+  : Operand<ty>, ImmLeaf<ty, pred> {
   let ParserMatchClass = mc;
   let PrintMethod = "printVectorIndex";
 }
@@ -1023,11 +1023,17 @@
 def VectorIndexSOperand : AsmVectorIndex<0, 3>;
 def VectorIndexDOperand : AsmVectorIndex<0, 1>;
 
-def VectorIndex1 : AsmVectorIndexOpnd<VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
-def VectorIndexB : AsmVectorIndexOpnd<VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
-def VectorIndexH : AsmVectorIndexOpnd<VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
-def VectorIndexS : AsmVectorIndexOpnd<VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
-def VectorIndexD : AsmVectorIndexOpnd<VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
+def VectorIndex1 : AsmVectorIndexOpnd<i64, VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
+def VectorIndexB : AsmVectorIndexOpnd<i64, VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+def VectorIndexH : AsmVectorIndexOpnd<i64, VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+def VectorIndexS : AsmVectorIndexOpnd<i64, VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+def VectorIndexD : AsmVectorIndexOpnd<i64, VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
+
+def VectorIndex132b : AsmVectorIndexOpnd<i32, VectorIndex1Operand, [{ return ((uint64_t)Imm) == 1; }]>;
+def VectorIndexB32b : AsmVectorIndexOpnd<i32, VectorIndexBOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+def VectorIndexH32b : AsmVectorIndexOpnd<i32, VectorIndexHOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+def VectorIndexS32b : AsmVectorIndexOpnd<i32, VectorIndexSOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+def VectorIndexD32b : AsmVectorIndexOpnd<i32, VectorIndexDOperand, [{ return ((uint64_t)Imm) < 2; }]>;
 
 def SVEVectorIndexExtDupBOperand : AsmVectorIndex<0, 63, "SVE">;
 def SVEVectorIndexExtDupHOperand : AsmVectorIndex<0, 31, "SVE">;
@@ -1036,15 +1042,15 @@
 def SVEVectorIndexExtDupQOperand : AsmVectorIndex<0, 3, "SVE">;
 
 def sve_elm_idx_extdup_b
-  : AsmVectorIndexOpnd<SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>;
+  : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupBOperand, [{ return ((uint64_t)Imm) < 64; }]>;
 def sve_elm_idx_extdup_h
-  : AsmVectorIndexOpnd<SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>;
+  : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupHOperand, [{ return ((uint64_t)Imm) < 32; }]>;
 def sve_elm_idx_extdup_s
-  : AsmVectorIndexOpnd<SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>;
+  : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupSOperand, [{ return ((uint64_t)Imm) < 16; }]>;
 def sve_elm_idx_extdup_d
-  : AsmVectorIndexOpnd<SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>;
+  : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupDOperand, [{ return ((uint64_t)Imm) < 8; }]>;
 def sve_elm_idx_extdup_q
-  : AsmVectorIndexOpnd<SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>;
+  : AsmVectorIndexOpnd<i64, SVEVectorIndexExtDupQOperand, [{ return ((uint64_t)Imm) < 4; }]>;
 
 // 8-bit immediate for AdvSIMD where 64-bit values of the form:
 // aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
Index: include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- include/llvm/IR/IntrinsicsAArch64.td
+++ include/llvm/IR/IntrinsicsAArch64.td
@@ -768,6 +768,21 @@
                  LLVMMatchType<0>],
                 [IntrNoMem]>;
 
+  class AdvSIMD_SVE_DOT_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMSubdivide4VectorType<0>,
+                 LLVMSubdivide4VectorType<0>],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_DOT_Indexed_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMSubdivide4VectorType<0>,
+                 LLVMSubdivide4VectorType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem]>;
+
   // This class of intrinsics are not intended to be useful within LLVM IR but
   // are instead here to support some of the more regid parts of the ACLE.
   class Builtin_SVCVT<string name, LLVMType OUT, LLVMType IN>
@@ -787,6 +802,12 @@
 def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
 
+def int_aarch64_sve_sdot : AdvSIMD_SVE_DOT_Intrinsic;
+def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+
+def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic;
+def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+
 //
 // Floating-point comparisons
 //
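To illustrate the indexed forms (a sketch, not part of the patch; the name @example_udot_lane is made up), the trailing i32 operand of the *_lane intrinsics is the lane index and must be a compile-time constant: the patterns above only match it through VectorIndexS32b (immediates below 4, 32-bit accumulator elements) or VectorIndexD32b (immediates below 2, 64-bit accumulator elements), which correspond to the index ranges of the SDOT/UDOT (indexed) encodings:

  define <vscale x 2 x i64> @example_udot_lane(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
    ; Expected codegen: udot z0.d, z1.h, z2.h[1]
    %r = call <vscale x 2 x i64> @llvm.aarch64.sve.udot.lane.nxv2i64(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, i32 1)
    ret <vscale x 2 x i64> %r
  }

  declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)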