Bobby Bruce has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/70736?usp=email )
(
7 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the
submitted one.
)
Change subject: arch-arm: Add support for Armv8.2-DotProd NEON extension.
......................................................................
arch-arm: Add support for Armv8.2-DotProd NEON extension.
Add support for the Armv8.2-DotProd NEON extension. This provides the
SDOT and UDOT SIMD Dot Product instructions.
For more information please refer to the Arm Architecture Reference
Manual (https://developer.arm.com/documentation/ddi0487/latest/).
Change-Id: I4caa3b97a74c65f32421487c55c3e36427194e61
Reviewed-by: Richard Cooper <richard.coo...@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70736
Maintainer: Jason Lowe-Power <power...@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandb...@arm.com>
Maintainer: Andreas Sandberg <andreas.sandb...@arm.com>
Reviewed-by: Jason Lowe-Power <power...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
---
M src/arch/arm/ArmISA.py
M src/arch/arm/ArmSystem.py
M src/arch/arm/isa/formats/neon64.isa
M src/arch/arm/isa/insts/neon64.isa
M src/arch/arm/regs/misc.cc
5 files changed, 107 insertions(+), 0 deletions(-)
Approvals:
Andreas Sandberg: Looks good to me, approved; Looks good to me, approved
Jason Lowe-Power: Looks good to me, but someone else must approve; Looks
good to me, approved
kokoro: Regressions pass
diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index ffe63eb..8c1ee5a 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -57,6 +57,7 @@
"FEAT_F64MM",
"FEAT_SVE",
"FEAT_I8MM",
+ "FEAT_DOTPROD",
# Armv8.3
"FEAT_FCMA",
"FEAT_JSCVT",
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index c5c0f43..eaaf4b1 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -81,6 +81,7 @@
"FEAT_F32MM", # Optional in Armv8.2
"FEAT_F64MM", # Optional in Armv8.2
"FEAT_I8MM", # Optional in Armv8.2
+ "FEAT_DOTPROD", # Optional in Armv8.2
# Armv8.3
"FEAT_FCMA",
"FEAT_JSCVT",
@@ -169,6 +170,7 @@
"FEAT_F32MM",
"FEAT_F64MM",
"FEAT_I8MM",
+ "FEAT_DOTPROD",
# Armv8.3
"FEAT_FCMA",
"FEAT_JSCVT",
@@ -205,6 +207,7 @@
"FEAT_F32MM",
"FEAT_F64MM",
"FEAT_I8MM",
+ "FEAT_DOTPROD",
]
diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa
index 5cce0d7..e083f6f 100644
--- a/src/arch/arm/isa/formats/neon64.isa
+++ b/src/arch/arm/isa/formats/neon64.isa
@@ -510,6 +510,7 @@
decodeNeon3RegExtension(ExtMachInst machInst)
{
uint8_t q = bits(machInst, 30);
+ uint8_t qu = bits(machInst, 30, 29);
uint8_t size = bits(machInst, 23, 22);
uint8_t opcode = bits(machInst, 15, 11);
@@ -532,6 +533,19 @@
else
return decodeNeonSThreeHAndWReg<SqrdmlshDX>(
size, machInst, vd, vn, vm);
+ case 0x12:
+ switch (qu) {
+ case 0b00:
+ return new SdotDX<int32_t>(machInst, vd, vn, vm);
+ case 0b01:
+ return new UdotDX<uint32_t>(machInst, vd, vn, vm);
+ case 0b10:
+ return new SdotQX<int32_t>(machInst, vd, vn, vm);
+ case 0b11:
+ return new UdotQX<uint32_t>(machInst, vd, vn, vm);
+ default:
+ return new Unknown64(machInst);
+ }
case 0x18:
case 0x19:
case 0x1a:
@@ -1351,6 +1365,7 @@
{
uint8_t q = bits(machInst, 30);
uint8_t u = bits(machInst, 29);
+ uint8_t qu = bits(machInst, 30, 29);
uint8_t size = bits(machInst, 23, 22);
uint8_t L = bits(machInst, 21);
uint8_t M = bits(machInst, 20);
@@ -1387,6 +1402,11 @@
}
RegIndex vm_fp = (RegIndex) (uint8_t) (vmh << 4 | vm_bf);
+ // Index and 2nd register operand for FEAT_DOTPROD and
+ // FEAT_I8MM instructions
+ uint8_t index_dp = (H << 1) | L;
+ RegIndex vm_dp = (RegIndex) (uint8_t) (M << 4 | vm_bf);
+
switch (opcode) {
case 0x0:
if (!u || (size == 0x0 || size == 0x3))
@@ -1573,6 +1593,23 @@
case 0xf:
return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX,
SqrdmlshElemQX>(
q, size, machInst, vd, vn, vm, index);
+ case 0xe:
+ switch (qu) {
+ case 0b00:
+ return new SdotElemDX<int32_t>(machInst,
+ vd, vn, vm_dp, index_dp);
+ case 0b01:
+ return new UdotElemDX<uint32_t>(machInst,
+ vd, vn, vm_dp, index_dp);
+ case 0b10:
+ return new SdotElemQX<int32_t>(machInst,
+ vd, vn, vm_dp, index_dp);
+ case 0b11:
+ return new UdotElemQX<uint32_t>(machInst,
+ vd, vn, vm_dp, index_dp);
+ default:
+ return new Unknown64(machInst);
+ }
default:
return new Unknown64(machInst);
}
diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa
index 0da7f06..53c0f11 100644
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -1082,6 +1082,71 @@
complex=True)
threeEqualRegInstX("fcmla", "FcmlaQX", "SimdFloatMultAccOp",
floatTypes, 4, fcmla_vec, True, complex=True)
+
+ def intDotInst(name, Name, opClass,
+ destIsSigned, src1IsSigned, src2IsSigned,
+ rCount, byElem):
+ destType = "int32_t" if destIsSigned else "uint32_t"
+ src1Type = "int8_t" if src1IsSigned else "uint8_t"
+ src2Type = "int8_t" if src2IsSigned else "uint8_t"
+ dotCode = '''
+ using Src1Element = %(src1Type)s;
+ using Src2Element = %(src2Type)s;
+
+ // Neon dot instructions always generate one output element
+ // from 4 pairs of source elements.
+ static_assert(sizeof(Element) == 4 * sizeof(Src1Element));
+ static_assert(sizeof(Element) == 4 * sizeof(Src2Element));
+
+ // Extended source element types to avoid overflow of intermediate
+ // calculations.
+ using ExtendedSrc1Element =
+ typename vector_element_traits::
+ extend_element<Element, Src1Element>::type;
+ using ExtendedSrc2Element =
+ typename vector_element_traits::
+ extend_element<Element, Src2Element>::type;
+
+ for (unsigned i = 0; i < eCount; ++i) {
+ Element src1ElemsPacked = letoh(srcReg1.elements[i]);
+ Element src2ElemsPacked = letoh(srcReg2.elements[%(src2Index)s]);
+
+ Src1Element *src1Elems =
+ reinterpret_cast<Src1Element*>(&src1ElemsPacked);
+ Src2Element *src2Elems =
+ reinterpret_cast<Src2Element*>(&src2ElemsPacked);
+
+ // Dot instructions accumulate into the dest reg
+ Element destElem = letoh(destReg.elements[i]);
+
+ for (unsigned j = 0; j < 4; ++j) {
+ ExtendedSrc1Element src1Elem =
+ static_cast<ExtendedSrc1Element>(src1Elems[j]);
+ ExtendedSrc2Element src2Elem =
+ static_cast<ExtendedSrc2Element>(src2Elems[j]);
+ destElem += src1Elem * src2Elem;
+ }
+ destReg.elements[i] = htole(destElem);
+ }
+ ''' % dict(src1Type=src1Type, src2Type=src2Type,
+ src2Index="imm" if byElem else "i")
+ threeEqualRegInstX(name, Name, opClass, (destType,), rCount,
+ dotCode, readDest=True, byElem=byElem,
+ complex=True)
+
+ # SDOT (vector)
+ intDotInst('sdot', 'SdotDX', 'SimdAluOp', True, True, True, 2, False)
+ intDotInst('sdot', 'SdotQX', 'SimdAluOp', True, True, True, 4, False)
+ # SDOT (element)
+ intDotInst('sdot', 'SdotElemDX', 'SimdAluOp', True, True, True, 2, True)
+ intDotInst('sdot', 'SdotElemQX', 'SimdAluOp', True, True, True, 4, True)
+ # UDOT (vector)
+ intDotInst('udot', 'UdotDX', 'SimdAluOp', False, False, False, 2, False)
+ intDotInst('udot', 'UdotQX', 'SimdAluOp', False, False, False, 4, False)
+ # UDOT (element)
+ intDotInst('udot', 'UdotElemDX', 'SimdAluOp', False, False, False, 2, True)
+ intDotInst('udot', 'UdotElemQX', 'SimdAluOp', False, False, False, 4, True)
+
# CLS
clsCode = '''
unsigned count = 0;
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index b978044..ed15f25 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -3988,6 +3988,7 @@
isar0_el1.sha1 = 0;
isar0_el1.aes = 0;
}
+ isar0_el1.dp = release->has(ArmExtension::FEAT_DOTPROD) ? 0x1 : 0x0;
isar0_el1.atomic = release->has(ArmExtension::FEAT_LSE) ? 0x2 : 0x0;
isar0_el1.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
isar0_el1.tme = release->has(ArmExtension::TME) ? 0x1 : 0x0;
--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70736?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I4caa3b97a74c65f32421487c55c3e36427194e61
Gerrit-Change-Number: 70736
Gerrit-PatchSet: 9
Gerrit-Owner: Giacomo Travaglini <giacomo.travagl...@arm.com>
Gerrit-Reviewer: Andreas Sandberg <andreas.sandb...@arm.com>
Gerrit-Reviewer: Bobby Bruce <bbr...@ucdavis.edu>
Gerrit-Reviewer: Jason Lowe-Power <power...@gmail.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-CC: Richard Cooper <richard.coo...@arm.com>
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org