Bobby Bruce has submitted this change.
( https://gem5-review.googlesource.com/c/public/gem5/+/70728?usp=email )
(
6 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the
submitted one.
)
Change subject: arch-arm: Added 128-bit encodings of SVE TRN, UZP, and ZIP insts.
......................................................................
arch-arm: Added 128-bit encodings of SVE TRN, UZP, and ZIP insts.
Add support for the 128-bit element encodings of the TRN1, TRN2, UZP1,
UZP2, ZIP1, and ZIP2 instructions, required by the Armv8.2 SVE
Double-precision floating-point Matrix Multiplication
instructions (ARMv8.2-F64MM).
For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/docs/arm-architecture-reference-manual-supplement-armv8-a)
Change-Id: I496576340c48410fedb2cf6fc7d1a02e219b3bd4
Reviewed-by: Richard Cooper <richard.coo...@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70728
Maintainer: Jason Lowe-Power <power...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
Reviewed-by: Jason Lowe-Power <power...@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandb...@arm.com>
Reviewed-by: Giacomo Travaglini <giacomo.travagl...@arm.com>
Maintainer: Andreas Sandberg <andreas.sandb...@arm.com>
---
M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/formats/sve_top_level.isa
M src/arch/arm/isa/insts/sve.isa
3 files changed, 108 insertions(+), 35 deletions(-)
Approvals:
Andreas Sandberg: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
Jason Lowe-Power: Looks good to me, but someone else must approve; Looks
good to me, approved
Giacomo Travaglini: Looks good to me, approved
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index f74181a..3d211bc 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -1145,29 +1145,31 @@
} // decodeSvePermPredicates
StaticInstPtr
- decodeSvePermIntlv(ExtMachInst machInst)
+ decodeSvePermIntlv(ExtMachInst machInst, bool f64mm)
{
RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
- uint8_t size = bits(machInst, 23, 22);
+ uint8_t size = f64mm ? 4 : (uint8_t)bits(machInst, 23, 22);
uint8_t opc = bits(machInst, 12, 10);
switch (opc) {
case 0x0:
- return decodeSveBinUnpredU<SveZip1>(size, machInst, zd, zn, zm);
+ return decodeSveBinUnpredUQ<SveZip1>(size, machInst, zd, zn, zm);
case 0x1:
- return decodeSveBinUnpredU<SveZip2>(size, machInst, zd, zn, zm);
+ return decodeSveBinUnpredUQ<SveZip2>(size, machInst, zd, zn, zm);
case 0x2:
- return decodeSveBinUnpredU<SveUzp1>(size, machInst, zd, zn, zm);
+ return decodeSveBinUnpredUQ<SveUzp1>(size, machInst, zd, zn, zm);
case 0x3:
- return decodeSveBinUnpredU<SveUzp2>(size, machInst, zd, zn, zm);
+ return decodeSveBinUnpredUQ<SveUzp2>(size, machInst, zd, zn, zm);
case 0x4:
- return decodeSveBinUnpredU<SveTrn1>(size, machInst, zd, zn, zm);
+ case 0x6:
+ return decodeSveBinUnpredUQ<SveTrn1>(size, machInst, zd, zn, zm);
case 0x5:
- return decodeSveBinUnpredU<SveTrn2>(size, machInst, zd, zn, zm);
+ case 0x7:
+ return decodeSveBinUnpredUQ<SveTrn2>(size, machInst, zd, zn, zm);
}
return new Unknown64(machInst);
} // decodeSvePermIntlv
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa
index b0579fb..61f2f5c 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -57,7 +57,7 @@
StaticInstPtr decodeSvePermExtract(ExtMachInst machInst);
StaticInstPtr decodeSvePermUnpred(ExtMachInst machInst);
StaticInstPtr decodeSvePermPredicates(ExtMachInst machInst);
- StaticInstPtr decodeSvePermIntlv(ExtMachInst machInst);
+ StaticInstPtr decodeSvePermIntlv(ExtMachInst machInst, bool f64mm);
StaticInstPtr decodeSvePermPred(ExtMachInst machInst);
StaticInstPtr decodeSveSelVec(ExtMachInst machInst);
StaticInstPtr decodeSveIntCmpVec(ExtMachInst machInst);
@@ -202,11 +202,18 @@
if (b_13) {
return decodeSvePermUnpred(machInst);
} else {
- return decodeSvePermExtract(machInst);
+ uint8_t b_23 = bits(machInst, 23);
+ if (b_23) {
+ // 128-bit element encodings for Armv8.6 F64MM
+ return decodeSvePermIntlv(machInst, true);
+ } else {
+ return decodeSvePermExtract(machInst);
+ }
}
case 0x1:
if (b_13) {
- return decodeSvePermIntlv(machInst);
+ // 8,16,32,64-bit element encodings
+ return decodeSvePermIntlv(machInst, false);
} else {
return decodeSvePermPredicates(machInst);
}
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 74eacb8..cbaa2b5 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -633,6 +633,29 @@
}
// Decodes binary, constructive, unpredicated SVE instructions.
+ // Unsigned instructions only, including Quadword variants.
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeSveBinUnpredUQ(unsigned size, ExtMachInst machInst, RegIndex dest,
+ RegIndex op1, RegIndex op2)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint8_t>(machInst, dest, op1, op2);
+ case 1:
+ return new Base<uint16_t>(machInst, dest, op1, op2);
+ case 2:
+ return new Base<uint32_t>(machInst, dest, op1, op2);
+ case 3:
+ return new Base<uint64_t>(machInst, dest, op1, op2);
+ case 4:
+ return new Base<__uint128_t>(machInst, dest, op1, op2);
+ default:
+ return new Unknown64(machInst);
+ }
+ }
+
+ // Decodes binary, constructive, unpredicated SVE instructions.
// Signed instructions only.
template <template <typename T> class Base>
StaticInstPtr
@@ -3299,6 +3322,8 @@
fpTypes = ('uint16_t', 'uint32_t', 'uint64_t')
signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t')
unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t')
+ extendedUnsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
+ '__uint128_t')
smallSignedTypes = ('int8_t', 'int16_t', 'int32_t')
bigSignedTypes = ('int16_t', 'int32_t', 'int64_t')
@@ -4754,23 +4779,36 @@
trnPredIterCode % 1)
# TRN1, TRN2 (vectors)
trnIterCode = '''
+ // SVE F64MM support requires that there are at least two elements
+ // in the vector.
+ if (eCount < 2) {
+ return std::make_shared<UndefinedInstruction>(machInst, false,
+ "%(mnemonic)s");
+ }
int s;
- int part = %d;
+ int part = %(part)d;
ArmISA::VecRegContainer tmpVecC;
auto auxDest = tmpVecC.as<Element>();
- for (unsigned i = 0; i < eCount / 2; i++) {
+ const unsigned eltPairsCount = eCount / 2;
+ const unsigned eltsInPairsCount = eltPairsCount * 2;
+ for (unsigned i = 0; i < eltPairsCount; i++) {
s = 2 * i + part;
auxDest[2 * i] = AA64FpOp1_x[s];
auxDest[2 * i + 1] = AA64FpOp2_x[s];
}
- for (unsigned i = 0; i < eCount; i++) {
+ // Fill output vector with pairs of elements
+ for (unsigned i = 0; i < eltsInPairsCount; i++) {
AA64FpDest_x[i] = auxDest[i];
}
+ // Fill any trailing non-full pairs with zeros
+ for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+ AA64FpDest_x[i] = 0;
+ }
'''
- sveBinInst('trn1', 'Trn1', 'SimdAluOp', unsignedTypes, '',
- customIterCode=trnIterCode % 0)
- sveBinInst('trn2', 'Trn2', 'SimdAluOp', unsignedTypes, '',
- customIterCode=trnIterCode % 1)
+ sveBinInst('trn1', 'Trn1', 'SimdAluOp', extendedUnsignedTypes, '',
+ customIterCode=trnIterCode % dict(mnemonic='trn1', part=0))
+ sveBinInst('trn2', 'Trn2', 'SimdAluOp', extendedUnsignedTypes, '',
+ customIterCode=trnIterCode % dict(mnemonic='trn2', part=1))
# UABD
sveBinInst('uabd', 'Uabd', 'SimdAddOp', unsignedTypes, abdCode,
PredType.MERGE, True)
@@ -4976,26 +5014,39 @@
uzpPredIterCode % 1)
# UZP1, UZP2 (vectors)
uzpIterCode = '''
+ // SVE F64MM support requires that there are at least two elements
+ // in the vector.
+ if (eCount < 2) {
+ return std::make_shared<UndefinedInstruction>(machInst, false,
+ "%(mnemonic)s");
+ }
int s;
- int part = %d;
+ int part = %(part)d;
ArmISA::VecRegContainer tmpVecC;
auto auxDest = tmpVecC.as<Element>();
- for (unsigned i = 0; i < eCount; i++) {
+ const unsigned eltPairsCount = eCount / 2;
+ const unsigned eltsInPairsCount = eltPairsCount * 2;
+ for (unsigned i = 0; i < eltsInPairsCount; i++) {
s = 2 * i + part;
- if (s < eCount) {
+ if (s < eltsInPairsCount) {
auxDest[i] = AA64FpOp1_x[s];
} else {
- auxDest[i] = AA64FpOp2_x[s - eCount];
+ auxDest[i] = AA64FpOp2_x[s - eltsInPairsCount];
}
}
- for (unsigned i = 0; i < eCount; i++) {
+ // Fill output vector with pairs of elements
+ for (unsigned i = 0; i < eltsInPairsCount; i++) {
AA64FpDest_x[i] = auxDest[i];
}
+ // Fill any trailing non-full pairs with zeros
+ for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+ AA64FpDest_x[i] = 0;
+ }
'''
- sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', unsignedTypes, '',
- customIterCode=uzpIterCode % 0)
- sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', unsignedTypes, '',
- customIterCode=uzpIterCode % 1)
+ sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', extendedUnsignedTypes, '',
+ customIterCode=uzpIterCode % dict(mnemonic='uzp1', part=0))
+ sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', extendedUnsignedTypes, '',
+ customIterCode=uzpIterCode % dict(mnemonic='uzp2', part=1))
# WHILELE (32-bit)
whileLECode = '''
cond = srcElem1 <= srcElem2;
@@ -5058,22 +5109,35 @@
zipPredIterCode % 1)
# ZIP1, ZIP2 (vectors)
zipIterCode = '''
+ // SVE F64MM support requires that there are at least two elements
+ // in the vector.
+ if (eCount < 2) {
+ return std::make_shared<UndefinedInstruction>(machInst, false,
+ "%(mnemonic)s");
+ }
int s;
- int part = %d;
+ int part = %(part)d;
ArmISA::VecRegContainer tmpVecC;
auto auxDest = tmpVecC.as<Element>();
- for (unsigned i = 0; i < eCount / 2; i++) {
- s = i + (part * (eCount / 2));
+ const unsigned eltPairsCount = eCount / 2;
+ const unsigned eltsInPairsCount = eltPairsCount * 2;
+ for (unsigned i = 0; i < eltPairsCount; i++) {
+ s = i + (part * (eltsInPairsCount / 2));
auxDest[2 * i] = AA64FpOp1_x[s];
auxDest[2 * i + 1] = AA64FpOp2_x[s];
}
- for (unsigned i = 0; i < eCount; i++) {
+ // Fill output vector with pairs of elements
+ for (unsigned i = 0; i < eltsInPairsCount; i++) {
AA64FpDest_x[i] = auxDest[i];
}
+ // Fill any trailing non-full pairs with zeros
+ for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+ AA64FpDest_x[i] = 0;
+ }
'''
- sveBinInst('zip1', 'Zip1', 'SimdAluOp', unsignedTypes, '',
- customIterCode=zipIterCode % 0)
- sveBinInst('zip2', 'Zip2', 'SimdAluOp', unsignedTypes, '',
- customIterCode=zipIterCode % 1)
+ sveBinInst('zip1', 'Zip1', 'SimdAluOp', extendedUnsignedTypes, '',
+ customIterCode=zipIterCode % dict(mnemonic='zip1', part=0))
+ sveBinInst('zip2', 'Zip2', 'SimdAluOp', extendedUnsignedTypes, '',
+ customIterCode=zipIterCode % dict(mnemonic='zip2', part=1))
}};
--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70728?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I496576340c48410fedb2cf6fc7d1a02e219b3bd4
Gerrit-Change-Number: 70728
Gerrit-PatchSet: 8
Gerrit-Owner: Giacomo Travaglini <giacomo.travagl...@arm.com>
Gerrit-Reviewer: Andreas Sandberg <andreas.sandb...@arm.com>
Gerrit-Reviewer: Bobby Bruce <bbr...@ucdavis.edu>
Gerrit-Reviewer: Giacomo Travaglini <giacomo.travagl...@arm.com>
Gerrit-Reviewer: Jason Lowe-Power <power...@gmail.com>
Gerrit-Reviewer: Richard Cooper <richard.coo...@arm.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org