Giacomo Travaglini has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/70277?usp=email )
Change subject: arch-arm: Partial SVE2 Implementation
......................................................................
arch-arm: Partial SVE2 Implementation
Instructions added:
ADCLB/T, SBCLB/T, BGRP, RAX1, EOR3, BCAX,
XAR, TBX, PMUL, PMULLB/T, SMULLB/T and UMULLB/T
Change-Id: Ia135ba9300eae312b24342bcbda835fef6867113
---
M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/formats/sve_top_level.isa
M src/arch/arm/isa/insts/sve.isa
3 files changed, 482 insertions(+), 16 deletions(-)
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 2ee3817..dae6fc6 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -483,6 +483,196 @@
} // decodeSveIntArithUnpred
StaticInstPtr
+ decodeSveIntMulUnpred(ExtMachInst machInst)
+ {
+ RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+ RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+ RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+ uint8_t opc = bits(machInst, 11, 10);
+ uint8_t size = bits(machInst, 23, 22);
+
+ switch (opc) {
+ case 0x1:
+ if (size == 0x0) {
+ return new SvePmul<uint8_t>(machInst, zd, zn, zm);
+ }
+ [[fallthrough]];
+ case 0x0:
+ // MUL (vectors, unpredicated)
+ case 0x2:
+ // SMULH (unpredicated)
+ case 0x3:
+ // UMULH (unpredicated)
+ default:
+ return new Unknown64(machInst);
+ }
+
+ } // decodeSveIntMulUnpred
+
+ StaticInstPtr
+ decodeSveIntTerUnpred(ExtMachInst machInst)
+ {
+ RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+ RegIndex zk = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+ RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+ uint8_t opc = bits(machInst, 23, 22) << 1 | bits(machInst, 10);
+
+ switch (opc) {
+ case 0x0:
+ return new SveEor3<uint64_t>(machInst, zdn, zm, zk);
+ case 0x2:
+ return new SveBcax<uint64_t>(machInst, zdn, zm, zk);
+ case 0x1:
+ // BSL
+ case 0x3:
+ // BSL1N
+ case 0x5:
+ // BSL2N
+ case 0x7:
+ // NBSL
+ default:
+ return new Unknown64(machInst);
+ }
+ } // decodeSveIntTerUnpred
+
+ StaticInstPtr
+ decodeSveIntMulLong(ExtMachInst machInst)
+ {
+ RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+ RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+ RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+ uint8_t opc_u_t = bits(machInst, 12, 10);
+ uint8_t size = bits(machInst, 23, 22);
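+ // opc_u_t packs bits<12:10>: bits<12:11> pick the product family
+ // (00 SQDMULL, 01 PMULL, 10 SMULL, 11 UMULL) and bit<10> picks
+ // bottom (B) or top (T) elements.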
+
+ switch (opc_u_t) {
+ case 0x2:
+ return decodeSveBinUnpredS2<SvePmullb>(
+ size, machInst, zd, zn, zm);
+ case 0x3:
+ return decodeSveBinUnpredS2<SvePmullt>(
+ size, machInst, zd, zn, zm);
+ case 0x4:
+ return decodeSveBinUnpred2<SveSmullb>(
+ size, 0, machInst, zd, zn, zm);
+ case 0x5:
+ return decodeSveBinUnpred2<SveSmullt>(
+ size, 0, machInst, zd, zn, zm);
+ case 0x6:
+ return decodeSveBinUnpred2<SveUmullb>(
+ size, 1, machInst, zd, zn, zm);
+ case 0x7:
+ return decodeSveBinUnpred2<SveUmullt>(
+ size, 1, machInst, zd, zn, zm);
+ case 0x0:
+ // SQDMULLB
+ case 0x1:
+ // SQDMULLT
+ default:
+ return new Unknown64(machInst);
+ }
+ } // decodeSveIntMulLong
+
+ StaticInstPtr
+ decodeSveBitPerm(ExtMachInst machInst)
+ {
+ RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+ RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+ RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+ uint8_t opc = bits(machInst, 11, 10);
+ uint8_t size = bits(machInst, 23, 22);
+
+ switch (opc) {
+ case 0x2:
+ return decodeSveBinUnpredU<SveBgrp>(
+ size, machInst, zd, zn, zm);
+ case 0x0:
+ // BEXT
+ case 0x1:
+ // BDEP
+ default:
+ return new Unknown64(machInst);
+ }
+ } // decodeSveBitPerm
+
+ StaticInstPtr
+ decodeSveIntLongCarry(ExtMachInst machInst)
+ {
+ RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+ RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+ RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+ uint8_t opc = (bits(machInst, 23) << 1) | bits(machInst, 10);
+ uint8_t size = bits(machInst, 22);
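+ // bit<23> selects subtract (SBCL*), bit<10> selects the top
+ // variant; size (bit<22>) picks 32-bit or 64-bit elements.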
+
+ switch (opc) {
+ case 0x0:
+ return decodeSveTerUnpredU<SveAdclb>(
+ size, machInst, zda, zn, zm);
+ case 0x1:
+ return decodeSveTerUnpredU<SveAdclt>(
+ size, machInst, zda, zn, zm);
+ case 0x2:
+ return decodeSveTerUnpredU<SveSbclb>(
+ size, machInst, zda, zn, zm);
+ case 0x3:
+ return decodeSveTerUnpredU<SveSbclt>(
+ size, machInst, zda, zn, zm);
+ default:
+ return new Unknown64(machInst);
+ }
+ } // decodeSveIntLongCarry
+
+ StaticInstPtr
+ decodeSveIntRotImm(ExtMachInst machInst)
+ {
+ RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+ RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+ uint8_t imm3 = (uint8_t) bits(machInst, 18, 16);
+
+ uint8_t tsize = (bits(machInst, 23, 22) << 2) | bits(machInst, 20, 19);
+ uint8_t esize = 0;
+ uint8_t size = 0;
+
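+ // tsize encodes the element size as the position of its leading
+ // set bit: 0001 -> 8, 001x -> 16, 01xx -> 32, 1xxx -> 64 bits.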
+ if (tsize == 0x0) {
+ return new Unknown64(machInst);
+ } else if (tsize == 0x1) {
+ esize = 8;
+ } else if ((tsize & 0x0E) == 0x2) {
+ esize = 16;
+ size = 1;
+ } else if ((tsize & 0x0C) == 0x4) {
+ esize = 32;
+ size = 2;
+ } else if ((tsize & 0x08) == 0x8) {
+ esize = 64;
+ size = 3;
+ }
+
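+ // The rotate amount is (2 * esize) - UInt(tsize:imm3), giving a
+ // right rotation in the range [1, esize].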
+ unsigned rot_am = 2 * esize - ((tsize << 3) | imm3);
+ return decodeSveBinImmDestrUnpredU<SveXar>(
+ size, machInst, zdn, zm, rot_am);
+ } // decodeSveIntRotImm
+
+ StaticInstPtr
+ decodeSveCryptBinConstr(ExtMachInst machInst)
+ {
+ RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+ RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+ RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+ uint8_t size = bits(machInst, 23, 22);
+ uint8_t opc = bits(machInst, 10);
+ uint8_t size_opc = (size << 1) | opc;
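+ // size<23:22> must be zero; bit<10> distinguishes SM4EKEY (0)
+ // from RAX1 (1).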
+
+ switch (size_opc) {
+ case 0x1:
+ return new SveRax1<uint64_t>(machInst, zd, zn, zm);
+ case 0x0:
+ // SM4EKEY
+ default:
+ return new Unknown64(machInst);
+ }
+ } // decodeSveCryptBinConstr
+
+ StaticInstPtr
decodeSveIntLogUnpred(ExtMachInst machInst)
{
RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
@@ -1014,12 +1204,19 @@
decodeSvePermUnpred(ExtMachInst machInst)
{
uint8_t b12_10 = bits(machInst, 12, 10);
- if (b12_10 == 0x4) {
+ if ((b12_10 == 0x4) || (bits(machInst, 12, 11) == 0x1)) {
unsigned size = (unsigned) bits(machInst, 23, 22);
RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
- return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm);
+ if (b12_10 == 0x4) { // TBL, two sources
+ return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm);
+ } else if (bits(machInst, 10) == 0x1) { // TBX
+ return decodeSveBinUnpredU<SveTbx>(size, machInst, zd, zn, zm);
+ }
+ // TBL, three sources: not implemented
+ return new Unknown64(machInst);
} else if (bits(machInst, 20, 16) == 0x0 && b12_10 == 0x6) {
uint8_t size = bits(machInst, 23, 22);
RegIndex rn = makeSP(
@@ -1362,7 +1559,6 @@
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
RegIndex pg = (RegIndex) (uint8_t) bits(machInst, 13, 10);
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
-
uint8_t size = bits(machInst, 23, 22);
return decodeSveBinConstrPredU<SveSel>(size,
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa
index 155ec1c..04642b8 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -44,7 +44,9 @@
StaticInstPtr decodeSveShiftByImmPred(ExtMachInst machInst);
StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst);
StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst);
+ StaticInstPtr decodeSveIntMulUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst);
+ StaticInstPtr decodeSveIntTerUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveIndexGen(ExtMachInst machInst);
StaticInstPtr decodeSveStackAlloc(ExtMachInst machInst);
@@ -69,6 +71,11 @@
StaticInstPtr decodeSvePsel(ExtMachInst machInst);
StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveClamp(ExtMachInst machInst);
+ StaticInstPtr decodeSveIntRotImm(ExtMachInst machInst);
+ StaticInstPtr decodeSveCryptBinConstr(ExtMachInst machInst);
+ StaticInstPtr decodeSveBitPerm(ExtMachInst machInst);
+ StaticInstPtr decodeSveIntLongCarry(ExtMachInst machInst);
+ StaticInstPtr decodeSveIntMulLong(ExtMachInst machInst);
StaticInstPtr decodeSveMultiplyAddUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveMultiplyIndexed(ExtMachInst machInst);
@@ -141,7 +148,15 @@
switch (b_15_14) {
case 0x0:
if (b_13) {
- return decodeSveIntLogUnpred(machInst);
+ if (bits(machInst, 11)) {
+ return decodeSveIntTerUnpred(machInst);
+ } else {
+ if (bits(machInst, 10)) {
+ return decodeSveIntRotImm(machInst);
+ } else {
+ return decodeSveIntLogUnpred(machInst);
+ }
+ }
} else {
if (bits(machInst, 30)) {
return decodeSveMultiplyIndexed(machInst);
@@ -151,7 +166,7 @@
}
case 0x1:
if (b_13) {
- return new Unknown64(machInst);
+ return decodeSveIntMulUnpred(machInst);
} else if (b_12) {
return decodeSveStackAlloc(machInst);
} else {
@@ -173,10 +188,23 @@
break;
}
case 0x2:
- if (bits(machInst, 20)) {
- return decodeSveIntWideImmPred(machInst);
+ if (bits(machInst, 30)) {
+ uint8_t b_15_14_13 = bits(machInst, 15, 13);
+ switch (b_15_14_13) {
+ case 0x3:
+ return decodeSveIntMulLong(machInst);
+ case 0x5:
+ return decodeSveBitPerm(machInst);
+ case 0x6:
+ return decodeSveIntLongCarry(machInst);
+ }
+ break;
} else {
- return decodeSveLogMaskImm(machInst);
+ if (bits(machInst, 20)) {
+ return decodeSveIntWideImmPred(machInst);
+ } else {
+ return decodeSveLogMaskImm(machInst);
+ }
}
case 0x3:
{
@@ -198,7 +226,11 @@
case 0x2:
return decodeSvePermPred(machInst);
case 0x3:
- return decodeSveSelVec(machInst);
+ if (bits(machInst, 30)) {
+ return decodeSveCryptBinConstr(machInst);
+ } else {
+ return decodeSveSelVec(machInst);
+ }
}
break;
}
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 97d4ec7..91ecb47 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -325,6 +325,28 @@
}
}
+
+ // Decodes binary with immediate operand, destructive, unpredicated
+ // SVE instructions, handling unsigned variants only.
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeSveBinImmDestrUnpredU(unsigned size, ExtMachInst machInst,
+ RegIndex dest, RegIndex op1, unsigned immediate)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint8_t>(machInst, dest, op1, immediate);
+ case 1:
+ return new Base<uint16_t>(machInst, dest, op1, immediate);
+ case 2:
+ return new Base<uint32_t>(machInst, dest, op1, immediate);
+ case 3:
+ return new Base<uint64_t>(machInst, dest, op1, immediate);
+ default:
+ return new Unknown64(machInst);
+ }
+ }
+
// Decodes binary with immediate operand, destructive, predicated (merging)
// SVE instructions, handling unsigned variants only.
template <template <typename T> class Base>
@@ -612,6 +634,37 @@
}
// Decodes binary, constructive, unpredicated SVE instructions.
+ // Limited variants: sizes 1-3 select 8/16/32-bit source elements,
+ // signed or unsigned according to the u flag.
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeSveBinUnpred2(unsigned size, unsigned u, ExtMachInst machInst,
+ RegIndex dest, RegIndex op1, RegIndex op2)
+ {
+ switch (size) {
+ case 1:
+ if (u) {
+ return new Base<uint8_t>(machInst, dest, op1, op2);
+ } else {
+ return new Base<int8_t>(machInst, dest, op1, op2);
+ }
+ case 2:
+ if (u) {
+ return new Base<uint16_t>(machInst, dest, op1, op2);
+ } else {
+ return new Base<int16_t>(machInst, dest, op1, op2);
+ }
+ case 3:
+ if (u) {
+ return new Base<uint32_t>(machInst, dest, op1, op2);
+ } else {
+ return new Base<int32_t>(machInst, dest, op1, op2);
+ }
+ default:
+ return new Unknown64(machInst);
+ }
+ }
+
+ // Decodes binary, constructive, unpredicated SVE instructions.
// Unsigned instructions only.
template <template <typename T> class Base>
StaticInstPtr
@@ -653,6 +706,25 @@
}
}
+ // Decodes binary, constructive, unpredicated SVE instructions.
+ // Unsigned instructions only, limited variants: size 0 selects
+ // 64-bit, 1 selects 8-bit and 3 selects 32-bit source elements.
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeSveBinUnpredS2(unsigned size, ExtMachInst machInst, RegIndex dest,
+ RegIndex op1, RegIndex op2)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint64_t>(machInst, dest, op1, op2);
+ case 1:
+ return new Base<uint8_t>(machInst, dest, op1, op2);
+ case 3:
+ return new Base<uint32_t>(machInst, dest, op1, op2);
+ default:
+ return new Unknown64(machInst);
+ }
+ }
+
// Decodes binary, constructive, unpredicated SVE instructions, handling
// floating-point variants only.
template <template <typename T> class Base>
@@ -926,6 +998,24 @@
}
}
+ // Decodes ternary, destructive, unpredicated SVE instructions,
+ // handling unsigned words & double words only.
+ template <template <typename T> class Base>
+ StaticInstPtr
+ decodeSveTerUnpredU(unsigned size, ExtMachInst machInst,
+ RegIndex dest, RegIndex op1, RegIndex op2)
+ {
+ switch (size) {
+ case 0:
+ return new Base<uint32_t>(machInst, dest, op1, op2);
+ case 1:
+ return new Base<uint64_t>(machInst, dest, op1, op2);
+ default:
+ return new Unknown64(machInst);
+ }
+ }
+
// Decodes ternary with immediate operand, destructive, unpredicated SVE
// instructions handling floating-point variants only.
template <template <typename T> class Base>
@@ -1898,8 +1988,7 @@
def sveBinInst(name, Name, opClass, types, op, predType=PredType.NONE,
isDestructive=False, customIterCode=None,
decoder='Generic'):
- assert not (predType in (PredType.NONE, PredType.SELECT) and
- isDestructive)
+ assert not ((predType == PredType.SELECT) and isDestructive)
global header_output, exec_output, decoders
code = sveEnabledCheckCode + '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
@@ -1914,7 +2003,12 @@
code += '''
const Element& srcElem1 = AA64FpOp1_x[i];'''
code += '''
- const Element& srcElem2 = AA64FpOp2_x[i];
+ const Element& srcElem2 = AA64FpOp2_x[i];'''
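+ # Unpredicated destructive ops (e.g. EOR3, BCAX) seed destElem
+ # from the destination register instead of zero.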
+ if (predType == PredType.NONE) and isDestructive:
+ code += '''
+ Element destElem = AA64FpDestMerge_x[i];'''
+ else:
+ code += '''
Element destElem = 0;'''
if predType != PredType.NONE:
code += '''
@@ -2592,8 +2686,8 @@
'class_name' : 'Sve' + Name}
exec_output += SveOpExecDeclare.subst(substDict)
- # Generate definitions for SVE TBL instructions
- def sveTblInst(name, Name, opClass, decoder = 'Generic'):
+ # Generate definitions for SVE table lookup instructions with 2 sources
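+ # (with merging=True an out-of-range index keeps the destination
+ # element, as TBX requires; otherwise it is zeroed, as for TBL)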
+ def sveTblInst(name, Name, opClass, decoder = 'Generic', merging = False):
global header_output, exec_output, decoders
code = sveEnabledCheckCode + '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
@@ -2604,10 +2698,10 @@
if (idx < eCount) {
val = AA64FpOp1_x[idx];
} else {
- val = 0;
+ val = %(dest_elem)s;
}
AA64FpDest_x[i] = val;
- }'''
+ }''' % {'dest_elem': 'AA64FpDestMerge_x[i]' if merging else '0'}
iop = ArmInstObjParams(name, 'Sve' + Name, 'SveTblOp',
{'code': code, 'op_class': opClass}, [])
header_output += SveBinUnpredOpDeclare.subst(iop)
@@ -2617,6 +2711,63 @@
'class_name' : 'Sve' + Name}
exec_output += SveOpExecDeclare.subst(substDict)
+ # Generate definitions for integer add/subtract long with carry
+ def sveLongCarryInst(name, Name, opClass, decoder = 'Generic',
+ uptTop = False, subtract = False):
+ global header_output, exec_output, decoders
+ code = sveEnabledCheckCode + '''
+ unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+ xc->tcBase());
+ for (int i = 0; i < eCount/2; ++i) {
+ const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s];
+ const Element& srcElem2 = AA64FpOp2_x[2*i+1];
+ const Element& srcElem3 = AA64FpDestMerge_x[2*i];
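+ // The accumulator comes from the even destination elements and
+ // the carry-in from bit 0 of the odd elements of the second
+ // source; widening to 128 bits exposes the carry-out below.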
+ __uint128_t unsigned_sum = (__uint128_t)srcElem3 +
+ (%(op)ssrcElem1) +
+ (srcElem2 & 0x1);
+ AA64FpDest_x[2*i] = (Element)unsigned_sum;
+ AA64FpDest_x[2*i+1] = (Element)unsigned_sum !=
+ (__uint128_t)unsigned_sum;
+ }
+ ''' % {'offset': 1 if uptTop else 0,
+ 'op': '~' if subtract else '',
+ }
+ iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp',
+ {'code': code, 'op_class': opClass}, [])
+ header_output += SveBinUnpredOpDeclare.subst(iop)
+ exec_output += SveOpExecute.subst(iop)
+ for type in ('uint32_t', 'uint64_t'):
+ substDict = {'targs' : type,
+ 'class_name' : 'Sve' + Name}
+ exec_output += SveOpExecDeclare.subst(substDict)
+
+ # Generate definitions for long integer/poly multiplication instructions
+ def sveLongMulInst(name, Name, opClass, types, op, decoder = 'Generic',
+ uptTop = False):
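+ # The op snippet must declare a destElem wide enough for the full
+ # product; its low and high halves land in adjacent result elements.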
+ global header_output, exec_output, decoders
+ code = sveEnabledCheckCode + '''
+ unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+ xc->tcBase());
+ for (int i = 0; i < eCount/2; ++i) {
+ const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s];
+ const Element& srcElem2 = AA64FpOp2_x[2*i+%(offset)s];
+ %(op)s
+ AA64FpDest_x[2*i] = (Element)destElem;
+ AA64FpDest_x[2*i+1] = (Element)(destElem >>
+ (sizeof(Element) << 3));
+ }
+ ''' % {'offset': 1 if uptTop else 0,
+ 'op': op,
+ }
+ iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp',
+ {'code': code, 'op_class': opClass}, [])
+ header_output += SveBinUnpredOpDeclare.subst(iop)
+ exec_output += SveOpExecute.subst(iop)
+ for type in types:
+ substDict = {'targs' : type,
+ 'class_name' : 'Sve' + Name}
+ exec_output += SveOpExecDeclare.subst(substDict)
+
# Generate definitions for SVE Unpack instructions
def sveUnpackInst(name, Name, opClass, sdtypes, unpackHalf,
regType, decoder = 'Generic'):
@@ -3167,6 +3318,10 @@
absCode = 'destElem = (Element) std::abs(srcElem1);'
sveUnaryInst('abs', 'Abs', 'SimdAluOp', signedTypes, absCode,
PredType.MERGE)
+ # ADCLB
+ sveLongCarryInst('adclb', 'Adclb', 'SimdAluOp')
+ # ADCLT
+ sveLongCarryInst('adclt', 'Adclt', 'SimdAluOp', uptTop = True)
# ADD (immediate)
sveWideImmInst('add', 'AddImm', 'SimdAddOp', unsignedTypes, addCode, False)
# ADD (vectors, predicated)
@@ -3272,6 +3427,29 @@
'''
sveBinInst('asrr', 'Asrr', 'SimdAluOp', unsignedTypes, asrrCode,
PredType.MERGE, True)
+ # BCAX
+ bcaxCode = 'destElem ^= srcElem1 & (~srcElem2);'
+ sveBinInst('bcax', 'Bcax', 'SimdAluOp', ('uint64_t',), bcaxCode,
+ isDestructive=True)
+ # BGRP
+ bgrpCode = '''
+ int k = 0;
+ int len = sizeof(Element) * 8;
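+ // Bits of srcElem1 selected by set bits of srcElem2 are packed
+ // toward bit 0; the remaining bits are packed toward the top,
+ // each group keeping its relative bit order.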
+ for (int j = 0; j < len; j++) {
+ if (((srcElem2 >> j) & (Element)0x1) == (Element)0x1) {
+ destElem |= ((srcElem1 >> j) & (Element)0x1) << k;
+ k++;
+ }
+ }
+ k = len - 1;
+ for (int j = len - 1; j >= 0; j--) {
+ if (((srcElem2 >> j) & (Element)0x1) == (Element)0x0) {
+ destElem |= ((srcElem1 >> j) & (Element)0x1) << k;
+ k--;
+ }
+ }
+ '''
+ sveBinInst('bgrp', 'Bgrp', 'SimdAluOp', unsignedTypes, bgrpCode)
# BIC (vectors, predicated)
bicCode = 'destElem = srcElem1 & ~srcElem2;'
sveBinInst('bic', 'BicPred', 'SimdAluOp', unsignedTypes, bicCode,
@@ -3555,6 +3733,10 @@
eorCode)
svePredLogicalInst('eors', 'PredEors', 'SimdPredAluOp', ('uint8_t',),
eorCode, isFlagSetting=True)
+ # EOR3
+ eor3Code = 'destElem ^= srcElem1 ^ srcElem2;'
+ sveBinInst('eor3', 'Eor3', 'SimdAluOp', ('uint64_t',), eor3Code,
+ isDestructive=True)
# EORV
eorvCode = 'destElem ^= srcElem1;'
sveAssocReducInst('eorv', 'Eorv', 'SimdReduceAluOp', unsignedTypes,
@@ -4122,6 +4304,30 @@
pfalseCode)
# PFIRST
svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp')
+ # PMUL
+ exec_output += '''
+ __uint128_t poly_mul(uint64_t srcElem1, uint64_t srcElem2)
+ {
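+ // Carry-less (GF(2)) multiply: partial products are combined
+ // with XOR instead of addition.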
+ __uint128_t destElem = 0;
+ __uint128_t extendedElem2 = srcElem2;
+ for (int i = 0; i < 64; i++) {
+ if (((srcElem1 >> i) & 0x1) == 0x1) {
+ destElem ^= (extendedElem2 << i);
+ }
+ }
+ return destElem;
+ }'''
+ pmulCode = 'destElem = (uint8_t)poly_mul(srcElem1, srcElem2);'
+ sveBinInst('pmul', 'Pmul', 'SimdAluOp', ('uint8_t',), pmulCode)
+ # PMULLB
+ pmullCode = '__uint128_t destElem = poly_mul(srcElem1, srcElem2);'
+ sveLongMulInst('pmullb', 'Pmullb', 'SimdAluOp',
+ ('uint8_t','uint32_t','uint64_t',), pmullCode)
+ # PMULLT
+ sveLongMulInst('pmullt', 'Pmullt', 'SimdAluOp',
+ ('uint8_t','uint32_t','uint64_t',),
+ pmullCode, uptTop = True)
# PNEXT
svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes)
# PSEL
@@ -4138,6 +4344,9 @@
# PUNPKLO
sveUnpackInst('punpklo', 'Punpklo', 'SimdPredAluOp',
unsignedWideSDTypes,
unpackHalf = Unpack.Low, regType = SrcRegType.Predicate)
+ # RAX1
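+ # destElem = srcElem1 ^ ROL64(srcElem2, 1) (SHA-3 rotate-and-xor)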
+ rax1Code = 'destElem = srcElem1 ^ ((srcElem2 << 1) | (srcElem2 >> 63));'
+ sveBinInst('rax1', 'Rax1', 'SimdAluOp', ('uint64_t',), rax1Code)
# RBIT
rbitCode = '''
destElem = reverseBits(srcElem1);'''
@@ -4214,6 +4423,11 @@
'''
sveBinInst('sabd', 'Sabd', 'SimdAddOp', signedTypes, abdCode,
PredType.MERGE, True)
+ # SBCLB
+ sveLongCarryInst('sbclb', 'Sbclb', 'SimdAluOp', subtract = True)
+ # SBCLT
+ sveLongCarryInst('sbclt', 'Sbclt', 'SimdAluOp', uptTop = True,
+ subtract = True)
# SADDV
addvCode = 'destElem += srcElem1;'
sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp',
@@ -4372,6 +4586,13 @@
destElem = do_mulh(srcElem1, srcElem2);'''
sveBinInst('smulh', 'Smulh', 'SimdMultOp', signedTypes, mulhCode,
PredType.MERGE, True)
+ # SMULLB
+ smullCode = 'int64_t destElem = (int64_t)srcElem1 * (int64_t)srcElem2;'
+ sveLongMulInst('smullb', 'Smullb', 'SimdAluOp',
+ ('int8_t','int16_t','int32_t',), smullCode)
+ # SMULLT
+ sveLongMulInst('smullt', 'Smullt', 'SimdAluOp',
+ ('int8_t','int16_t','int32_t',), smullCode, uptTop = True)
# SPLICE
sveSpliceInst('splice', 'Splice', 'SimdAluOp', unsignedTypes)
# SQADD (immediate)
@@ -4557,6 +4778,8 @@
sxtCode, PredType.MERGE)
# TBL
sveTblInst('tbl', 'Tbl', 'SimdAluOp')
+ # TBX
+ sveTblInst('tbx', 'Tbx', 'SimdAluOp', merging=True)
# TRN1, TRN2 (predicates)
trnPredIterCode = '''
constexpr unsigned sz = sizeof(Element);
@@ -4654,6 +4877,14 @@
# UMULH
sveBinInst('umulh', 'Umulh', 'SimdMultOp', unsignedTypes, mulhCode,
PredType.MERGE, True)
+ # UMULLB
+ umullCode = 'uint64_t destElem = (uint64_t)srcElem1 * (uint64_t)srcElem2;'
+ sveLongMulInst('umullb', 'Umullb', 'SimdAluOp',
+ ('uint8_t','uint16_t','uint32_t',), umullCode)
+ # UMULLT
+ sveLongMulInst('umullt', 'Umullt', 'SimdAluOp',
+ ('uint8_t','uint16_t','uint32_t',), umullCode,
+ uptTop = True)
# UQADD (immediate)
uqaddCode = '''
destElem = srcElem1 + srcElem2;
@@ -4861,6 +5092,13 @@
Ffr_ub[i] = POp1_ub[i];
}'''
svePredWriteFfrInst('wrffr', 'Wrffr', 'SimdPredAluOp', wrffrCode, False)
+ # XAR
+ xarCode = '''
+ destElem = AA64FpDestMerge_x[i] ^ srcElem1;
+ // srcElem2 (the rotate amount) lies in [1, esize]; mask both
+ // shift counts so a full rotation never shifts by the width.
+ destElem = (destElem >> (srcElem2 & (sizeof(Element) * 8 - 1))) |
+ (destElem << ((sizeof(Element) * 8 - srcElem2) &
+ (sizeof(Element) * 8 - 1)));
+ '''
+ sveBinImmInst('xar', 'Xar', 'SimdAluOp', unsignedTypes, xarCode)
# ZIP1, ZIP2 (predicates)
zipPredIterCode = '''
constexpr unsigned sz = sizeof(Element);
--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70277?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-MessageType: newchange
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ia135ba9300eae312b24342bcbda835fef6867113
Gerrit-Change-Number: 70277
Gerrit-PatchSet: 1
Gerrit-Owner: Giacomo Travaglini <giacomo.travagl...@arm.com>
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org