Giacomo Travaglini has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/70277?usp=email )

Change subject: arch-arm: Partial SVE2 Implementation
......................................................................

arch-arm: Partial SVE2 Implementation

Instructions added:

ADCLB/T, SBCLB/T, BGRP, RAX1, EOR3, BCAX,
XAR & TBX, PMUL, PMULLB/T, SMULLB/T and UMULLB/T

Change-Id: Ia135ba9300eae312b24342bcbda835fef6867113
---
M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/formats/sve_top_level.isa
M src/arch/arm/isa/insts/sve.isa
3 files changed, 482 insertions(+), 16 deletions(-)



diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 2ee3817..dae6fc6 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -483,6 +483,196 @@
     }  // decodeSveIntArithUnpred

     StaticInstPtr
+    decodeSveIntMulUnpred(ExtMachInst machInst)
+    {
+        // SVE2 integer multiply (vectors, unpredicated) group.
+        // Only PMUL is implemented so far; MUL, SMULH and UMULH
+        // decode to Unknown64.
+        RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+        uint8_t opc = bits(machInst, 11, 10);
+        uint8_t size = bits(machInst, 23, 22);
+
+        switch (opc) {
+          case 0x1:
+            // PMUL is only defined for byte elements (size == 0b00).
+            if (size == 0x0) {
+                return new SvePmul<uint8_t>(machInst, zd, zn, zm);
+            }
+            [[fallthrough]];
+          case 0x0:
+            // MUL (vectors, unpredicated)
+          case 0x2:
+            // SMULH (unpredicated)
+          case 0x3:
+            // UMULH (unpredicated)
+          default:
+            return new Unknown64(machInst);
+        }
+    }  // decodeSveIntMulUnpred
+
+    // Decodes SVE2 bitwise ternary operations (destructive).
+    // opc is size<1:0> folded with bit 10: opc = size << 1 | op<10>.
+    // Only EOR3 and BCAX are implemented; the BSL variants and NBSL
+    // decode to Unknown64.
+    StaticInstPtr
+    decodeSveIntTerUnpred(ExtMachInst machInst)
+    {
+        RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zk = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+        uint8_t opc = bits(machInst, 23, 22) << 1 | bits(machInst, 10);
+
+        switch (opc) {
+          case 0x0:
+            // EOR3: Zdn = Zdn ^ Zm ^ Zk
+            return new SveEor3<uint64_t>(machInst, zdn, zm, zk);
+          case 0x2:
+            // BCAX: Zdn = Zdn ^ (Zm & ~Zk)
+            return new SveBcax<uint64_t>(machInst, zdn, zm, zk);
+          case 0x1:
+            // BSL
+          case 0x3:
+            // BSL1N
+          case 0x5:
+            // BSL2N
+          case 0x7:
+            // NBSL
+          default:
+            return new Unknown64(machInst);
+        }
+    }  // decodeSveIntTerUnpred
+
+    // Decodes SVE2 integer multiply long instructions.
+    // opc_u_t is op<12>:U<11>:T<10>; T selects bottom (even) vs top
+    // (odd) source elements. The second argument of
+    // decodeSveBinUnpred2 is the unsigned flag. SQDMULLB/T are not
+    // implemented.
+    StaticInstPtr
+    decodeSveIntMulLong(ExtMachInst machInst)
+    {
+        RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+        uint8_t opc_u_t = bits(machInst, 12, 10);
+        uint8_t size = bits(machInst, 23, 22);
+
+        switch (opc_u_t) {
+            case 0x2:
+              return decodeSveBinUnpredS2<SvePmullb>(
+                      size, machInst, zd, zn, zm);
+            case 0x3:
+              return decodeSveBinUnpredS2<SvePmullt>(
+                      size, machInst, zd, zn, zm);
+            case 0x4:
+              return decodeSveBinUnpred2<SveSmullb>(
+                      size, 0, machInst, zd, zn, zm);
+            case 0x5:
+              return decodeSveBinUnpred2<SveSmullt>(
+                      size, 0, machInst, zd, zn, zm);
+            case 0x6:
+              return decodeSveBinUnpred2<SveUmullb>(
+                      size, 1, machInst, zd, zn, zm);
+            case 0x7:
+              return decodeSveBinUnpred2<SveUmullt>(
+                      size, 1, machInst, zd, zn, zm);
+            case 0x0:
+              // SQDMULLB
+            case 0x1:
+              // SQDMULLT
+            default:
+              return new Unknown64(machInst);
+        }
+    }  // decodeSveIntMulLong
+
+    // Decodes the SVE2 bit-permute group (BEXT, BDEP, BGRP).
+    StaticInstPtr
+    decodeSveBitPerm(ExtMachInst machInst)
+    {
+        RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+        uint8_t opc = bits(machInst, 11, 10);
+        uint8_t size = bits(machInst, 23, 22);
+
+        // Only BGRP (opc == 0b10) is implemented; BEXT (0b00) and
+        // BDEP (0b01) decode to Unknown64.
+        if (opc == 0x2) {
+            return decodeSveBinUnpredU<SveBgrp>(size, machInst, zd, zn, zm);
+        }
+        return new Unknown64(machInst);
+    }  // decodeSveBitPerm
+
+    // Decodes SVE2 add/subtract with carry long (ADCLB/T, SBCLB/T).
+    // opc is bit 23 (subtract) folded with bit 10 (top); size is the
+    // single sz bit 22, mapped by decodeSveTerUnpredU to 32-bit (0)
+    // or 64-bit (1) elements. Zda is the accumulator register.
+    StaticInstPtr
+    decodeSveIntLongCarry(ExtMachInst machInst)
+    {
+        RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+        uint8_t opc = (bits(machInst, 23) << 1) | bits(machInst, 10);
+        uint8_t size = bits(machInst, 22);
+
+        switch (opc) {
+            case 0x0:
+              return decodeSveTerUnpredU<SveAdclb>(
+                        size, machInst, zda, zn, zm);
+            case 0x1:
+              return decodeSveTerUnpredU<SveAdclt>(
+                        size, machInst, zda, zn, zm);
+            case 0x2:
+              return decodeSveTerUnpredU<SveSbclb>(
+                        size, machInst, zda, zn, zm);
+            case 0x3:
+              return decodeSveTerUnpredU<SveSbclt>(
+                        size, machInst, zda, zn, zm);
+            default:
+              return new Unknown64(machInst);
+        }
+    }  // decodeSveIntLongCarry
+
+    // Decodes SVE2 bitwise XOR and rotate right by immediate (XAR).
+    // The rotate amount is encoded in tszh:tszl:imm3: the position of
+    // the leading 1 in tsize selects the element size and the
+    // remaining low bits hold (2 * esize - rotate).
+    StaticInstPtr
+    decodeSveIntRotImm(ExtMachInst machInst)
+    {
+        RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        uint8_t imm3 = bits(machInst, 18, 16);
+        uint8_t tsize = (bits(machInst, 23, 22) << 2) |
+                        bits(machInst, 20, 19);
+
+        uint8_t esize = 0;
+        uint8_t size = 0;
+        if (tsize == 0x0) {
+            // No element size selected: unallocated encoding.
+            return new Unknown64(machInst);
+        } else if (tsize == 0x1) {
+            esize = 8;   // B
+        } else if ((tsize & 0x0E) == 0x2) {
+            esize = 16;  // H
+            size = 1;
+        } else if ((tsize & 0x0C) == 0x4) {
+            esize = 32;  // S
+            size = 2;
+        } else {         // tsize & 0x08: leading 1 in bit 3
+            esize = 64;  // D
+            size = 3;
+        }
+
+        // rotate = 2 * esize - UInt(tsize:imm3), always in [1, esize].
+        unsigned rot_am = 2 * esize - ((tsize << 3) | imm3);
+        return decodeSveBinImmDestrUnpredU<SveXar>(
+                size, machInst, zdn, zm, rot_am);
+    }  // decodeSveIntRotImm
+
+    // Decodes SVE2 crypto constructive binary operations.
+    // size_opc folds size<1:0> with bit 10. Only RAX1 (SVE2-SHA3) is
+    // implemented; SM4EKEY decodes to Unknown64.
+    // NOTE(review): confirm the size/opc split for RAX1 vs SM4EKEY
+    // against the Arm ARM encoding tables.
+    StaticInstPtr
+    decodeSveCryptBinConstr(ExtMachInst machInst)
+    {
+        RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+        uint8_t size = bits(machInst, 23, 22);
+        uint8_t opc = bits(machInst, 10);
+        uint8_t size_opc = (size << 1) | opc;
+
+        switch (size_opc) {
+          case 0x1:
+            // RAX1 operates on 64-bit elements only.
+            return new SveRax1<uint64_t>(machInst, zd, zn, zm);
+          case 0x0:
+            // SM4EKEY
+          default:
+            return new Unknown64(machInst);
+        }
+    }  // decodeSveCryptBinConstr
+
+    StaticInstPtr
     decodeSveIntLogUnpred(ExtMachInst machInst)
     {
         RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
@@ -1014,12 +1204,19 @@
     decodeSvePermUnpred(ExtMachInst machInst)
     {
         uint8_t b12_10 = bits(machInst, 12, 10);
-        if (b12_10 == 0x4) {
+        if ((b12_10 == 0x4) || (bits(machInst, 12, 11) == 0x1)) {
             unsigned size = (unsigned) bits(machInst, 23, 22);
             RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
             RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
             RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
-            return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm);
+            if (b12_10 == 0x4) { // TBL, two sources
+ return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm);
+            } else if (bits(machInst, 10) == 0x1) { // TBX
+ return decodeSveBinUnpredU<SveTbx>(size, machInst, zd, zn, zm);
+            // } else { // TBL, three sources
+                // TBL, three sources
+            }
+            return new Unknown64(machInst);
         } else if (bits(machInst, 20, 16) == 0x0 && b12_10 == 0x6) {
             uint8_t size = bits(machInst, 23, 22);
             RegIndex rn = makeSP(
@@ -1362,7 +1559,6 @@
         RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
         RegIndex pg = (RegIndex) (uint8_t) bits(machInst, 13, 10);
         RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
-
         uint8_t size = bits(machInst, 23, 22);

         return decodeSveBinConstrPredU<SveSel>(size,
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa
index 155ec1c..04642b8 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -44,7 +44,9 @@
     StaticInstPtr decodeSveShiftByImmPred(ExtMachInst machInst);
     StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst);
     StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst);
+    StaticInstPtr decodeSveIntMulUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst);
+    StaticInstPtr decodeSveIntTerUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveIndexGen(ExtMachInst machInst);
     StaticInstPtr decodeSveStackAlloc(ExtMachInst machInst);
@@ -69,6 +71,11 @@
     StaticInstPtr decodeSvePsel(ExtMachInst machInst);
     StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveClamp(ExtMachInst machInst);
+    StaticInstPtr decodeSveIntRotImm(ExtMachInst machInst);
+    StaticInstPtr decodeSveCryptBinConstr(ExtMachInst machInst);
+    StaticInstPtr decodeSveBitPerm(ExtMachInst machInst);
+    StaticInstPtr decodeSveIntLongCarry(ExtMachInst machInst);
+    StaticInstPtr decodeSveIntMulLong(ExtMachInst machInst);

     StaticInstPtr decodeSveMultiplyAddUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveMultiplyIndexed(ExtMachInst machInst);
@@ -141,7 +148,15 @@
                 switch (b_15_14) {
                   case 0x0:
                     if (b_13) {
-                        return decodeSveIntLogUnpred(machInst);
+                        if (bits(machInst, 11)) {
+                            return decodeSveIntTerUnpred(machInst);
+                        } else {
+                            if (bits(machInst, 10)) {
+                                return decodeSveIntRotImm(machInst);
+                            } else {
+                                return decodeSveIntLogUnpred(machInst);
+                            }
+                        }
                     } else {
                         if (bits(machInst, 30)) {
                             return decodeSveMultiplyIndexed(machInst);
@@ -151,7 +166,7 @@
                     }
                   case 0x1:
                     if (b_13) {
-                        return new Unknown64(machInst);
+                        return decodeSveIntMulUnpred(machInst);
                     } else if (b_12) {
                         return decodeSveStackAlloc(machInst);
                     } else {
@@ -173,10 +188,23 @@
                 break;
             }
           case 0x2:
-            if (bits(machInst, 20)) {
-                return decodeSveIntWideImmPred(machInst);
+            if (bits(machInst, 30)) {
+                uint8_t b_15_14_13 = bits(machInst, 15, 13);
+                switch (b_15_14_13) {
+                  case 0x3:
+                    return decodeSveIntMulLong(machInst);
+                  case 0x5:
+                    return decodeSveBitPerm(machInst);
+                  case 0x6:
+                    return decodeSveIntLongCarry(machInst);
+                }
+                break;
             } else {
-                return decodeSveLogMaskImm(machInst);
+                if (bits(machInst, 20)) {
+                    return decodeSveIntWideImmPred(machInst);
+                } else {
+                    return decodeSveLogMaskImm(machInst);
+                }
             }
           case 0x3:
             {
@@ -198,7 +226,11 @@
                   case 0x2:
                     return decodeSvePermPred(machInst);
                   case 0x3:
-                    return decodeSveSelVec(machInst);
+                    if (bits(machInst, 30)) {
+                        return decodeSveCryptBinConstr(machInst);
+                    } else {
+                        return decodeSveSelVec(machInst);
+                    }
                 }
                 break;
             }
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 97d4ec7..91ecb47 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -325,6 +325,28 @@
         }
     }

+
+    // Decodes binary with immediate operand, destructive, unpredicated
+    // SVE instructions, handling unsigned variants only. Currently
+    // used for XAR: the immediate is the rotate amount, forwarded
+    // unchanged to the instruction constructor.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveBinImmDestrUnpredU(unsigned size, ExtMachInst machInst,
+            RegIndex dest, RegIndex op1, unsigned immediate)
+    {
+        switch (size) {
+          case 0:
+            return new Base<uint8_t>(machInst, dest, op1, immediate);
+          case 1:
+            return new Base<uint16_t>(machInst, dest, op1, immediate);
+          case 2:
+            return new Base<uint32_t>(machInst, dest, op1, immediate);
+          case 3:
+            return new Base<uint64_t>(machInst, dest, op1, immediate);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
// Decodes binary with immediate operand, destructive, predicated (merging)
     // SVE instructions, handling unsigned variants only.
     template <template <typename T> class Base>
@@ -612,6 +634,37 @@
     }

     // Decodes binary, constructive, unpredicated SVE instructions.
+    // Limited variants for the long (widening) instructions: size is
+    // the encoded destination element size, so the instruction is
+    // instantiated with the half-width *source* element type; u
+    // selects unsigned (1) vs signed (0). size == 0 is reserved.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveBinUnpred2(unsigned size, unsigned u, ExtMachInst machInst,
+                       RegIndex dest, RegIndex op1, RegIndex op2)
+    {
+        switch (size) {
+          case 1:
+            if (u) {
+                return new Base<uint8_t>(machInst, dest, op1, op2);
+            } else {
+                return new Base<int8_t>(machInst, dest, op1, op2);
+            }
+          case 2:
+            if (u) {
+                return new Base<uint16_t>(machInst, dest, op1, op2);
+            } else {
+                return new Base<int16_t>(machInst, dest, op1, op2);
+            }
+          case 3:
+            if (u) {
+                return new Base<uint32_t>(machInst, dest, op1, op2);
+            } else {
+                return new Base<int32_t>(machInst, dest, op1, op2);
+            }
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+    // Decodes binary, constructive, unpredicated SVE instructions.
     // Unsigned instructions only.
     template <template <typename T> class Base>
     StaticInstPtr
@@ -653,6 +706,25 @@
         }
     }

+    // Decodes binary, constructive, unpredicated SVE instructions,
+    // unsigned variants only (PMULLB/PMULLT). The size field selects
+    // the *source* element type: 0b00 -> 64-bit, 0b01 -> 8-bit,
+    // 0b11 -> 32-bit; 0b10 is reserved.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveBinUnpredS2(unsigned size, ExtMachInst machInst,
+            RegIndex dest, RegIndex op1, RegIndex op2)
+    {
+        switch (size) {
+          case 0:
+            return new Base<uint64_t>(machInst, dest, op1, op2);
+          case 1:
+            return new Base<uint8_t>(machInst, dest, op1, op2);
+          case 3:
+            return new Base<uint32_t>(machInst, dest, op1, op2);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
     // Decodes binary, costructive, unpredicated SVE instructions, handling
     // floating-point variants only.
     template <template <typename T> class Base>
@@ -926,6 +998,24 @@
         }
     }

+    // Decodes ternary, destructive, unpredicated SVE instructions,
+    // handling unsigned words & double words only. size is the single
+    // sz bit (bit 22) of the ADCLB/T and SBCLB/T encodings:
+    // 0 -> 32-bit, 1 -> 64-bit elements.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveTerUnpredU(unsigned size, ExtMachInst machInst,
+                        RegIndex dest, RegIndex op1, RegIndex op2)
+    {
+        switch (size) {
+          case 0:
+            return new Base<uint32_t>(machInst, dest, op1, op2);
+          case 1:
+            return new Base<uint64_t>(machInst, dest, op1, op2);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+
// Decodes ternary with immediate operand, destructive, unpredicated SVE
     // instructions handling floating-point variants only.
     template <template <typename T> class Base>
@@ -1898,8 +1988,7 @@
     def sveBinInst(name, Name, opClass, types, op, predType=PredType.NONE,
                    isDestructive=False, customIterCode=None,
                    decoder='Generic'):
-        assert not (predType in (PredType.NONE, PredType.SELECT) and
-                    isDestructive)
+        assert not ((predType == PredType.SELECT) and isDestructive)
         global header_output, exec_output, decoders
         code = sveEnabledCheckCode + '''
         unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
@@ -1914,7 +2003,12 @@
                 code += '''
                 const Element& srcElem1 = AA64FpOp1_x[i];'''
             code += '''
-                const Element& srcElem2 = AA64FpOp2_x[i];
+                const Element& srcElem2 = AA64FpOp2_x[i];'''
+            if (predType == PredType.NONE) and isDestructive:
+                code += '''
+                Element destElem = AA64FpDestMerge_x[i];'''
+            else:
+                code += '''
                 Element destElem = 0;'''
             if predType != PredType.NONE:
                 code += '''
@@ -2592,8 +2686,8 @@
                      'class_name' : 'Sve' + Name}
         exec_output += SveOpExecDeclare.subst(substDict)

-    # Generate definitions for SVE TBL instructions
-    def sveTblInst(name, Name, opClass, decoder = 'Generic'):
+    # Generate definitions for SVE table lookup instructions with 2 sources
+ def sveTblInst(name, Name, opClass, decoder = 'Generic', merging = False):
         global header_output, exec_output, decoders
         code = sveEnabledCheckCode + '''
         unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
@@ -2604,10 +2698,10 @@
             if (idx < eCount) {
                 val = AA64FpOp1_x[idx];
             } else {
-                val = 0;
+                val = %(dest_elem)s;;
             }
             AA64FpDest_x[i] = val;
-        }'''
+        }''' % {'dest_elem': 'AA64FpDestMerge_x[i]' if merging else '0'}
         iop = ArmInstObjParams(name, 'Sve' + Name, 'SveTblOp',
                 {'code': code, 'op_class': opClass}, [])
         header_output += SveBinUnpredOpDeclare.subst(iop)
@@ -2617,6 +2711,63 @@
                          'class_name' : 'Sve' + Name}
             exec_output += SveOpExecDeclare.subst(substDict)

+    # Generate definitions for integer add/subtract long with carry
+    # (SVE2 ADCLB/ADCLT, SBCLB/SBCLT).
+    #
+    # Lane layout: even destination elements receive the esize-bit
+    # result; odd destination elements receive the carry-out bit.
+    # The addend comes from the even (uptTop=False, "bottom") or odd
+    # (uptTop=True, "top") elements of the first source, and the
+    # carry-in is bit 0 of the odd elements of the second source.
+    # Subtraction is implemented as acc + NOT(op) + carry, and the
+    # carry-out is recovered by checking whether the 128-bit sum
+    # survives truncation to Element. Instantiated for 32- and 64-bit
+    # elements only.
+    def sveLongCarryInst(name, Name, opClass, decoder = 'Generic',
+            uptTop = False, subtract = False):
+        global header_output, exec_output, decoders
+        code = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (int i = 0; i < eCount/2; ++i) {
+            const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s];
+            const Element& srcElem2 = AA64FpOp2_x[2*i+1];
+            const Element& srcElem3 = AA64FpDestMerge_x[2*i];
+            __uint128_t unsigned_sum = (__uint128_t)srcElem3 +
+                                       (%(op)ssrcElem1) +
+                                       (srcElem2 & 0x1);
+            AA64FpDest_x[2*i] = (Element)unsigned_sum;
+            AA64FpDest_x[2*i+1] = (Element)unsigned_sum !=
+                                  (__uint128_t)unsigned_sum;
+        }
+        ''' % {'offset': 1 if uptTop else 0,
+               'op': '~' if subtract else '',
+              }
+        iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp',
+                               {'code': code, 'op_class': opClass}, [])
+        header_output += SveBinUnpredOpDeclare.subst(iop)
+        exec_output += SveOpExecute.subst(iop)
+        for type in ('uint32_t', 'uint64_t'):
+            substDict = {'targs' : type,
+                         'class_name' : 'Sve' + Name}
+            exec_output += SveOpExecDeclare.subst(substDict)
+
+    # Generate definitions for long integer/poly multiplication
+    # instructions (SVE2 SMULLB/T, UMULLB/T, PMULLB/T).
+    #
+    # 'op' is a C++ fragment that must define 'destElem' as the
+    # double-width product of srcElem1 and srcElem2; the low half is
+    # written to the even destination element and the high half to the
+    # odd one. uptTop selects the odd ("top") rather than even
+    # ("bottom") source elements.
+    def sveLongMulInst(name, Name, opClass, types, op, decoder = 'Generic',
+            uptTop = False):
+        global header_output, exec_output, decoders
+        code = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase());
+        for (int i = 0; i < eCount/2; ++i) {
+            const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s];
+            const Element& srcElem2 = AA64FpOp2_x[2*i+%(offset)s];
+            %(op)s
+            AA64FpDest_x[2*i] = (Element)destElem;
+            AA64FpDest_x[2*i+1] = (Element)(destElem >>
+                                   (sizeof(Element) << 3));
+        }
+        ''' % {'offset': 1 if uptTop else 0,
+               'op': op,
+              }
+        iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp',
+                               {'code': code, 'op_class': opClass}, [])
+        header_output += SveBinUnpredOpDeclare.subst(iop)
+        exec_output += SveOpExecute.subst(iop)
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sve' + Name}
+            exec_output += SveOpExecDeclare.subst(substDict)
+
     # Generate definitions for SVE Unpack instructions
     def sveUnpackInst(name, Name, opClass, sdtypes, unpackHalf,
                       regType, decoder = 'Generic'):
@@ -3167,6 +3318,10 @@
     absCode = 'destElem = (Element) std::abs(srcElem1);'
     sveUnaryInst('abs', 'Abs', 'SimdAluOp', signedTypes, absCode,
                  PredType.MERGE)
+    # ADCLB: add with carry long, bottom — accumulates the even source
+    # elements into Zda, carry-in/out in the odd lanes.
+    sveLongCarryInst('adclb', 'Adclb', 'SimdAluOp')
+    # ADCLT: add with carry long, top (odd source elements).
+    sveLongCarryInst('adclt', 'Adclt', 'SimdAluOp', uptTop = True)
     # ADD (immediate)
sveWideImmInst('add', 'AddImm', 'SimdAddOp', unsignedTypes, addCode, False)
     # ADD (vectors, predicated)
@@ -3272,6 +3427,29 @@
     '''
     sveBinInst('asrr', 'Asrr', 'SimdAluOp', unsignedTypes, asrrCode,
                PredType.MERGE, True)
+    # BCAX: bitwise clear and exclusive OR, destructive:
+    # Zdn = Zdn ^ (Zm & ~Zk); 64-bit elements only.
+    bcaxCode = 'destElem ^= srcElem1 & (~srcElem2);'
+    sveBinInst('bcax', 'Bcax', 'SimdAluOp', ('uint64_t',), bcaxCode,
+                isDestructive=True)
+    # BGRP: group the bits of srcElem1 selected by 1-bits of srcElem2
+    # into the least-significant end, the remaining bits into the
+    # most-significant end.
+    # NOTE(review): the Arm pseudocode writes the unselected bits to
+    # the top of the element in *reverse* order (res<esize-1-tb> with
+    # tb incrementing as the bit index ascends), whereas the second
+    # loop below preserves their order — confirm against the Arm ARM
+    # BGRP pseudocode before relying on this.
+    bgrpCode = '''
+            int k = 0;
+            int len = sizeof(Element) * 8;
+            for(int j = 0; j < len; j++) {
+                if(((srcElem2>>j) & (Element)0x1) == ((Element)0x1)){
+                    destElem |= (((srcElem1>>j) & (Element)0x1) << k);
+                    k++;
+                }
+            }
+            k = len-1;
+            for(int j = len-1; j >= 0; j--) {
+                if(((srcElem2>>j) & ((Element)0x1)) == ((Element)0x0)){
+                    destElem |= (((srcElem1>>j) & (Element)0x1) << k);
+                    k--;
+                }
+            }
+    '''
+    sveBinInst('bgrp', 'Bgrp', 'SimdAluOp', unsignedTypes, bgrpCode)
     # BIC (vectors, predicated)
     bicCode = 'destElem = srcElem1 & ~srcElem2;'
     sveBinInst('bic', 'BicPred', 'SimdAluOp', unsignedTypes, bicCode,
@@ -3555,6 +3733,10 @@
                        eorCode)
     svePredLogicalInst('eors', 'PredEors', 'SimdPredAluOp', ('uint8_t',),
                        eorCode, isFlagSetting=True)
+    # EOR3
+    eorCode = 'destElem ^= srcElem1 ^ srcElem2;'
+    sveBinInst('eor', 'Eor3', 'SimdAluOp', ('uint64_t',), eorCode,
+                isDestructive=True)
     # EORV
     eorvCode = 'destElem ^= srcElem1;'
     sveAssocReducInst('eorv', 'Eorv', 'SimdReduceAluOp', unsignedTypes,
@@ -4122,6 +4304,30 @@
             pfalseCode)
     # PFIRST
     svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp')
+    # PMUL
+    # Shared carry-less (polynomial over GF(2)) multiply helper,
+    # emitted once into the generated exec code.
+    exec_output += '''
+    __uint128_t poly_mul(uint64_t srcElem1, uint64_t srcElem2)
+    {
+        __uint128_t destElem = 0;
+        __uint128_t extendedElem2 = srcElem2;
+        int i;
+        for (i=0; i < 64; i++) {
+            if (((srcElem1 >> i) & 0x1) == 0x1) {
+                destElem ^= (extendedElem2 << i);
+            }
+        }
+        return destElem;
+    }'''
+    # PMUL: byte elements only; keeps the low 8 bits of the product.
+    pmulCode = 'destElem = (uint8_t)poly_mul(srcElem1, srcElem2);'
+    sveBinInst('pmul', 'Pmul', 'SimdAluOp', ('uint8_t',), pmulCode)
+    # PMULLB: polynomial multiply long, bottom (even) source elements;
+    # instantiated for the 8-, 32- and 64-bit source element types
+    # accepted by decodeSveBinUnpredS2.
+    pmullCode = '__uint128_t destElem = poly_mul(srcElem1, srcElem2);'
+    sveLongMulInst('pmullb', 'Pmullb', 'SimdAluOp',
+                   ('uint8_t','uint32_t','uint64_t',), pmullCode)
+    # PMULLT: as PMULLB but on the top (odd) source elements.
+    sveLongMulInst('pmullt', 'Pmullt', 'SimdAluOp',
+                    ('uint8_t','uint32_t','uint64_t',),
+                    pmullCode, uptTop = True)
     # PNEXT
     svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes)
     # PSEL
@@ -4138,6 +4344,9 @@
     # PUNPKLO
sveUnpackInst('punpklo', 'Punpklo', 'SimdPredAluOp', unsignedWideSDTypes,
             unpackHalf = Unpack.Low, regType = SrcRegType.Predicate)
+    # RAX1
+ rax1Code = 'destElem = srcElem1 ^ ((srcElem2 << 1) | (srcElem2 >> 63));'
+    sveBinInst('rax', 'Rax1', 'SimdAluOp', ('uint64_t',), rax1Code)
     # RBIT
     rbitCode = '''
         destElem = reverseBits(srcElem1);'''
@@ -4214,6 +4423,11 @@
     '''
     sveBinInst('sabd', 'Sabd', 'SimdAddOp', signedTypes, abdCode,
                PredType.MERGE, True)
+    # SBCLB: subtract with carry long, bottom (even source elements).
+    sveLongCarryInst('sbclb', 'Sbclb', 'SimdAluOp', subtract = True)
+    # SBCLT: subtract with carry long, top (odd source elements).
+    sveLongCarryInst('sbclt', 'Sbclt', 'SimdAluOp', uptTop = True,
+                     subtract = True)
     # SADDV
     addvCode = 'destElem += srcElem1;'
     sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp',
@@ -4372,6 +4586,13 @@
     destElem = do_mulh(srcElem1, srcElem2);'''
     sveBinInst('smulh', 'Smulh', 'SimdMultOp', signedTypes, mulhCode,
                PredType.MERGE, True)
+    # SMULLB
+    smullCode = 'int64_t destElem = (int64_t)srcElem1 * (int64_t)srcElem2;'
+    sveLongMulInst('smullb', 'Smullb', 'SimdAluOp',
+                   ('int8_t','int16_t','int32_t',), smullCode)
+    # SMULLT
+    sveLongMulInst('smullt', 'Smullt', 'SimdAluOp',
+ ('int8_t','int16_t','int32_t',), smullCode, uptTop = True)
     # SPLICE
     sveSpliceInst('splice', 'Splice', 'SimdAluOp', unsignedTypes)
     # SQADD (immediate)
@@ -4557,6 +4778,8 @@
             sxtCode, PredType.MERGE)
     # TBL
     sveTblInst('tbl', 'Tbl', 'SimdAluOp')
+    # TBX: table lookup with merging — out-of-range indices leave the
+    # corresponding destination element unchanged.
+    sveTblInst('tbx', 'Tbx', 'SimdAluOp', merging=True)
     # TRN1, TRN2 (predicates)
     trnPredIterCode = '''
         constexpr unsigned sz = sizeof(Element);
@@ -4654,6 +4877,14 @@
     # UMULH
     sveBinInst('umulh', 'Umulh', 'SimdMultOp', unsignedTypes, mulhCode,
                PredType.MERGE, True)
+    # UMULLB
+ umullCode = 'uint64_t destElem = (uint64_t)srcElem1 * (uint64_t)srcElem2;'
+    sveLongMulInst('umullb', 'Umullb', 'SimdAluOp',
+                   ('uint8_t','uint16_t','uint32_t',), umullCode)
+    # UMULLT
+    sveLongMulInst('umullt', 'Umullt', 'SimdAluOp',
+                   ('uint8_t','uint16_t','uint32_t',), umullCode,
+                    uptTop = True)
     # UQADD (immediate)
     uqaddCode = '''
             destElem = srcElem1 + srcElem2;
@@ -4861,6 +5092,13 @@
             Ffr_ub[i] = POp1_ub[i];
         }'''
svePredWriteFfrInst('wrffr', 'Wrffr', 'SimdPredAluOp', wrffrCode, False)
+    # XAR
+    xarCode = '''
+            destElem = AA64FpDestMerge_x[i] ^ srcElem1;
+            destElem = ((destElem >> srcElem2) |
+                    (destElem << (sizeof(Element) * 8 - srcElem2)));
+    '''
+    sveBinImmInst('xar', 'Xar', 'SimdAluOp', unsignedTypes, xarCode)
     # ZIP1, ZIP2 (predicates)
     zipPredIterCode = '''
         constexpr unsigned sz = sizeof(Element);

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/70277?usp=email To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-MessageType: newchange
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ia135ba9300eae312b24342bcbda835fef6867113
Gerrit-Change-Number: 70277
Gerrit-PatchSet: 1
Gerrit-Owner: Giacomo Travaglini <giacomo.travagl...@arm.com>
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org

Reply via email to