Bobby Bruce has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/70728?usp=email )

( 6 is the latest approved patch-set. No files were changed between the
latest approved patch-set and the submitted one. )

Change subject: arch-arm: Added 128-bit encodings of SVE TRN, UZP, and ZIP insts.
......................................................................

arch-arm: Added 128-bit encodings of SVE TRN, UZP, and ZIP insts.

Add support for the 128-bit element encodings of the TRN1, TRN2, UZP1,
UZP2, ZIP1, and ZIP2 instructions, required by the Armv8.2 SVE
Double-precision floating-point Matrix Multiplication
instructions (ARMv8.2-F64MM).

For more information, please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Change-Id: I496576340c48410fedb2cf6fc7d1a02e219b3bd4
Reviewed-by: Richard Cooper <richard.coo...@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70728
Maintainer: Jason Lowe-Power <power...@gmail.com>
Tested-by: kokoro <noreply+kok...@google.com>
Reviewed-by: Jason Lowe-Power <power...@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandb...@arm.com>
Reviewed-by: Giacomo Travaglini <giacomo.travagl...@arm.com>
Maintainer: Andreas Sandberg <andreas.sandb...@arm.com>
---
M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/formats/sve_top_level.isa
M src/arch/arm/isa/insts/sve.isa
3 files changed, 108 insertions(+), 35 deletions(-)

Approvals:
  Andreas Sandberg: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass
  Jason Lowe-Power: Looks good to me, but someone else must approve; Looks good to me, approved
  Giacomo Travaglini: Looks good to me, approved

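For readers unfamiliar with the quadword forms added by this change, the
standalone C++ sketch below illustrates the architectural semantics of
TRN1/UZP1/ZIP1 at 128-bit element granularity. It is an illustration, not
gem5 code: it assumes a 512-bit vector length (four __uint128_t elements
per Z register), and the helper names trn1_q/uzp1_q/zip1_q are invented
for this example.

    // Standalone illustration (not gem5 code) of TRN1/UZP1/ZIP1 with
    // 128-bit ("quadword") elements. Assumes a 512-bit vector length,
    // i.e. four __uint128_t elements; helper names are hypothetical.
    #include <array>

    using Elem = __uint128_t;        // GCC/Clang extension, as in gem5
    constexpr unsigned eCount = 4;   // 512-bit VL / 128-bit elements
    using Vec = std::array<Elem, eCount>;

    // TRN1: from each element pair, take the even-indexed element of
    // zn and zm.
    Vec trn1_q(const Vec &zn, const Vec &zm)
    {
        Vec zd{};
        for (unsigned i = 0; i < eCount / 2; i++) {
            zd[2 * i]     = zn[2 * i];
            zd[2 * i + 1] = zm[2 * i];
        }
        return zd;
    }

    // UZP1: even-indexed elements of the concatenation of zn and zm,
    // zn half first.
    Vec uzp1_q(const Vec &zn, const Vec &zm)
    {
        Vec zd{};
        for (unsigned i = 0; i < eCount / 2; i++) {
            zd[i]              = zn[2 * i];
            zd[i + eCount / 2] = zm[2 * i];
        }
        return zd;
    }

    // ZIP1: interleave the low halves of zn and zm.
    Vec zip1_q(const Vec &zn, const Vec &zm)
    {
        Vec zd{};
        for (unsigned i = 0; i < eCount / 2; i++) {
            zd[2 * i]     = zn[i];
            zd[2 * i + 1] = zm[i];
        }
        return zd;
    }

With zn = {n0, n1, n2, n3} and zm = {m0, m1, m2, m3}: trn1_q gives
{n0, m0, n2, m2}, uzp1_q gives {n0, n2, m0, m2}, and zip1_q gives
{n0, m0, n1, m1}. These match the generic SVE permutation definitions
instantiated with Element = __uint128_t, which is exactly what the new
decodeSveBinUnpredUQ case in the diff below selects.
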
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index f74181a..3d211bc 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -1145,29 +1145,31 @@
     }  // decodeSvePermPredicates

     StaticInstPtr
-    decodeSvePermIntlv(ExtMachInst machInst)
+    decodeSvePermIntlv(ExtMachInst machInst, bool f64mm)
     {
         RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
         RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
         RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);

-        uint8_t size = bits(machInst, 23, 22);
+        uint8_t size = f64mm ? 4 : (uint8_t)bits(machInst, 23, 22);

         uint8_t opc = bits(machInst, 12, 10);

         switch (opc) {
           case 0x0:
-            return decodeSveBinUnpredU<SveZip1>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveZip1>(size, machInst, zd, zn, zm);
           case 0x1:
-            return decodeSveBinUnpredU<SveZip2>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveZip2>(size, machInst, zd, zn, zm);
           case 0x2:
-            return decodeSveBinUnpredU<SveUzp1>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveUzp1>(size, machInst, zd, zn, zm);
           case 0x3:
-            return decodeSveBinUnpredU<SveUzp2>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveUzp2>(size, machInst, zd, zn, zm);
           case 0x4:
-            return decodeSveBinUnpredU<SveTrn1>(size, machInst, zd, zn, zm);
+          case 0x6:
+            return decodeSveBinUnpredUQ<SveTrn1>(size, machInst, zd, zn, zm);
           case 0x5:
-            return decodeSveBinUnpredU<SveTrn2>(size, machInst, zd, zn, zm);
+          case 0x7:
+            return decodeSveBinUnpredUQ<SveTrn2>(size, machInst, zd, zn, zm);
         }
         return new Unknown64(machInst);
     }  // decodeSvePermIntlv
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa
index b0579fb..61f2f5c 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -57,7 +57,7 @@
     StaticInstPtr decodeSvePermExtract(ExtMachInst machInst);
     StaticInstPtr decodeSvePermUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSvePermPredicates(ExtMachInst machInst);
-    StaticInstPtr decodeSvePermIntlv(ExtMachInst machInst);
+    StaticInstPtr decodeSvePermIntlv(ExtMachInst machInst, bool f64mm);
     StaticInstPtr decodeSvePermPred(ExtMachInst machInst);
     StaticInstPtr decodeSveSelVec(ExtMachInst machInst);
     StaticInstPtr decodeSveIntCmpVec(ExtMachInst machInst);
@@ -202,11 +202,18 @@
                     if (b_13) {
                         return decodeSvePermUnpred(machInst);
                     } else {
-                        return decodeSvePermExtract(machInst);
+                        uint8_t b_23 = bits(machInst, 23);
+                        if (b_23) {
+                            // 128-bit element encodings for Armv8.6 F64MM
+                            return decodeSvePermIntlv(machInst, true);
+                        } else {
+                            return decodeSvePermExtract(machInst);
+                        }
                     }
                   case 0x1:
                     if (b_13) {
-                        return decodeSvePermIntlv(machInst);
+                        // 8,16,32,64-bit element encodings
+                        return decodeSvePermIntlv(machInst, false);
                     } else {
                         return decodeSvePermPredicates(machInst);
                     }
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 74eacb8..cbaa2b5 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -633,6 +633,29 @@
     }

     // Decodes binary, constructive, unpredicated SVE instructions.
+    // Unsigned instructions only, including Quadword variants.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveBinUnpredUQ(unsigned size, ExtMachInst machInst, RegIndex dest,
+            RegIndex op1, RegIndex op2)
+    {
+        switch (size) {
+          case 0:
+            return new Base<uint8_t>(machInst, dest, op1, op2);
+          case 1:
+            return new Base<uint16_t>(machInst, dest, op1, op2);
+          case 2:
+            return new Base<uint32_t>(machInst, dest, op1, op2);
+          case 3:
+            return new Base<uint64_t>(machInst, dest, op1, op2);
+          case 4:
+            return new Base<__uint128_t>(machInst, dest, op1, op2);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+    // Decodes binary, constructive, unpredicated SVE instructions.
     // Signed instructions only.
     template <template <typename T> class Base>
     StaticInstPtr
@@ -3299,6 +3322,8 @@
     fpTypes = ('uint16_t', 'uint32_t', 'uint64_t')
     signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t')
     unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t')
+    extendedUnsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
+                             '__uint128_t')

     smallSignedTypes = ('int8_t', 'int16_t', 'int32_t')
     bigSignedTypes = ('int16_t', 'int32_t', 'int64_t')
@@ -4754,23 +4779,36 @@
                        trnPredIterCode % 1)
     # TRN1, TRN2 (vectors)
     trnIterCode = '''
+        // SVE F64MM support requires that there are at least two elements
+        // in the vector.
+        if (eCount < 2) {
+            return std::make_shared<UndefinedInstruction>(machInst, false,
+                                                          "%(mnemonic)s");
+        }
         int s;
-        int part = %d;
+        int part = %(part)d;
         ArmISA::VecRegContainer tmpVecC;
         auto auxDest = tmpVecC.as<Element>();
-        for (unsigned i = 0; i < eCount / 2; i++) {
+        const unsigned eltPairsCount = eCount / 2;
+        const unsigned eltsInPairsCount = eltPairsCount * 2;
+        for (unsigned i = 0; i < eltPairsCount; i++) {
             s = 2 * i + part;
             auxDest[2 * i] = AA64FpOp1_x[s];
             auxDest[2 * i + 1] = AA64FpOp2_x[s];
         }
-        for (unsigned i = 0; i < eCount; i++) {
+        // Fill output vector with pairs of elements
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
             AA64FpDest_x[i] = auxDest[i];
         }
+        // Fill any trailing non-full pairs with zeros
+        for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+            AA64FpDest_x[i] = 0;
+        }
     '''
-    sveBinInst('trn1', 'Trn1', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=trnIterCode % 0)
-    sveBinInst('trn2', 'Trn2', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=trnIterCode % 1)
+    sveBinInst('trn1', 'Trn1', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=trnIterCode % dict(mnemonic='trn1', part=0))
+    sveBinInst('trn2', 'Trn2', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=trnIterCode % dict(mnemonic='trn2', part=1))
     # UABD
     sveBinInst('uabd', 'Uabd', 'SimdAddOp', unsignedTypes, abdCode,
                PredType.MERGE, True)
@@ -4976,26 +5014,39 @@
                        uzpPredIterCode % 1)
     # UZP1, UZP2 (vectors)
     uzpIterCode = '''
+        // SVE F64MM support requires that there are at least two elements
+        // in the vector.
+        if (eCount < 2) {
+            return std::make_shared<UndefinedInstruction>(machInst, false,
+                                                          "%(mnemonic)s");
+        }
         int s;
-        int part = %d;
+        int part = %(part)d;
         ArmISA::VecRegContainer tmpVecC;
         auto auxDest = tmpVecC.as<Element>();
-        for (unsigned i = 0; i < eCount; i++) {
+        const unsigned eltPairsCount = eCount / 2;
+        const unsigned eltsInPairsCount = eltPairsCount * 2;
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
             s = 2 * i + part;
-            if (s < eCount) {
+            if (s < eltsInPairsCount) {
                 auxDest[i] = AA64FpOp1_x[s];
             } else {
-                auxDest[i] = AA64FpOp2_x[s - eCount];
+                auxDest[i] = AA64FpOp2_x[s - eltsInPairsCount];
             }
         }
-        for (unsigned i = 0; i < eCount; i++) {
+        // Fill output vector with pairs of elements
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
             AA64FpDest_x[i] = auxDest[i];
         }
+        // Fill any trailing non-full pairs with zeros
+        for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+            AA64FpDest_x[i] = 0;
+        }
     '''
-    sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=uzpIterCode % 0)
-    sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=uzpIterCode % 1)
+    sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=uzpIterCode % dict(mnemonic='uzp1', part=0))
+    sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=uzpIterCode % dict(mnemonic='uzp2', part=1))
     # WHILELE (32-bit)
     whileLECode = '''
             cond = srcElem1 <= srcElem2;
@@ -5058,22 +5109,35 @@
                        zipPredIterCode % 1)
     # ZIP1, ZIP2 (vectors)
     zipIterCode = '''
+        // SVE F64MM support requires that there are at least two elements
+        // in the vector.
+        if (eCount < 2) {
+            return std::make_shared<UndefinedInstruction>(machInst, false,
+                                                          "%(mnemonic)s");
+        }
         int s;
-        int part = %d;
+        int part = %(part)d;
         ArmISA::VecRegContainer tmpVecC;
         auto auxDest = tmpVecC.as<Element>();
-        for (unsigned i = 0; i < eCount / 2; i++) {
-            s = i + (part * (eCount / 2));
+        const unsigned eltPairsCount = eCount / 2;
+        const unsigned eltsInPairsCount = eltPairsCount * 2;
+        for (unsigned i = 0; i < eltPairsCount; i++) {
+            s = i + (part * (eltsInPairsCount / 2));
             auxDest[2 * i] = AA64FpOp1_x[s];
             auxDest[2 * i + 1] = AA64FpOp2_x[s];
         }
-        for (unsigned i = 0; i < eCount; i++) {
+        // Fill output vector with pairs of elements
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
             AA64FpDest_x[i] = auxDest[i];
         }
+        // Fill any trailing non-full pairs with zeros
+        for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+            AA64FpDest_x[i] = 0;
+        }
     '''
-    sveBinInst('zip1', 'Zip1', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=zipIterCode % 0)
-    sveBinInst('zip2', 'Zip2', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=zipIterCode % 1)
+    sveBinInst('zip1', 'Zip1', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=zipIterCode % dict(mnemonic='zip1', part=0))
+    sveBinInst('zip2', 'Zip2', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=zipIterCode % dict(mnemonic='zip2', part=1))

 }};
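
A note on the iteration code above, for readers skimming the diff: with
128-bit elements, eCount is the vector length in quadwords, so an
implementation with a 128-bit vector length has only a single element and
the pair-based permutations are treated as undefined, while odd quadword
counts leave one trailing element that the new code zeroes. The sketch
below (an illustration assuming the standard SVE vector-length range, any
multiple of 128 bits up to 2048; not gem5 code) tabulates that arithmetic:

    // Minimal sketch of the element-pair arithmetic behind the new
    // "eCount < 2" guard and trailing-zero fill. Assumes standard SVE
    // vector lengths (multiples of 128 bits up to 2048).
    #include <cstdio>

    int main()
    {
        for (unsigned vlBits = 128; vlBits <= 2048; vlBits += 128) {
            const unsigned eCount = vlBits / 128;   // 128-bit elements
            if (eCount < 2) {
                // A single quadword element: the patch above decodes
                // this to UndefinedInstruction, as no pair exists.
                std::printf("VL=%4u: undefined\n", vlBits);
                continue;
            }
            const unsigned eltPairsCount    = eCount / 2;
            const unsigned eltsInPairsCount = eltPairsCount * 2;
            std::printf("VL=%4u: %u pair(s), %u trailing zeroed\n",
                        vlBits, eltPairsCount,
                        eCount - eltsInPairsCount);
        }
        return 0;
    }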

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/70728?usp=email
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I496576340c48410fedb2cf6fc7d1a02e219b3bd4
Gerrit-Change-Number: 70728
Gerrit-PatchSet: 8
Gerrit-Owner: Giacomo Travaglini <giacomo.travagl...@arm.com>
Gerrit-Reviewer: Andreas Sandberg <andreas.sandb...@arm.com>
Gerrit-Reviewer: Bobby Bruce <bbr...@ucdavis.edu>
Gerrit-Reviewer: Giacomo Travaglini <giacomo.travagl...@arm.com>
Gerrit-Reviewer: Jason Lowe-Power <power...@gmail.com>
Gerrit-Reviewer: Richard Cooper <richard.coo...@arm.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
