[gem5-dev] [M] Change in gem5/gem5[develop]: arch-vega: Fix several issues with DPP

Matthew Poremba (Gerrit) via gem5-dev Tue, 03 Jan 2023 14:57:59 -0800

Matthew Poremba has submitted this change. (https://gem5-review.googlesource.com/c/public/gem5/+/66752?usp=email )


2 is the latest approved patch-set.

No files were changed between the latest approved patch-set and thesubmitted one.

 )Change subject: arch-vega: Fix several issues with DPP
......................................................................

arch-vega: Fix several issues with DPP

DPP processing has several issues which are fixed in this changeset:

1) Incorrect comment is updated
2) newLane calculation for shift/rotate instructions is corrected
3) A copy of original data is made so that a copy of a copy is not made
4) Reset all booleans (OOB, zeroSrc, laneDisabled) after each lane
iteration

The shift, rotate, and broadcast variants were tested by implementing
them in assembly and running on silicon.

Change-Id: If86fbb26c87eaca4ef0587fd846978115858b168
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66752
Maintainer: Matt Sinclair <[email protected]>
Tested-by: kokoro <[email protected]>
Reviewed-by: Matt Sinclair <[email protected]>
---
M src/arch/amdgpu/vega/insts/inst_util.hh
1 file changed, 58 insertions(+), 23 deletions(-)

Approvals:
  Matt Sinclair: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass

diff --git a/src/arch/amdgpu/vega/insts/inst_util.hhb/src/arch/amdgpu/vega/insts/inst_util.hh

index 01925f9..7ec2e2d 100644
--- a/src/arch/amdgpu/vega/insts/inst_util.hh
+++ b/src/arch/amdgpu/vega/insts/inst_util.hh
@@ -303,9 +303,9 @@
      * Currently the values are:
      * 0x0 - 0xFF: full permute of four threads
      * 0x100: reserved
-     * 0x101 - 0x10F: row shift right by 1-15 threads
+     * 0x101 - 0x10F: row shift left by 1-15 threads
      * 0x111 - 0x11F: row shift right by 1-15 threads
-     * 0x121 - 0x12F: row shift right by 1-15 threads
+     * 0x121 - 0x12F: row rotate right by 1-15 threads
      * 0x130: wavefront left shift by 1 thread
      * 0x134: wavefront left rotate by 1 thread
      * 0x138: wavefront right shift by 1 thread
@@ -322,7 +322,8 @@

// newLane will be the same as the input lane unless swizzlinghappens

         int newLane = currLane;
         // for shift/rotate permutations; positive values are LEFT rotates
-        int count = 1;

+ // shift/rotate left means lane n -> lane n-1 (e.g., lane 1 ->lane 0)

+        int count = 0;
         int localRowOffset = rowOffset;
         int localRowNum = rowNum;

@@ -335,51 +336,47 @@
             panic("ERROR: instruction using reserved DPP_CTRL value\n");
         } else if ((dppCtrl >= SQ_DPP_ROW_SL1) &&
                    (dppCtrl <= SQ_DPP_ROW_SL15)) { // DPP_ROW_SL{1:15}
-            count -= (dppCtrl - SQ_DPP_ROW_SL1 + 1);
+            count = (dppCtrl - SQ_DPP_ROW_SL1 + 1);
             if ((localRowOffset + count >= 0) &&
                 (localRowOffset + count < ROW_SIZE)) {
                 localRowOffset += count;
-                newLane = (rowNum | localRowOffset);
+                newLane = ((rowNum * ROW_SIZE) | localRowOffset);
             } else {
                 outOfBounds = true;
             }
         } else if ((dppCtrl >= SQ_DPP_ROW_SR1) &&
                    (dppCtrl <= SQ_DPP_ROW_SR15)) { // DPP_ROW_SR{1:15}
-            count -= (dppCtrl - SQ_DPP_ROW_SR1 + 1);
+            count = -(dppCtrl - SQ_DPP_ROW_SR1 + 1);
             if ((localRowOffset + count >= 0) &&
                 (localRowOffset + count < ROW_SIZE)) {
                 localRowOffset += count;
-                newLane = (rowNum | localRowOffset);
+                newLane = ((rowNum * ROW_SIZE) | localRowOffset);
             } else {
                 outOfBounds = true;
             }
         } else if ((dppCtrl >= SQ_DPP_ROW_RR1) &&
                    (dppCtrl <= SQ_DPP_ROW_RR15)) { // DPP_ROW_RR{1:15}
-            count -= (dppCtrl - SQ_DPP_ROW_RR1 + 1);
+            count = -(dppCtrl - SQ_DPP_ROW_RR1 + 1);

localRowOffset = (localRowOffset + count + ROW_SIZE) %ROW_SIZE;

-            newLane = (rowNum | localRowOffset);
+            newLane = ((rowNum * ROW_SIZE) | localRowOffset);
         } else if (dppCtrl == SQ_DPP_WF_SL1) { // DPP_WF_SL1
-            count = 1;
             if ((currLane >= 0) && (currLane < NumVecElemPerVecReg)) {
-                newLane += count;
+                newLane += 1;
             } else {
                 outOfBounds = true;
             }
         } else if (dppCtrl == SQ_DPP_WF_RL1) { // DPP_WF_RL1
-            count = 1;
-            newLane = (currLane + count + NumVecElemPerVecReg) %
+            newLane = (currLane - 1 + NumVecElemPerVecReg) %
                       NumVecElemPerVecReg;
         } else if (dppCtrl == SQ_DPP_WF_SR1) { // DPP_WF_SR1
-            count = -1;
-            int currVal = (currLane + count);
+            int currVal = (currLane - 1);
             if ((currVal >= 0) && (currVal < NumVecElemPerVecReg)) {
-                newLane += count;
+                newLane -= 1;
             } else {
                 outOfBounds = true;
             }
         } else if (dppCtrl == SQ_DPP_WF_RR1) { // DPP_WF_RR1
-            count = -1;
-            newLane = (currLane + count + NumVecElemPerVecReg) %
+            newLane = (currLane - 1 + NumVecElemPerVecReg) %
                       NumVecElemPerVecReg;
         } else if (dppCtrl == SQ_DPP_ROW_MIRROR) { // DPP_ROW_MIRROR
             localRowOffset = (15 - localRowOffset);
@@ -392,12 +389,22 @@
         } else if (dppCtrl == SQ_DPP_ROW_BCAST15) { // DPP_ROW_BCAST15
             count = 15;
             if (currLane > count) {
-                newLane = (currLane & ~count) - 1;

+ // 0x30 selects which set of 16 lanes to use. We broadcastthe

+                // last lane of one set to all lanes of the next set (e.g.,
+                // lane 15 is written to 16-31, 31 to 32-47, 47 to 48-63).
+                newLane = (currLane & 0x30) - 1;
+            } else {
+                outOfBounds = true;
             }
         } else if (dppCtrl == SQ_DPP_ROW_BCAST31) { // DPP_ROW_BCAST31
             count = 31;
             if (currLane > count) {
-                newLane = (currLane & ~count) - 1;
+                // 0x20 selects either the upper 32 or lower 32 lanes and
+                // broadcasts the last lane of one set to all lanes of the
+                // next set (e.g., lane 31 is written to 32-63).
+                newLane = (currLane & 0x20) - 1;
+            } else {
+                outOfBounds = true;
             }
         } else {
             panic("Unimplemented DPP control operation: %d\n", dppCtrl);
@@ -443,6 +450,9 @@
             src0.absModifier();
         }

+ // Need a copy of the original data since we update one lane at atime

+        T src0_copy = src0;
+
         // iterate over all register lanes, performing steps 2-4
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             threadValid = (0x1LL << lane);
@@ -458,7 +468,6 @@
             if (((rowMask & (0x1 << rowNum)) == 0)   /* row mask */   ||
                 ((bankMask & (0x1 << bankNum)) == 0) /* bank mask */) {
                 laneDisabled = true;
-                continue;
             }

             /**
@@ -495,7 +504,7 @@
                 } else {
                     threadValid = 0;
                 }
-            } else if (!gpuDynInst->exec_mask[lane]) {
+            } else if (!gpuDynInst->wavefront()->execMask(lane)) {
                 if (boundCtrl == 1) {
                     zeroSrc = true;
                 } else {
@@ -505,13 +514,15 @@

             if (threadValid != 0 && !outOfBounds && !zeroSrc) {
                 assert(!laneDisabled);
-                src0[outLane] = src0[lane];
+                src0[lane] = src0_copy[outLane];
             } else if (zeroSrc) {
                 src0[lane] = 0;
             }

             // reset for next iteration
             laneDisabled = false;
+            outOfBounds = false;
+            zeroSrc = false;
         }
     }


--

To view, visithttps://gem5-review.googlesource.com/c/public/gem5/+/66752?usp=emailTo unsubscribe, or for help writing mail filters, visithttps://gem5-review.googlesource.com/settings


Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: If86fbb26c87eaca4ef0587fd846978115858b168
Gerrit-Change-Number: 66752
Gerrit-PatchSet: 5
Gerrit-Owner: Matthew Poremba <[email protected]>
Gerrit-Reviewer: Matt Sinclair <[email protected]>
Gerrit-Reviewer: Matthew Poremba <[email protected]>
Gerrit-Reviewer: kokoro <[email protected]>
Gerrit-MessageType: merged

_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[gem5-dev] [M] Change in gem5/gem5[develop]: arch-vega: Fix several issues with DPP

Reply via email to