Alexandru Duțu has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/42209 )
Change subject: gpu-compute: Add operand info class to GPUDynInst
......................................................................
gpu-compute: Add operand info class to GPUDynInst
This change adds a class, RegisterOperandInfo, that stores operand
register info for a GPUDynInst. The operand info is computed once,
when the instruction object is created, and cached so that the
register file (RF) and other pipeline stages can access it without
regenerating it.
Change-Id: I3cf267942e54fe60fcb4224d3b88da08a1a0226e
---
M src/arch/gcn3/registers.hh
M src/gpu-compute/SConscript
M src/gpu-compute/fetch_unit.cc
M src/gpu-compute/gpu_dyn_inst.cc
M src/gpu-compute/gpu_dyn_inst.hh
M src/gpu-compute/wavefront.cc
6 files changed, 223 insertions(+), 9 deletions(-)
diff --git a/src/arch/gcn3/registers.hh b/src/arch/gcn3/registers.hh
index 7ad9b1f..df1ef4e 100644
--- a/src/arch/gcn3/registers.hh
+++ b/src/arch/gcn3/registers.hh
@@ -168,6 +168,12 @@
typedef int64_t VecElemI64;
typedef double VecElemF64;
+ const int DWORDSize = sizeof(VecElemU32);
+ /**
+ * Size of a single-precision register in DWORDs.
+ */
+ const int RegSizeDWORDs = sizeof(VecElemU32) / DWORDSize;
+
// typedefs for the various sizes/types of vector regs
using VecRegU8 = ::VecRegT<VecElemU8, NumVecElemPerVecReg, false>;
using VecRegI8 = ::VecRegT<VecElemI8, NumVecElemPerVecReg, false>;
diff --git a/src/gpu-compute/SConscript b/src/gpu-compute/SConscript
index e41e387..adb9b0e 100644
--- a/src/gpu-compute/SConscript
+++ b/src/gpu-compute/SConscript
@@ -80,6 +80,7 @@
DebugFlag('GPUDisp')
DebugFlag('GPUExec')
DebugFlag('GPUFetch')
+DebugFlag('GPUInst')
DebugFlag('GPUKernelInfo')
DebugFlag('GPUMem')
DebugFlag('GPUPort')
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc
index 62b9e73..d2af7b3 100644
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -557,6 +557,7 @@
wavefront, gpu_static_inst,
wavefront->computeUnit->
getAndIncSeqNum());
+ gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
@@ -597,6 +598,7 @@
wavefront, gpu_static_inst,
wavefront->computeUnit->
getAndIncSeqNum());
+ gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "
diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc
index b9b23d4..c08e4b9 100644
--- a/src/gpu-compute/gpu_dyn_inst.cc
+++ b/src/gpu-compute/gpu_dyn_inst.cc
@@ -33,6 +33,7 @@
#include "gpu-compute/gpu_dyn_inst.hh"
+#include "debug/GPUInst.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/scalar_register_file.hh"
@@ -43,7 +44,8 @@
GPUStaticInst *static_inst, InstSeqNum instSeqNum)
: GPUExecContext(_cu, _wf), scalarAddr(0),
addr(computeUnit()->wfSize(),
(Addr)0), numScalarReqs(0), isSaveRestore(false),
- _staticInst(static_inst), _seqNum(instSeqNum)
+ _staticInst(static_inst), _seqNum(instSeqNum),
+ maxSrcVecRegOpSize(0), maxSrcScalarRegOpSize(0)
{
statusVector.assign(TheGpuISA::NumVecElemPerVecReg, 0);
tlbHitLevel.assign(computeUnit()->wfSize(), -1);
@@ -82,6 +84,109 @@
}
}
+void
+GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
+{
+ assert(gpu_dyn_inst->wavefront());
+ /**
+ * Generate and cache the operand to register mapping information. This
+ * prevents this info from being generated multiple times throughout
+ * the CU pipeline.
+ */
+ DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
+ disassemble(), getNumOperands());
+
+ for (int op_idx = 0; op_idx < getNumOperands(); ++op_idx) {
+ int virt_idx(-1);
+ int phys_idx(-1);
+ int op_num_dwords(-1);
+
+ if (isVectorRegister(op_idx)) {
+ virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
+ op_num_dwords = numOpdDWORDs(op_idx);
+
+ if (isSrcOperand(op_idx)) {
+ std::vector<int> virt_indices;
+ std::vector<int> phys_indices;
+
+ if (op_num_dwords > maxSrcVecRegOpSize) {
+ maxSrcVecRegOpSize = op_num_dwords;
+ }
+
+ for (int i = 0; i < op_num_dwords; ++i) {
+ phys_idx = computeUnit()->registerManager->
+ mapVgpr(wavefront(), virt_idx + i);
+ virt_indices.push_back(virt_idx + i);
+ phys_indices.push_back(phys_idx);
+ }
+ DPRINTF(GPUInst, "%s adding vector src (%d->%d) operand "
+ "that uses %d registers.\n", disassemble(),
+ virt_idx, computeUnit()->registerManager->
+ mapVgpr(wavefront(), virt_idx), op_num_dwords);
+ srcVecRegOps.emplace_back(op_idx, op_num_dwords, virt_indices,
+ phys_indices);
+ } else {
+ assert(isDstOperand(op_idx));
+ std::vector<int> virt_indices;
+ std::vector<int> phys_indices;
+ for (int i = 0; i < op_num_dwords; ++i) {
+ phys_idx = computeUnit()->registerManager->
+ mapVgpr(wavefront(), virt_idx + i);
+ virt_indices.push_back(virt_idx + i);
+ phys_indices.push_back(phys_idx);
+ }
+ DPRINTF(GPUInst, "%s adding vector dst (%d->%d) operand "
+ "that uses %d registers.\n", disassemble(),
+ virt_idx, computeUnit()->registerManager->
+ mapVgpr(wavefront(), virt_idx), op_num_dwords);
+ dstVecRegOps.emplace_back(op_idx, op_num_dwords, virt_indices,
+ phys_indices);
+ }
+ } else if (isScalarRegister(op_idx)) {
+ virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
+ op_num_dwords = numOpdDWORDs(op_idx);
+
+ if (isSrcOperand(op_idx)) {
+ std::vector<int> virt_indices;
+ std::vector<int> phys_indices;
+
+ if (op_num_dwords > maxSrcScalarRegOpSize) {
+ maxSrcScalarRegOpSize = op_num_dwords;
+ }
+
+ for (int i = 0; i < op_num_dwords; ++i) {
+ phys_idx = computeUnit()->registerManager->
+ mapSgpr(wavefront(), virt_idx + i);
+ virt_indices.push_back(virt_idx + i);
+ phys_indices.push_back(phys_idx);
+ }
+ DPRINTF(GPUInst, "%s adding scalar src (%d->%d) operand "
+ "that uses %d registers.\n", disassemble(),
+ virt_idx, computeUnit()->registerManager->
+ mapSgpr(wavefront(), virt_idx), op_num_dwords);
+ srcScalarRegOps.emplace_back(op_idx, op_num_dwords,
+ virt_indices, phys_indices);
+ } else {
+ assert(isDstOperand(op_idx));
+ std::vector<int> virt_indices;
+ std::vector<int> phys_indices;
+ for (int i = 0; i < op_num_dwords; ++i) {
+ phys_idx = computeUnit()->registerManager->
+ mapSgpr(wavefront(), virt_idx + i);
+ virt_indices.push_back(virt_idx + i);
+ phys_indices.push_back(phys_idx);
+ }
+ DPRINTF(GPUInst, "%s adding scalar dst (%d->%d) operand "
+ "that uses %d registers.\n", disassemble(),
+ virt_idx, computeUnit()->registerManager->
+ mapSgpr(wavefront(), virt_idx), op_num_dwords);
+ dstScalarRegOps.emplace_back(op_idx, op_num_dwords,
+ virt_indices, phys_indices);
+ }
+ }
+ }
+}
+
GPUDynInst::~GPUDynInst()
{
delete[] d_data;
@@ -110,15 +215,39 @@
}
int
-GPUDynInst::numSrcVecOperands()
+GPUDynInst::numSrcVecRegOperands() const
{
- return _staticInst->numSrcVecOperands();
+ return srcVecRegOps.size();
}
int
-GPUDynInst::numDstVecOperands()
+GPUDynInst::numDstVecRegOperands() const
{
- return _staticInst->numDstVecOperands();
+ return dstVecRegOps.size();
+}
+
+int
+GPUDynInst::maxSrcVecRegOperandSize() const
+{
+ return maxSrcVecRegOpSize;
+}
+
+int
+GPUDynInst::numSrcScalarRegOperands() const
+{
+ return srcScalarRegOps.size();
+}
+
+int
+GPUDynInst::numDstScalarRegOperands() const
+{
+ return dstScalarRegOps.size();
+}
+
+int
+GPUDynInst::maxSrcScalarRegOperandSize() const
+{
+ return maxSrcScalarRegOpSize;
}
int
diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh
index 97eea01..e38a87f 100644
--- a/src/gpu-compute/gpu_dyn_inst.hh
+++ b/src/gpu-compute/gpu_dyn_inst.hh
@@ -74,17 +74,85 @@
AtomicOpFunctor* clone () { return new AtomicOpCAS(c, s, computeUnit); }
};
+class RegisterOperandInfo
+{
+ public:
+ RegisterOperandInfo() = delete;
+ RegisterOperandInfo(int op_idx, int num_dwords,
+ const std::vector<int> &virt_indices,
+ const std::vector<int> &phys_indices)
+ : opIdx(op_idx), numDWORDs(num_dwords), virtIndices(virt_indices),
+ physIndices(phys_indices)
+ {
+ }
+
+ /**
+ * The number of registers required to store this operand.
+ */
+ int numRegisters() const { return numDWORDs / TheGpuISA::RegSizeDWORDs; }
+ int operandIdx() const { return opIdx; }
+ /**
+ * We typically only need the first virtual register for the operand
+ * regardless of its size.
+ */
+ int virtIdx(int reg_num=0) const { return virtIndices.at(reg_num); }
+
+ private:
+ /**
+ * Index of this operand within the set of its parent instruction's
+ * operand list.
+ */
+ const int opIdx;
+ /**
+ * Size of this operand in DWORDs.
+ */
+ const int numDWORDs;
+ const std::vector<int> virtIndices;
+ const std::vector<int> physIndices;
+};
+
class GPUDynInst : public GPUExecContext
{
public:
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
uint64_t instSeqNum);
~GPUDynInst();
+ void initOperandInfo(GPUDynInstPtr &gpu_dyn_inst);
void execute(GPUDynInstPtr gpuDynInst);
+
+ const std::vector<RegisterOperandInfo>&
+ srcVecRegOperands() const
+ {
+ return srcVecRegOps;
+ }
+
+ const std::vector<RegisterOperandInfo>&
+ dstVecRegOperands() const
+ {
+ return dstVecRegOps;
+ }
+
+ const std::vector<RegisterOperandInfo>&
+ srcScalarRegOperands() const
+ {
+ return srcScalarRegOps;
+ }
+
+ const std::vector<RegisterOperandInfo>&
+ dstScalarRegOperands() const
+ {
+ return dstScalarRegOps;
+ }
+
+ int numSrcVecRegOperands() const;
+ int numDstVecRegOperands() const;
+ int maxSrcVecRegOperandSize() const;
+ int numSrcScalarRegOperands() const;
+ int numDstScalarRegOperands() const;
+ int maxSrcScalarRegOperandSize() const;
+
int numSrcRegOperands();
int numDstRegOperands();
- int numDstVecOperands();
- int numSrcVecOperands();
int numSrcVecDWORDs();
int numDstVecDWORDs();
int numOpdDWORDs(int operandIdx);
@@ -428,6 +496,8 @@
private:
GPUStaticInst *_staticInst;
const InstSeqNum _seqNum;
+ int maxSrcVecRegOpSize;
+ int maxSrcScalarRegOpSize;
// the time the request was started
Tick accessTime = -1;
@@ -439,6 +509,12 @@
// hold each cache block address for the instruction and a vector
// to hold the tick when the block arrives at certain hop points
std::map<Addr, std::vector<Tick>> lineAddressTime;
+
+ // Operand info.
+ std::vector<RegisterOperandInfo> srcVecRegOps;
+ std::vector<RegisterOperandInfo> dstVecRegOps;
+ std::vector<RegisterOperandInfo> srcScalarRegOps;
+ std::vector<RegisterOperandInfo> dstScalarRegOps;
};
#endif // __GPU_DYN_INST_HH__
diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc
index e442e2a..279aac7 100644
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -904,8 +904,8 @@
}
computeUnit->srf[simdId]->waveExecuteInst(this, ii);
- computeUnit->shader->incVectorInstSrcOperand(ii->numSrcVecOperands());
- computeUnit->shader->incVectorInstDstOperand(ii->numDstVecOperands());
+ computeUnit->shader->incVectorInstSrcOperand(ii->numSrcVecRegOperands());
+ computeUnit->shader->incVectorInstDstOperand(ii->numDstVecRegOperands());
computeUnit->stats.numInstrExecuted++;
stats.numInstrExecuted++;
computeUnit->instExecPerSimd[simdId]++;
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/42209
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I3cf267942e54fe60fcb4224d3b88da08a1a0226e
Gerrit-Change-Number: 42209
Gerrit-PatchSet: 1
Gerrit-Owner: Alexandru Duțu <alexandru.d...@amd.com>
Gerrit-CC: Tony Gutierrez <anthony.gutier...@amd.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s