Michael Boyer has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/47240 )
Change subject: arch-vega: Implement non-carry-out VEGA add, sub, and subrev
......................................................................
arch-vega: Implement non-carry-out VEGA add, sub, and subrev
In GCN3, the v_add_u32, v_sub_u32, and v_subrev_u32 instructions write
the carry-out value to VCC. VEGA introduces explicit carry-out versions
of these instructions (v_add_co_u32, v_sub_co_u32, and v_subrev_co_u32),
and modifies the behavior of the baseline, non-carry-out versions to not
write to VCC. Previously, both the carry-out and non-carry-out versions
shared a single implementation that wrote to VCC. This patch gives the
non-carry-out versions their own implementations, which do not write VCC.
This patch also makes the following substitutions for GCN3 instructions
that no longer exist in VEGA (this renaming has no functional impact):
v_addc_u32 -> v_addc_co_u32
v_subb_u32 -> v_subb_co_u32
v_subbrev_u32 -> v_subbrev_co_u32
Change-Id: I002fa6e9316d38fd4cc3554daff047523cfc12c9
---
M src/arch/amdgpu/vega/decoder.cc
M src/arch/amdgpu/vega/gpu_decoder.hh
M src/arch/amdgpu/vega/insts/instructions.cc
M src/arch/amdgpu/vega/insts/instructions.hh
4 files changed, 415 insertions(+), 172 deletions(-)
diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc
index 5be0d3d..363f7e1 100644
--- a/src/arch/amdgpu/vega/decoder.cc
+++ b/src/arch/amdgpu/vega/decoder.cc
@@ -849,12 +849,12 @@
&Decoder::decode_OPU_VOP3__V_MAC_F32,
&Decoder::decode_invalid,
&Decoder::decode_invalid,
- &Decoder::decode_OPU_VOP3__V_ADD_U32,
- &Decoder::decode_OPU_VOP3__V_SUB_U32,
- &Decoder::decode_OPU_VOP3__V_SUBREV_U32,
- &Decoder::decode_OPU_VOP3__V_ADDC_U32,
- &Decoder::decode_OPU_VOP3__V_SUBB_U32,
- &Decoder::decode_OPU_VOP3__V_SUBBREV_U32,
+ &Decoder::decode_OPU_VOP3__V_ADD_CO_U32,
+ &Decoder::decode_OPU_VOP3__V_SUB_CO_U32,
+ &Decoder::decode_OPU_VOP3__V_SUBREV_CO_U32,
+ &Decoder::decode_OPU_VOP3__V_ADDC_CO_U32,
+ &Decoder::decode_OPU_VOP3__V_SUBB_CO_U32,
+ &Decoder::decode_OPU_VOP3__V_SUBBREV_CO_U32,
&Decoder::decode_OPU_VOP3__V_ADD_F16,
&Decoder::decode_OPU_VOP3__V_SUB_F16,
&Decoder::decode_OPU_VOP3__V_SUBREV_F16,
@@ -3993,37 +3993,37 @@
GPUStaticInst*
Decoder::decode_OP_VOP2__V_ADD_CO_U32(MachInst iFmt)
{
- return new Inst_VOP2__V_ADD_U32(&iFmt->iFmt_VOP2);
+ return new Inst_VOP2__V_ADD_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_ADD_CO_U32
GPUStaticInst*
Decoder::decode_OP_VOP2__V_SUB_CO_U32(MachInst iFmt)
{
- return new Inst_VOP2__V_SUB_U32(&iFmt->iFmt_VOP2);
+ return new Inst_VOP2__V_SUB_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_SUB_CO_U32
GPUStaticInst*
Decoder::decode_OP_VOP2__V_SUBREV_CO_U32(MachInst iFmt)
{
- return new Inst_VOP2__V_SUBREV_U32(&iFmt->iFmt_VOP2);
+ return new Inst_VOP2__V_SUBREV_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_SUBREV_CO_U32
GPUStaticInst*
Decoder::decode_OP_VOP2__V_ADDC_CO_U32(MachInst iFmt)
{
- return new Inst_VOP2__V_ADDC_U32(&iFmt->iFmt_VOP2);
+ return new Inst_VOP2__V_ADDC_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_ADDC_CO_U32
GPUStaticInst*
Decoder::decode_OP_VOP2__V_SUBB_CO_U32(MachInst iFmt)
{
- return new Inst_VOP2__V_SUBB_U32(&iFmt->iFmt_VOP2);
+ return new Inst_VOP2__V_SUBB_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_SUBB_CO_U32
GPUStaticInst*
Decoder::decode_OP_VOP2__V_SUBBREV_CO_U32(MachInst iFmt)
{
- return new Inst_VOP2__V_SUBBREV_U32(&iFmt->iFmt_VOP2);
+ return new Inst_VOP2__V_SUBBREV_CO_U32(&iFmt->iFmt_VOP2);
} // decode_OP_VOP2__V_SUBBREV_CO_U32
GPUStaticInst*
@@ -5947,40 +5947,40 @@
} // decode_OPU_VOP3__V_MAC_F32
GPUStaticInst*
- Decoder::decode_OPU_VOP3__V_ADD_U32(MachInst iFmt)
+ Decoder::decode_OPU_VOP3__V_ADD_CO_U32(MachInst iFmt)
{
- return new Inst_VOP3__V_ADD_U32(&iFmt->iFmt_VOP3B);
- } // decode_OPU_VOP3__V_ADD_U32
+ return new Inst_VOP3__V_ADD_CO_U32(&iFmt->iFmt_VOP3B);
+ } // decode_OPU_VOP3__V_ADD_CO_U32
GPUStaticInst*
- Decoder::decode_OPU_VOP3__V_SUB_U32(MachInst iFmt)
+ Decoder::decode_OPU_VOP3__V_SUB_CO_U32(MachInst iFmt)
{
- return new Inst_VOP3__V_SUB_U32(&iFmt->iFmt_VOP3B);
- } // decode_OPU_VOP3__V_SUB_U32
+ return new Inst_VOP3__V_SUB_CO_U32(&iFmt->iFmt_VOP3B);
+ } // decode_OPU_VOP3__V_SUB_CO_U32
GPUStaticInst*
- Decoder::decode_OPU_VOP3__V_SUBREV_U32(MachInst iFmt)
+ Decoder::decode_OPU_VOP3__V_SUBREV_CO_U32(MachInst iFmt)
{
- return new Inst_VOP3__V_SUBREV_U32(&iFmt->iFmt_VOP3B);
- } // decode_OPU_VOP3__V_SUBREV_U32
+ return new Inst_VOP3__V_SUBREV_CO_U32(&iFmt->iFmt_VOP3B);
+ } // decode_OPU_VOP3__V_SUBREV_CO_U32
GPUStaticInst*
- Decoder::decode_OPU_VOP3__V_ADDC_U32(MachInst iFmt)
+ Decoder::decode_OPU_VOP3__V_ADDC_CO_U32(MachInst iFmt)
{
- return new Inst_VOP3__V_ADDC_U32(&iFmt->iFmt_VOP3B);
- } // decode_OPU_VOP3__V_ADDC_U32
+ return new Inst_VOP3__V_ADDC_CO_U32(&iFmt->iFmt_VOP3B);
+ } // decode_OPU_VOP3__V_ADDC_CO_U32
GPUStaticInst*
- Decoder::decode_OPU_VOP3__V_SUBB_U32(MachInst iFmt)
+ Decoder::decode_OPU_VOP3__V_SUBB_CO_U32(MachInst iFmt)
{
- return new Inst_VOP3__V_SUBB_U32(&iFmt->iFmt_VOP3B);
- } // decode_OPU_VOP3__V_SUBB_U32
+ return new Inst_VOP3__V_SUBB_CO_U32(&iFmt->iFmt_VOP3B);
+ } // decode_OPU_VOP3__V_SUBB_CO_U32
GPUStaticInst*
- Decoder::decode_OPU_VOP3__V_SUBBREV_U32(MachInst iFmt)
+ Decoder::decode_OPU_VOP3__V_SUBBREV_CO_U32(MachInst iFmt)
{
- return new Inst_VOP3__V_SUBBREV_U32(&iFmt->iFmt_VOP3B);
- } // decode_OPU_VOP3__V_SUBBREV_U32
+ return new Inst_VOP3__V_SUBBREV_CO_U32(&iFmt->iFmt_VOP3B);
+ } // decode_OPU_VOP3__V_SUBBREV_CO_U32
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADD_F16(MachInst iFmt)
diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh
index 69954f8..0159589 100644
--- a/src/arch/amdgpu/vega/gpu_decoder.hh
+++ b/src/arch/amdgpu/vega/gpu_decoder.hh
@@ -296,12 +296,12 @@
GPUStaticInst* decode_OPU_VOP3__V_OR_B32(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_XOR_B32(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_MAC_F32(MachInst);
- GPUStaticInst* decode_OPU_VOP3__V_ADD_U32(MachInst);
- GPUStaticInst* decode_OPU_VOP3__V_SUB_U32(MachInst);
- GPUStaticInst* decode_OPU_VOP3__V_SUBREV_U32(MachInst);
- GPUStaticInst* decode_OPU_VOP3__V_ADDC_U32(MachInst);
- GPUStaticInst* decode_OPU_VOP3__V_SUBB_U32(MachInst);
- GPUStaticInst* decode_OPU_VOP3__V_SUBBREV_U32(MachInst);
+ GPUStaticInst* decode_OPU_VOP3__V_ADD_CO_U32(MachInst);
+ GPUStaticInst* decode_OPU_VOP3__V_SUB_CO_U32(MachInst);
+ GPUStaticInst* decode_OPU_VOP3__V_SUBREV_CO_U32(MachInst);
+ GPUStaticInst* decode_OPU_VOP3__V_ADDC_CO_U32(MachInst);
+ GPUStaticInst* decode_OPU_VOP3__V_SUBB_CO_U32(MachInst);
+ GPUStaticInst* decode_OPU_VOP3__V_SUBBREV_CO_U32(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_ADD_F16(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_SUB_F16(MachInst);
GPUStaticInst* decode_OPU_VOP3__V_SUBREV_F16(MachInst);
diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 5db3e54..b0a6cb0 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -6988,19 +6988,19 @@
vdst.write();
} // execute
- // --- Inst_VOP2__V_ADD_U32 class methods ---
+ // --- Inst_VOP2__V_ADD_CO_U32 class methods ---
- Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
- : Inst_VOP2(iFmt, "v_add_u32")
+ Inst_VOP2__V_ADD_CO_U32::Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *iFmt)
+ : Inst_VOP2(iFmt, "v_add_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP2__V_ADD_U32
+ } // Inst_VOP2__V_ADD_CO_U32
- Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
+ Inst_VOP2__V_ADD_CO_U32::~Inst_VOP2__V_ADD_CO_U32()
{
- } // ~Inst_VOP2__V_ADD_U32
+ } // ~Inst_VOP2__V_ADD_CO_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u;
@@ -7008,7 +7008,7 @@
// --- overflow or carry-out for V_ADDC_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
- Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP2__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
@@ -7031,8 +7031,8 @@
origSrc0_sdwa.read();
origSrc1.read();
- DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register
v[%d], "
- "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
+ DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register "
+ "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d,
SRC0_SEL: %d, "
"SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d,
SRC1_SEL: %d, "
"SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
extData.iFmt_VOP_SDWA.SRC0,
extData.iFmt_VOP_SDWA.DST_SEL,
@@ -7073,19 +7073,19 @@
vcc.write();
vdst.write();
} // execute
- // --- Inst_VOP2__V_SUB_U32 class methods ---
+ // --- Inst_VOP2__V_SUB_CO_U32 class methods ---
- Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
- : Inst_VOP2(iFmt, "v_sub_u32")
+ Inst_VOP2__V_SUB_CO_U32::Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *iFmt)
+ : Inst_VOP2(iFmt, "v_sub_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP2__V_SUB_U32
+ } // Inst_VOP2__V_SUB_CO_U32
- Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
+ Inst_VOP2__V_SUB_CO_U32::~Inst_VOP2__V_SUB_CO_U32()
{
- } // ~Inst_VOP2__V_SUB_U32
+ } // ~Inst_VOP2__V_SUB_CO_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u;
@@ -7093,7 +7093,7 @@
// carry-out for V_SUBB_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
- Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP2__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
@@ -7114,28 +7114,27 @@
vdst.write();
vcc.write();
} // execute
- // --- Inst_VOP2__V_SUBREV_U32 class methods ---
+ // --- Inst_VOP2__V_SUBREV_CO_U32 class methods ---
- Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
- : Inst_VOP2(iFmt, "v_subrev_u32")
+ Inst_VOP2__V_SUBREV_CO_U32::Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2
*iFmt)
+ : Inst_VOP2(iFmt, "v_subrev_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP2__V_SUBREV_U32
+ } // Inst_VOP2__V_SUBREV_CO_U32
- Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
+ Inst_VOP2__V_SUBREV_CO_U32::~Inst_VOP2__V_SUBREV_CO_U32()
{
- } // ~Inst_VOP2__V_SUBREV_U32
+ } // ~Inst_VOP2__V_SUBREV_CO_U32
// --- description from .arch file ---
// D.u = S1.u - S0.u;
// VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
// carry-out for V_SUBB_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
- // SQ translates this to V_SUB_U32 with reversed operands.
void
- Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP2__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
@@ -7156,20 +7155,20 @@
vdst.write();
vcc.write();
} // execute
- // --- Inst_VOP2__V_ADDC_U32 class methods ---
+ // --- Inst_VOP2__V_ADDC_CO_U32 class methods ---
- Inst_VOP2__V_ADDC_U32::Inst_VOP2__V_ADDC_U32(InFmt_VOP2 *iFmt)
- : Inst_VOP2(iFmt, "v_addc_u32")
+ Inst_VOP2__V_ADDC_CO_U32::Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *iFmt)
+ : Inst_VOP2(iFmt, "v_addc_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP2__V_ADDC_U32
+ } // Inst_VOP2__V_ADDC_CO_U32
- Inst_VOP2__V_ADDC_U32::~Inst_VOP2__V_ADDC_U32()
+ Inst_VOP2__V_ADDC_CO_U32::~Inst_VOP2__V_ADDC_CO_U32()
{
- } // ~Inst_VOP2__V_ADDC_U32
+ } // ~Inst_VOP2__V_ADDC_CO_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u + VCC[threadId];
@@ -7178,7 +7177,7 @@
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// source comes from the SGPR-pair at S2.u.
void
- Inst_VOP2__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP2__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
@@ -7204,20 +7203,20 @@
vdst.write();
vcc.write();
} // execute
- // --- Inst_VOP2__V_SUBB_U32 class methods ---
+ // --- Inst_VOP2__V_SUBB_CO_U32 class methods ---
- Inst_VOP2__V_SUBB_U32::Inst_VOP2__V_SUBB_U32(InFmt_VOP2 *iFmt)
- : Inst_VOP2(iFmt, "v_subb_u32")
+ Inst_VOP2__V_SUBB_CO_U32::Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *iFmt)
+ : Inst_VOP2(iFmt, "v_subb_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP2__V_SUBB_U32
+ } // Inst_VOP2__V_SUBB_CO_U32
- Inst_VOP2__V_SUBB_U32::~Inst_VOP2__V_SUBB_U32()
+ Inst_VOP2__V_SUBB_CO_U32::~Inst_VOP2__V_SUBB_CO_U32()
{
- } // ~Inst_VOP2__V_SUBB_U32
+ } // ~Inst_VOP2__V_SUBB_CO_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u - VCC[threadId];
@@ -7226,7 +7225,7 @@
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// --- source comes from the SGPR-pair at S2.u.
void
- Inst_VOP2__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP2__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
@@ -7250,20 +7249,20 @@
vdst.write();
vcc.write();
} // execute
- // --- Inst_VOP2__V_SUBBREV_U32 class methods ---
+ // --- Inst_VOP2__V_SUBBREV_CO_U32 class methods ---
- Inst_VOP2__V_SUBBREV_U32::Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2 *iFmt)
- : Inst_VOP2(iFmt, "v_subbrev_u32")
+ Inst_VOP2__V_SUBBREV_CO_U32::Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2
*iFmt)
+ : Inst_VOP2(iFmt, "v_subbrev_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP2__V_SUBBREV_U32
+ } // Inst_VOP2__V_SUBBREV_CO_U32
- Inst_VOP2__V_SUBBREV_U32::~Inst_VOP2__V_SUBBREV_U32()
+ Inst_VOP2__V_SUBBREV_CO_U32::~Inst_VOP2__V_SUBBREV_CO_U32()
{
- } // ~Inst_VOP2__V_SUBBREV_U32
+ } // ~Inst_VOP2__V_SUBBREV_CO_U32
// --- description from .arch file ---
// D.u = S1.u - S0.u - VCC[threadId];
@@ -7273,7 +7272,7 @@
// source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32.
// SQ translates this to V_SUBREV_U32 with reversed operands.
void
- Inst_VOP2__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP2__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
@@ -7893,6 +7892,149 @@
{
panicUnimplemented();
} // execute
+ // --- Inst_VOP2__V_ADD_U32 class methods ---
+
+ Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
+ : Inst_VOP2(iFmt, "v_add_u32")
+ {
+ setFlag(ALU);
+ setFlag(ValuCacGrp2);
+ } // Inst_VOP2__V_ADD_U32
+
+ Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
+ {
+ } // ~Inst_VOP2__V_ADD_U32
+
+ // --- description from .arch file ---
+ // D.u = S0.u + S1.u;
+ void
+ Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
+ VecOperandU32 src1(gpuDynInst, instData.VSRC1);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.read();
+
+ if (isSDWAInst()) {
+ VecOperandU32 src0_sdwa(gpuDynInst,
extData.iFmt_VOP_SDWA.SRC0);
+ // use copies of original src0, src1, and dest during selecting
+ VecOperandU32 origSrc0_sdwa(gpuDynInst,
+ extData.iFmt_VOP_SDWA.SRC0);
+ VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
+ VecOperandU32 origVdst(gpuDynInst, instData.VDST);
+
+ src0_sdwa.read();
+ origSrc0_sdwa.read();
+ origSrc1.read();
+
+ DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. SRC0: register
v[%d], "
+ "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
+ "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d,
SRC1_SEL: %d, "
+ "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
+ extData.iFmt_VOP_SDWA.SRC0,
extData.iFmt_VOP_SDWA.DST_SEL,
+ extData.iFmt_VOP_SDWA.DST_U,
+ extData.iFmt_VOP_SDWA.CLMP,
+ extData.iFmt_VOP_SDWA.SRC0_SEL,
+ extData.iFmt_VOP_SDWA.SRC0_SEXT,
+ extData.iFmt_VOP_SDWA.SRC0_NEG,
+ extData.iFmt_VOP_SDWA.SRC0_ABS,
+ extData.iFmt_VOP_SDWA.SRC1_SEL,
+ extData.iFmt_VOP_SDWA.SRC1_SEXT,
+ extData.iFmt_VOP_SDWA.SRC1_NEG,
+ extData.iFmt_VOP_SDWA.SRC1_ABS);
+
+ processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa,
origSrc0_sdwa,
+ src1, origSrc1);
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src0_sdwa[lane] + src1[lane];
+ origVdst[lane] = vdst[lane]; // keep copy consistent
+ }
+ }
+
+ processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
+ } else {
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src0[lane] + src1[lane];
+ }
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP2__V_SUB_U32 class methods ---
+
+ Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
+ : Inst_VOP2(iFmt, "v_sub_u32")
+ {
+ setFlag(ALU);
+ setFlag(ValuCacGrp2);
+ } // Inst_VOP2__V_SUB_U32
+
+ Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
+ {
+ } // ~Inst_VOP2__V_SUB_U32
+
+ // --- description from .arch file ---
+ // D.u = S0.u - S1.u;
+ void
+ Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.read();
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src0[lane] - src1[lane];
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP2__V_SUBREV_U32 class methods ---
+
+ Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
+ : Inst_VOP2(iFmt, "v_subrev_u32")
+ {
+ setFlag(ALU);
+ setFlag(ValuCacGrp2);
+ } // Inst_VOP2__V_SUBREV_U32
+
+ Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
+ {
+ } // ~Inst_VOP2__V_SUBREV_U32
+
+ // --- description from .arch file ---
+ // D.u = S1.u - S0.u;
+ void
+ Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.read();
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = src1[lane] - src0[lane];
+ }
+ }
+
+ vdst.write();
+ } // execute
// --- Inst_VOP1__V_NOP class methods ---
Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt)
@@ -26157,19 +26299,19 @@
vdst.write();
} // execute
- // --- Inst_VOP3__V_ADD_U32 class methods ---
+ // --- Inst_VOP3__V_ADD_CO_U32 class methods ---
- Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3B *iFmt)
- : Inst_VOP3B(iFmt, "v_add_u32")
+ Inst_VOP3__V_ADD_CO_U32::Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *iFmt)
+ : Inst_VOP3B(iFmt, "v_add_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP3__V_ADD_U32
+ } // Inst_VOP3__V_ADD_CO_U32
- Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32()
+ Inst_VOP3__V_ADD_CO_U32::~Inst_VOP3__V_ADD_CO_U32()
{
- } // ~Inst_VOP3__V_ADD_U32
+ } // ~Inst_VOP3__V_ADD_CO_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u;
@@ -26177,7 +26319,7 @@
// --- overflow or carry-out for V_ADDC_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
- Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP3__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
@@ -26206,19 +26348,19 @@
vdst.write();
vcc.write();
} // execute
- // --- Inst_VOP3__V_SUB_U32 class methods ---
+ // --- Inst_VOP3__V_SUB_CO_U32 class methods ---
- Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3B *iFmt)
- : Inst_VOP3B(iFmt, "v_sub_u32")
+ Inst_VOP3__V_SUB_CO_U32::Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *iFmt)
+ : Inst_VOP3B(iFmt, "v_sub_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP3__V_SUB_U32
+ } // Inst_VOP3__V_SUB_CO_U32
- Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32()
+ Inst_VOP3__V_SUB_CO_U32::~Inst_VOP3__V_SUB_CO_U32()
{
- } // ~Inst_VOP3__V_SUB_U32
+ } // ~Inst_VOP3__V_SUB_CO_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u;
@@ -26226,7 +26368,7 @@
// carry-out for V_SUBB_U32.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
- Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP3__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
@@ -26254,20 +26396,20 @@
vdst.write();
vcc.write();
} // execute
- // --- Inst_VOP3__V_SUBREV_U32 class methods ---
+ // --- Inst_VOP3__V_SUBREV_CO_U32 class methods ---
- Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(
+ Inst_VOP3__V_SUBREV_CO_U32::Inst_VOP3__V_SUBREV_CO_U32(
InFmt_VOP3B *iFmt)
- : Inst_VOP3B(iFmt, "v_subrev_u32")
+ : Inst_VOP3B(iFmt, "v_subrev_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP3__V_SUBREV_U32
+ } // Inst_VOP3__V_SUBREV_CO_U32
- Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32()
+ Inst_VOP3__V_SUBREV_CO_U32::~Inst_VOP3__V_SUBREV_CO_U32()
{
- } // ~Inst_VOP3__V_SUBREV_U32
+ } // ~Inst_VOP3__V_SUBREV_CO_U32
// --- description from .arch file ---
// D.u = S1.u - S0.u;
@@ -26276,7 +26418,7 @@
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
// SQ translates this to V_SUB_U32 with reversed operands.
void
- Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP3__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
@@ -26304,20 +26446,20 @@
vdst.write();
vcc.write();
} // execute
- // --- Inst_VOP3__V_ADDC_U32 class methods ---
+ // --- Inst_VOP3__V_ADDC_CO_U32 class methods ---
- Inst_VOP3__V_ADDC_U32::Inst_VOP3__V_ADDC_U32(InFmt_VOP3B *iFmt)
- : Inst_VOP3B(iFmt, "v_addc_u32")
+ Inst_VOP3__V_ADDC_CO_U32::Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *iFmt)
+ : Inst_VOP3B(iFmt, "v_addc_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP3__V_ADDC_U32
+ } // Inst_VOP3__V_ADDC_CO_U32
- Inst_VOP3__V_ADDC_U32::~Inst_VOP3__V_ADDC_U32()
+ Inst_VOP3__V_ADDC_CO_U32::~Inst_VOP3__V_ADDC_CO_U32()
{
- } // ~Inst_VOP3__V_ADDC_U32
+ } // ~Inst_VOP3__V_ADDC_CO_U32
// --- description from .arch file ---
// D.u = S0.u + S1.u + VCC[threadId];
@@ -26326,7 +26468,7 @@
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// source comes from the SGPR-pair at S2.u.
void
- Inst_VOP3__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP3__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
@@ -26360,20 +26502,20 @@
vdst.write();
sdst.write();
} // execute
- // --- Inst_VOP3__V_SUBB_U32 class methods ---
+ // --- Inst_VOP3__V_SUBB_CO_U32 class methods ---
- Inst_VOP3__V_SUBB_U32::Inst_VOP3__V_SUBB_U32(InFmt_VOP3B *iFmt)
- : Inst_VOP3B(iFmt, "v_subb_u32")
+ Inst_VOP3__V_SUBB_CO_U32::Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *iFmt)
+ : Inst_VOP3B(iFmt, "v_subb_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP3__V_SUBB_U32
+ } // Inst_VOP3__V_SUBB_CO_U32
- Inst_VOP3__V_SUBB_U32::~Inst_VOP3__V_SUBB_U32()
+ Inst_VOP3__V_SUBB_CO_U32::~Inst_VOP3__V_SUBB_CO_U32()
{
- } // ~Inst_VOP3__V_SUBB_U32
+ } // ~Inst_VOP3__V_SUBB_CO_U32
// --- description from .arch file ---
// D.u = S0.u - S1.u - VCC[threadId];
@@ -26382,7 +26524,7 @@
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// --- source comes from the SGPR-pair at S2.u.
void
- Inst_VOP3__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP3__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
@@ -26414,21 +26556,21 @@
vdst.write();
sdst.write();
} // execute
- // --- Inst_VOP3__V_SUBBREV_U32 class methods ---
+ // --- Inst_VOP3__V_SUBBREV_CO_U32 class methods ---
- Inst_VOP3__V_SUBBREV_U32::Inst_VOP3__V_SUBBREV_U32(
+ Inst_VOP3__V_SUBBREV_CO_U32::Inst_VOP3__V_SUBBREV_CO_U32(
InFmt_VOP3B *iFmt)
- : Inst_VOP3B(iFmt, "v_subbrev_u32")
+ : Inst_VOP3B(iFmt, "v_subbrev_co_u32")
{
setFlag(ALU);
setFlag(WritesVCC);
setFlag(ReadsVCC);
setFlag(ValuCacGrp2);
- } // Inst_VOP3__V_SUBBREV_U32
+ } // Inst_VOP3__V_SUBBREV_CO_U32
- Inst_VOP3__V_SUBBREV_U32::~Inst_VOP3__V_SUBBREV_U32()
+ Inst_VOP3__V_SUBBREV_CO_U32::~Inst_VOP3__V_SUBBREV_CO_U32()
{
- } // ~Inst_VOP3__V_SUBBREV_U32
+ } // ~Inst_VOP3__V_SUBBREV_CO_U32
// --- description from .arch file ---
// D.u = S1.u - S0.u - VCC[threadId];
@@ -26436,9 +26578,8 @@
// overflow.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32.
- // SQ translates this to V_SUBREV_U32 with reversed operands.
void
- Inst_VOP3__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
+ Inst_VOP3__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *wf = gpuDynInst->wavefront();
ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 5c0ea8c..b815d3e 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -6987,11 +6987,11 @@
void execute(GPUDynInstPtr) override;
}; // Inst_VOP2__V_MADAK_F32
- class Inst_VOP2__V_ADD_U32 : public Inst_VOP2
+ class Inst_VOP2__V_ADD_CO_U32 : public Inst_VOP2
{
public:
- Inst_VOP2__V_ADD_U32(InFmt_VOP2*);
- ~Inst_VOP2__V_ADD_U32();
+ Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2*);
+ ~Inst_VOP2__V_ADD_CO_U32();
int
getNumOperands() override
@@ -7021,13 +7021,13 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP2__V_ADD_U32
+ }; // Inst_VOP2__V_ADD_CO_U32
- class Inst_VOP2__V_SUB_U32 : public Inst_VOP2
+ class Inst_VOP2__V_SUB_CO_U32 : public Inst_VOP2
{
public:
- Inst_VOP2__V_SUB_U32(InFmt_VOP2*);
- ~Inst_VOP2__V_SUB_U32();
+ Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2*);
+ ~Inst_VOP2__V_SUB_CO_U32();
int
getNumOperands() override
@@ -7057,13 +7057,13 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP2__V_SUB_U32
+ }; // Inst_VOP2__V_SUB_CO_U32
- class Inst_VOP2__V_SUBREV_U32 : public Inst_VOP2
+ class Inst_VOP2__V_SUBREV_CO_U32 : public Inst_VOP2
{
public:
- Inst_VOP2__V_SUBREV_U32(InFmt_VOP2*);
- ~Inst_VOP2__V_SUBREV_U32();
+ Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2*);
+ ~Inst_VOP2__V_SUBREV_CO_U32();
int
getNumOperands() override
@@ -7093,13 +7093,13 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP2__V_SUBREV_U32
+ }; // Inst_VOP2__V_SUBREV_CO_U32
- class Inst_VOP2__V_ADDC_U32 : public Inst_VOP2
+ class Inst_VOP2__V_ADDC_CO_U32 : public Inst_VOP2
{
public:
- Inst_VOP2__V_ADDC_U32(InFmt_VOP2*);
- ~Inst_VOP2__V_ADDC_U32();
+ Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2*);
+ ~Inst_VOP2__V_ADDC_CO_U32();
int
getNumOperands() override
@@ -7131,13 +7131,13 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP2__V_ADDC_U32
+ }; // Inst_VOP2__V_ADDC_CO_U32
- class Inst_VOP2__V_SUBB_U32 : public Inst_VOP2
+ class Inst_VOP2__V_SUBB_CO_U32 : public Inst_VOP2
{
public:
- Inst_VOP2__V_SUBB_U32(InFmt_VOP2*);
- ~Inst_VOP2__V_SUBB_U32();
+ Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2*);
+ ~Inst_VOP2__V_SUBB_CO_U32();
int
getNumOperands() override
@@ -7169,13 +7169,13 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP2__V_SUBB_U32
+ }; // Inst_VOP2__V_SUBB_CO_U32
- class Inst_VOP2__V_SUBBREV_U32 : public Inst_VOP2
+ class Inst_VOP2__V_SUBBREV_CO_U32 : public Inst_VOP2
{
public:
- Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2*);
- ~Inst_VOP2__V_SUBBREV_U32();
+ Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2*);
+ ~Inst_VOP2__V_SUBBREV_CO_U32();
int
getNumOperands() override
@@ -7207,7 +7207,7 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP2__V_SUBBREV_U32
+ }; // Inst_VOP2__V_SUBBREV_CO_U32
class Inst_VOP2__V_ADD_F16 : public Inst_VOP2
{
@@ -7927,6 +7927,108 @@
void execute(GPUDynInstPtr) override;
}; // Inst_VOP2__V_LDEXP_F16
+ class Inst_VOP2__V_ADD_U32 : public Inst_VOP2
+ {
+ public:
+ Inst_VOP2__V_ADD_U32(InFmt_VOP2*);
+ ~Inst_VOP2__V_ADD_U32();
+
+ int
+ getNumOperands() override
+ {
+ return numDstRegOperands() + numSrcRegOperands();
+ } // getNumOperands
+
+ int numDstRegOperands() override { return 1; }
+ int numSrcRegOperands() override { return 2; }
+
+ int
+ getOperandSize(int opIdx) override
+ {
+ switch (opIdx) {
+ case 0: //src_0
+ return 4;
+ case 1: //src_1
+ return 4;
+ case 2: //vdst
+ return 4;
+ default:
+ fatal("op idx %i out of bounds\n", opIdx);
+ return -1;
+ }
+ } // getOperandSize
+
+ void execute(GPUDynInstPtr) override;
+ }; // Inst_VOP2__V_ADD_U32
+
+ class Inst_VOP2__V_SUB_U32 : public Inst_VOP2
+ {
+ public:
+ Inst_VOP2__V_SUB_U32(InFmt_VOP2*);
+ ~Inst_VOP2__V_SUB_U32();
+
+ int
+ getNumOperands() override
+ {
+ return numDstRegOperands() + numSrcRegOperands();
+ } // getNumOperands
+
+ int numDstRegOperands() override { return 1; }
+ int numSrcRegOperands() override { return 2; }
+
+ int
+ getOperandSize(int opIdx) override
+ {
+ switch (opIdx) {
+ case 0: //src_0
+ return 4;
+ case 1: //src_1
+ return 4;
+ case 2: //vdst
+ return 4;
+ default:
+ fatal("op idx %i out of bounds\n", opIdx);
+ return -1;
+ }
+ } // getOperandSize
+
+ void execute(GPUDynInstPtr) override;
+ }; // Inst_VOP2__V_SUB_U32
+
+ class Inst_VOP2__V_SUBREV_U32 : public Inst_VOP2
+ {
+ public:
+ Inst_VOP2__V_SUBREV_U32(InFmt_VOP2*);
+ ~Inst_VOP2__V_SUBREV_U32();
+
+ int
+ getNumOperands() override
+ {
+ return numDstRegOperands() + numSrcRegOperands();
+ } // getNumOperands
+
+ int numDstRegOperands() override { return 1; }
+ int numSrcRegOperands() override { return 2; }
+
+ int
+ getOperandSize(int opIdx) override
+ {
+ switch (opIdx) {
+ case 0: //src_0
+ return 4;
+ case 1: //src_1
+ return 4;
+ case 2: //vdst
+ return 4;
+ default:
+ fatal("op idx %i out of bounds\n", opIdx);
+ return -1;
+ }
+ } // getOperandSize
+
+ void execute(GPUDynInstPtr) override;
+ }; // Inst_VOP2__V_SUBREV_U32
+
class Inst_VOP1__V_NOP : public Inst_VOP1
{
public:
@@ -24637,11 +24739,11 @@
void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_MAC_F32
- class Inst_VOP3__V_ADD_U32 : public Inst_VOP3B
+ class Inst_VOP3__V_ADD_CO_U32 : public Inst_VOP3B
{
public:
- Inst_VOP3__V_ADD_U32(InFmt_VOP3B*);
- ~Inst_VOP3__V_ADD_U32();
+ Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B*);
+ ~Inst_VOP3__V_ADD_CO_U32();
int
getNumOperands() override
@@ -24671,13 +24773,13 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP3__V_ADD_U32
+ }; // Inst_VOP3__V_ADD_CO_U32
- class Inst_VOP3__V_SUB_U32 : public Inst_VOP3B
+ class Inst_VOP3__V_SUB_CO_U32 : public Inst_VOP3B
{
public:
- Inst_VOP3__V_SUB_U32(InFmt_VOP3B*);
- ~Inst_VOP3__V_SUB_U32();
+ Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B*);
+ ~Inst_VOP3__V_SUB_CO_U32();
int
getNumOperands() override
@@ -24707,13 +24809,13 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP3__V_SUB_U32
+ }; // Inst_VOP3__V_SUB_CO_U32
- class Inst_VOP3__V_SUBREV_U32 : public Inst_VOP3B
+ class Inst_VOP3__V_SUBREV_CO_U32 : public Inst_VOP3B
{
public:
- Inst_VOP3__V_SUBREV_U32(InFmt_VOP3B*);
- ~Inst_VOP3__V_SUBREV_U32();
+ Inst_VOP3__V_SUBREV_CO_U32(InFmt_VOP3B*);
+ ~Inst_VOP3__V_SUBREV_CO_U32();
int
getNumOperands() override
@@ -24743,13 +24845,13 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP3__V_SUBREV_U32
+ }; // Inst_VOP3__V_SUBREV_CO_U32
- class Inst_VOP3__V_ADDC_U32 : public Inst_VOP3B
+ class Inst_VOP3__V_ADDC_CO_U32 : public Inst_VOP3B
{
public:
- Inst_VOP3__V_ADDC_U32(InFmt_VOP3B*);
- ~Inst_VOP3__V_ADDC_U32();
+ Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B*);
+ ~Inst_VOP3__V_ADDC_CO_U32();
int
getNumOperands() override
@@ -24781,13 +24883,13 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP3__V_ADDC_U32
+ }; // Inst_VOP3__V_ADDC_CO_U32
- class Inst_VOP3__V_SUBB_U32 : public Inst_VOP3B
+ class Inst_VOP3__V_SUBB_CO_U32 : public Inst_VOP3B
{
public:
- Inst_VOP3__V_SUBB_U32(InFmt_VOP3B*);
- ~Inst_VOP3__V_SUBB_U32();
+ Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B*);
+ ~Inst_VOP3__V_SUBB_CO_U32();
int
getNumOperands() override
@@ -24819,13 +24921,13 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP3__V_SUBB_U32
+ }; // Inst_VOP3__V_SUBB_CO_U32
- class Inst_VOP3__V_SUBBREV_U32 : public Inst_VOP3B
+ class Inst_VOP3__V_SUBBREV_CO_U32 : public Inst_VOP3B
{
public:
- Inst_VOP3__V_SUBBREV_U32(InFmt_VOP3B*);
- ~Inst_VOP3__V_SUBBREV_U32();
+ Inst_VOP3__V_SUBBREV_CO_U32(InFmt_VOP3B*);
+ ~Inst_VOP3__V_SUBBREV_CO_U32();
int
getNumOperands() override
@@ -24857,7 +24959,7 @@
} // getOperandSize
void execute(GPUDynInstPtr) override;
- }; // Inst_VOP3__V_SUBBREV_U32
+ }; // Inst_VOP3__V_SUBBREV_CO_U32
class Inst_VOP3__V_ADD_F16 : public Inst_VOP3A
{
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/47240
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I002fa6e9316d38fd4cc3554daff047523cfc12c9
Gerrit-Change-Number: 47240
Gerrit-PatchSet: 1
Gerrit-Owner: Michael Boyer <[email protected]>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- [email protected]
To unsubscribe send an email to [email protected]
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s