On Thu, Apr 13, 2017 at 1:45 AM, Matthew Mondazzi <msmonda...@gmail.com> wrote: > More relevant ISA constants put in place of chipset compares. This helps > better display which features are available to card than previous chipset > compares, making future development easier continue with. > > Signed-off-by: Matthew Mondazzi <msmonda...@gmail.com> > --- > .../drivers/nouveau/codegen/nv50_ir_driver.h | 11 ++++--- > .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 2 +- > .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 24 +++++++------- > .../nouveau/codegen/nv50_ir_lowering_nv50.cpp | 2 +- > .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 38 > +++++++++++----------- > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 +- > src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 8 ++--- > .../drivers/nouveau/codegen/nv50_ir_target.cpp | 2 +- > .../drivers/nouveau/codegen/nv50_ir_target.h | 2 +- > .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 8 ++--- > 10 files changed, 51 insertions(+), 48 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h > index 76c815e..12a45fc 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h > @@ -75,10 +75,13 @@ struct nv50_ir_prog_symbol > uint32_t offset; > }; > > -#define NVISA_SM30 0xe0 > -#define NVISA_SM35 0xea > -#define NVISA_SM50 0x110
This is not a patch against upstream. If there's a sequence of patches, you should post them together. If the patches are one logical change, squash them. > - > +enum sm_isa { > + NVISA_SM10 = 0x84, // > + NVISA_SM20 = 0xc0, // Fermi > + NVISA_SM30 = 0xe0, // Keplar > + NVISA_SM35 = 0xea, // Keplar > + NVISA_SM50 = 0x110, // Maxwell Leave the values to whatever the enum definition defaults them to. > +}; > > struct nv50_ir_prog_info > { > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp > index cc2a88e..9620151 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp > @@ -635,7 +635,7 @@ CodeEmitterNV50::emitLOAD(const Instruction *i) > code[1] |= 0x04000000; > break; > case FILE_MEMORY_SHARED: > - if (targ->getChipset() >= 0x84) { > + if (targ->getIsa() >= NVISA_SM10) { Nope. This is to distinguish G80 from G84+. Leave the chipset check. 
> assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType))); > code[0] = 0x10000001; > code[1] = 0x40000000; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > index ed29661..045e6e0 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp > @@ -806,7 +806,7 @@ CodeEmitterNVC0::emitSHLADD(const Instruction *i) > void > CodeEmitterNVC0::emitMADSP(const Instruction *i) > { > - assert(targ->getChipset() >= NVISA_SM30); > + assert(targ->getIsa() >= NVISA_SM30); > > emitForm_A(i, HEX64(00000000, 00000003)); > > @@ -1852,7 +1852,7 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i) > case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; > case FILE_MEMORY_SHARED: > if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { > - if (targ->getChipset() >= NVISA_SM30) > + if (targ->getIsa() >= NVISA_SM30) > opc = 0xb8000000; > else > opc = 0xcc000000; > @@ -1868,7 +1868,7 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i) > code[0] = 0x00000005; > code[1] = opc; > > - if (targ->getChipset() >= NVISA_SM30) { > + if (targ->getIsa() >= NVISA_SM30) { > // Unlocked store on shared memory can fail. 
> if (i->src(0).getFile() == FILE_MEMORY_SHARED && > i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { > @@ -1901,7 +1901,7 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) > case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; > case FILE_MEMORY_SHARED: > if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { > - if (targ->getChipset() >= NVISA_SM30) > + if (targ->getIsa() >= NVISA_SM30) > opc = 0xa8000000; > else > opc = 0xc4000000; > @@ -1944,7 +1944,7 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) > code[0] |= 63 << 14; > > if (p >= 0) { > - if (targ->getChipset() >= NVISA_SM30) > + if (targ->getIsa() >= NVISA_SM30) > defId(i->def(p), 8); > else > defId(i->def(p), 32 + 18); > @@ -2362,7 +2362,7 @@ CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i) > void > CodeEmitterNVC0::emitSUAddr(const TexInstruction *i) > { > - assert(targ->getChipset() < NVISA_SM30); > + assert(targ->getIsa() < NVISA_SM30); > > if (i->tex.rIndirectSrc < 0) { > code[1] |= 0x00004000; > @@ -2375,7 +2375,7 @@ CodeEmitterNVC0::emitSUAddr(const TexInstruction *i) > void > CodeEmitterNVC0::emitSUDim(const TexInstruction *i) > { > - assert(targ->getChipset() < NVISA_SM30); > + assert(targ->getIsa() < NVISA_SM30); > > code[1] |= (i->tex.target.getDim() - 1) << 12; > if (i->tex.target.isArray() || i->tex.target.isCube() || > @@ -2390,7 +2390,7 @@ CodeEmitterNVC0::emitSUDim(const TexInstruction *i) > void > CodeEmitterNVC0::emitSULEA(const TexInstruction *i) > { > - assert(targ->getChipset() < NVISA_SM30); > + assert(targ->getIsa() < NVISA_SM30); > > code[0] = 0x5; > code[1] = 0xf0000000; > @@ -2413,7 +2413,7 @@ CodeEmitterNVC0::emitSULEA(const TexInstruction *i) > void > CodeEmitterNVC0::emitSULDB(const TexInstruction *i) > { > - assert(targ->getChipset() < NVISA_SM30); > + assert(targ->getIsa() < NVISA_SM30); > > code[0] = 0x5; > code[1] = 0xd4000000 | (i->subOp << 15); > @@ -2431,7 +2431,7 @@ CodeEmitterNVC0::emitSULDB(const TexInstruction *i) > void > CodeEmitterNVC0::emitSUSTx(const TexInstruction *i) > { 
> - assert(targ->getChipset() < NVISA_SM30); > + assert(targ->getIsa() < NVISA_SM30); > > code[0] = 0x5; > code[1] = 0xdc000000 | (i->subOp << 15); > @@ -2751,14 +2751,14 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) > emitMADSP(insn); > break; > case OP_SULDB: > - if (targ->getChipset() >= NVISA_SM30) > + if (targ->getIsa() >= NVISA_SM30) > emitSULDGB(insn->asTex()); > else > emitSULDB(insn->asTex()); > break; > case OP_SUSTB: > case OP_SUSTP: > - if (targ->getChipset() >= NVISA_SM30) > + if (targ->getIsa() >= NVISA_SM30) > emitSUSTGx(insn->asTex()); > else > emitSUSTx(insn->asTex()); > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp > index 36ab837..00bd39e 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp > @@ -302,7 +302,7 @@ NV50LegalizePostRA::visit(BasicBlock *bb) > if (i->isNop()) { > bb->remove(i); > } else > - if (i->op == OP_PRERET && prog->getTarget()->getChipset() < 0xa0) { > + if (i->op == OP_PRERET && prog->getTarget()->getIsa() < 0xa0) { This is a chipset check, not an ISA check. Leave it as getChipset() < 0xa0 for now. > handlePRERET(i->asFlow()); > } else { > // TODO: We will want to do this before register allocation, > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > index a887436..b589820 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > @@ -129,11 +129,11 @@ NVC0LegalizeSSA::handleTEXLOD(TexInstruction *i) > > // SM30+ stores the indirect handle as a separate arg, which comes before > // the LOD. 
> - if (prog->getTarget()->getChipset() >= NVISA_SM30 && > + if (prog->getTarget()->getIsa() >= NVISA_SM30 && > i->tex.rIndirectSrc >= 0) > arg++; > // SM20 stores indirect handle combined with array coordinate > - if (prog->getTarget()->getChipset() < NVISA_SM30 && > + if (prog->getTarget()->getIsa() < NVISA_SM30 && > !i->tex.target.isArray() && > i->tex.rIndirectSrc >= 0) > arg++; > @@ -162,7 +162,7 @@ NVC0LegalizeSSA::handleShift(Instruction *lo) > // SM30 and prior don't have the fancy new SHF.L/R ops. So the logic has > to > // be completely emulated. For SM35+, we can use the more directed SHF > // operations. > - if (prog->getTarget()->getChipset() < NVISA_SM35) { > + if (prog->getTarget()->getIsa() < NVISA_SM35) { > // The strategy here is to handle shifts >= 32 and less than 32 as > // separate parts. > // > @@ -311,8 +311,8 @@ NVC0LegalizePostRA::NVC0LegalizePostRA(const Program > *prog) > : rZero(NULL), > carry(NULL), > pOne(NULL), > - needTexBar(prog->getTarget()->getChipset() >= 0xe0 && > - prog->getTarget()->getChipset() < 0x110) > + needTexBar(prog->getTarget()->getIsa() >= NVISA_SM30 && > + prog->getTarget()->getIsa() < NVISA_SM50) > { > } > > @@ -629,7 +629,7 @@ NVC0LegalizePostRA::visit(Function *fn) > pOne = new_LValue(fn, FILE_PREDICATE); > carry = new_LValue(fn, FILE_FLAGS); > > - rZero->reg.data.id = (prog->getTarget()->getChipset() >= NVISA_SM35) ? > 255 : 63; > + rZero->reg.data.id = (prog->getTarget()->getIsa() >= NVISA_SM35) ? 255 : > 63; > carry->reg.data.id = 0; > pOne->reg.data.id = 7; > > @@ -800,7 +800,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) > const int dim = i->tex.target.getDim() + i->tex.target.isCube(); > const int arg = i->tex.target.getArgCount(); > const int lyr = arg - (i->tex.target.isMS() ? 2 : 1); > - const int chipset = prog->getTarget()->getChipset(); > + const int chipset = prog->getTarget()->getIsa(); chipset != isa. Rename the variable as well. > > /* Only normalize in the non-explicit derivatives case. 
For explicit > * derivatives, this is handled in handleManualTXD. > @@ -1083,7 +1083,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) > // indirect are separate (and both precede the coordinates). Maxwell is > // handled in a separate function. > unsigned array; > - if (targ->getChipset() < NVISA_SM30) > + if (targ->getIsa() < NVISA_SM30) > array = i->tex.target.isArray() || i->tex.rIndirectSrc >= 0; > else > array = i->tex.target.isArray() + (i->tex.rIndirectSrc >= 0); > @@ -1150,7 +1150,7 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd) > int dim = txd->tex.target.getDim() + txd->tex.target.isCube(); > unsigned arg = txd->tex.target.getArgCount(); > unsigned expected_args = arg; > - const int chipset = prog->getTarget()->getChipset(); > + const int chipset = prog->getTarget()->getIsa(); > > if (chipset >= NVISA_SM30) { > if (!txd->tex.target.isArray() && txd->tex.useOffsets) > @@ -1205,7 +1205,7 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd) > bool > NVC0LoweringPass::handleTXQ(TexInstruction *txq) > { > - const int chipset = prog->getTarget()->getChipset(); > + const int chipset = prog->getTarget()->getIsa(); > if (chipset >= NVISA_SM30 && txq->tex.rIndirectSrc < 0) > txq->tex.r += prog->driver->io.texBindBase / 4; > > @@ -1501,9 +1501,9 @@ NVC0LoweringPass::handleATOM(Instruction *atom) > case FILE_MEMORY_SHARED: > // For Fermi/Kepler, we have to use ld lock/st unlock to perform atomic > // operations on shared memory. For Maxwell, ATOMS is enough. 
> - if (targ->getChipset() < NVISA_SM30) > + if (targ->getIsa() < NVISA_SM30) > handleSharedATOM(atom); > - else if (targ->getChipset() < NVISA_SM50) > + else if (targ->getIsa() < NVISA_SM50) > handleSharedATOMNVE4(atom); > return true; > default: > @@ -1552,7 +1552,7 @@ NVC0LoweringPass::handleATOM(Instruction *atom) > bool > NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) > { > - if (targ->getChipset() < NVISA_SM50) { > + if (targ->getIsa() < NVISA_SM50) { > if (cas->src(0).getFile() == FILE_MEMORY_SHARED) { > // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM(). > return false; > @@ -2391,7 +2391,7 @@ NVC0LoweringPass::handleLDST(Instruction *i) > assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP > } > } else if (i->src(0).getFile() == FILE_MEMORY_CONST) { > - if (targ->getChipset() >= NVISA_SM30 && > + if (targ->getIsa() >= NVISA_SM30 && > prog->getType() == Program::TYPE_COMPUTE) { > // The launch descriptor only allows to set up 8 CBs, but OpenGL > // requires at least 12 UBOs. To bypass this limitation, we store > the > @@ -2568,7 +2568,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i) > case SV_NTID: > case SV_NCTAID: > case SV_GRIDID: > - assert(targ->getChipset() >= NVISA_SM30); // mov $sreg otherwise > + assert(targ->getIsa() >= NVISA_SM30); // mov $sreg otherwise > if (sym->reg.data.sv.index == 3) { > i->op = OP_MOV; > i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1)); > @@ -2845,9 +2845,9 @@ NVC0LoweringPass::visit(Instruction *i) > case OP_SUSTP: > case OP_SUREDB: > case OP_SUREDP: > - if (targ->getChipset() >= NVISA_SM50) > + if (targ->getIsa() >= NVISA_SM50) > handleSurfaceOpGM107(i->asTex()); > - else if (targ->getChipset() >= NVISA_SM30) > + else if (targ->getIsa() >= NVISA_SM30) > handleSurfaceOpNVE4(i->asTex()); > else > handleSurfaceOpNVC0(i->asTex()); > @@ -2869,13 +2869,13 @@ NVC0LoweringPass::visit(Instruction *i) > * interpolation ops in frag shaders. 
> */ > bool doAfetch = false; > - if (targ->getChipset() >= NVISA_SM30 && > + if (targ->getIsa() >= NVISA_SM30 && > !i->perPatch && > (i->op == OP_VFETCH || i->op == OP_EXPORT) && > i->src(0).isIndirect(0)) { > doAfetch = true; > } > - if (targ->getChipset() >= NVISA_SM50 && > + if (targ->getIsa() >= NVISA_SM50 && > (i->op == OP_LINTERP || i->op == OP_PINTERP) && > i->src(0).isIndirect(0)) { > doAfetch = true; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 4c92a1e..fa5ddc6 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -3309,7 +3309,7 @@ PostRaLoadPropagation::visit(Instruction *i) > switch (i->op) { > case OP_FMA: > case OP_MAD: > - if (prog->getTarget()->getChipset() < 0xc0) > + if (prog->getTarget()->getIsa() < NVISA_SM20) > handleMADforNV50(i); > else > handleMADforNVC0(i); > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > index 193628c..2f9a952 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp > @@ -136,7 +136,7 @@ RegisterSet::init(const Target *targ) > } > > RegisterSet::RegisterSet(const Target *targ) > - : restrictedGPR16Range(targ->getChipset() < 0xc0) > + : restrictedGPR16Range(targ->getIsa() < NVISA_SM20) > { > init(targ); > for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i) > @@ -955,7 +955,7 @@ GCRA::coalesce(ArrayList& insns) > bool ret = doCoalesce(insns, JOIN_MASK_PHI); > if (!ret) > return false; > - switch (func->getProgram()->getTarget()->getChipset() & ~0xf) { > + switch (func->getProgram()->getTarget()->getIsa() & ~0xf) { > case 0x50: > case 0x80: > case 0x90: > @@ -1469,7 +1469,7 @@ GCRA::allocateRegisters(ArrayList& insns) > RIG.insert(&nodes[i]); > > if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL && 
> - prog->getTarget()->getChipset() < 0xc0) { > + prog->getTarget()->getIsa() < NVISA_SM20) { > Instruction *insn = lval->getInsn(); > if (insn->op == OP_MAD || insn->op == OP_FMA || insn->op == > OP_SAD) > // Short encoding only possible if they're all GPRs, no need > to > @@ -2242,7 +2242,7 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) > next = i->next; > > if ((tex = i->asTex())) { > - switch (targ->getChipset() & ~0xf) { > + switch (targ->getIsa() & ~0xf) { This switch might have made sense at the dawn of time. But now, it's just if (targ->getIsa() < SM20) { do one thing } else { do another thing; } Maybe take this opportunity to do that. > case 0x50: > case 0x80: > case 0x90: > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > index 298e7c6..fbbbdde 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp > @@ -404,7 +404,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info) > emitSymbolTable(info); > > // the nvc0 driver will print the binary iself together with the header > - if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0) > + if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getIsa() < > NVISA_SM20) > emit->printBinary(); > > delete emit; > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h > index e9d1057..c31ce07 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h > @@ -165,7 +165,7 @@ public: > > // 0x50 and 0x84 to 0xaf for nv50 > // 0xc0 to 0xdf for nvc0 > - inline uint32_t getChipset() const { return chipset; } > + inline uint32_t getIsa() const { return chipset; } Leave chipset alone. Add isa. Add logic to make sure both are set properly. Also, I have a mild preference for getISA() rather than getIsa. 
> > virtual CodeEmitter *getCodeEmitter(Program::Type) = 0; > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > index 62c3740..5cef879 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp > @@ -279,7 +279,7 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const > Symbol *sym) const > const SVSemantic sv = sym->reg.data.sv.sv; > > const bool isInput = shaderFile == FILE_SHADER_INPUT; > - const bool kepler = getChipset() >= NVISA_SM30; > + const bool kepler = getIsa() >= NVISA_SM30; > > switch (sv) { > case SV_POSITION: return 0x070 + idx * 4; > @@ -407,10 +407,10 @@ TargetNVC0::isAccessSupported(DataFile file, DataType > ty) const > if (ty == TYPE_NONE) > return false; > if (file == FILE_MEMORY_CONST) { > - if (getChipset() >= NVISA_SM50) > + if (getIsa() >= NVISA_SM50) > return typeSizeof(ty) <= 4; > else > - if (getChipset() >= NVISA_SM30) // wrong encoding ? > + if (getIsa() >= NVISA_SM30) // wrong encoding ? > return typeSizeof(ty) <= 8; > } > if (ty == TYPE_B96) > @@ -625,7 +625,7 @@ bool TargetNVC0::canDualIssue(const Instruction *a, const > Instruction *b) const > const OpClass clA = operationClass[a->op]; > const OpClass clB = operationClass[b->op]; > > - if (getChipset() >= 0xe4) { > + if (getIsa() >= 0xe4) { > // not texturing > // not if the 2nd instruction isn't necessarily executed > if (clA == OPCLASS_TEXTURE || clA == OPCLASS_FLOW) > -- > 2.9.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev