On Mon, Sep 17, 2018 at 12:19 PM, Rhys Perry <pendingchao...@gmail.com> wrote: > NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+ and it's too small > anyway. > > With these changes, TXQ is used to determine the number of samples and > the coordinate adjustment information looked up in a small array in the > driver constant buffer. > > v2: rework to use TXQ and a small array instead of a larger array with an > entry for each texture > v3: get rid of the small array and calculate the adjustments in the shader > > Signed-off-by: Rhys Perry <pendingchao...@gmail.com> > Fixes: c2ae9b40527 ('nvc0: implement multisampled images on Maxwell+') > --- > .../codegen/nv50_ir_lowering_gm107.cpp | 4 +- > .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 43 ++++++++++++++++++- > .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 3 +- > 3 files changed, 45 insertions(+), 5 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp > index c7436e2e29..49a5f3b01f 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp > @@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq) > > if (mask & 0x1) > bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0), > - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), > suq->tex.bindless)); > + loadMsAdjInfo32(suq->tex.target, 0, slot, ind, > suq->tex.bindless)); > if (mask & 0x2) { > int d = util_bitcount(mask & 0x1); > bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d), > - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), > suq->tex.bindless)); > + loadMsAdjInfo32(suq->tex.target, 1, slot, ind, > suq->tex.bindless)); > } > } > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > index 176e0cf608..7ca38ca4bb 100644 > --- 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > @@ -1732,6 +1732,45 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, > uint32_t off, bool bindless > prog->driver->io.suInfoBase); > } > > +inline Value * > +NVC0LoweringPass::loadMsAdjInfo32(TexInstruction::Target target, uint32_t > index, int slot, Value *ind, bool bindless) > +{ > + if (!bindless || targ->getChipset() < NVISA_GM107_CHIPSET) > + return loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(index), bindless); > + > + assert(bindless); > + > + Value *samples = bld.getSSA(); > + // this shouldn't be lowered because it's being inserted before the > current instruction > + TexInstruction *tex = new_TexInstruction(func, OP_TXQ); > + tex->tex.target = target; > + tex->tex.query = TXQ_TYPE; > + tex->tex.mask = 0x4; > + tex->tex.r = 0xff; > + tex->tex.s = 0x1f; > + tex->tex.rIndirectSrc = 0; > + tex->setDef(0, samples); > + tex->setSrc(0, ind); > + tex->setSrc(1, bld.loadImm(NULL, 0)); > + bld.insert(tex); > + > + // doesn't work with sample counts other than 1/2/4/8 but they aren't > supported > + switch (index) { > + case 0: { > + Value *tmp = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), samples, > bld.mkImm(2)); > + return bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(2)); > + } > + case 1: { > + Value *tmp = bld.mkCmp(OP_SET, CC_GT, TYPE_U32, bld.getSSA(), > TYPE_U32, samples, bld.mkImm(2))->getDef(0); > + return bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(1));
I'd prefer OP_NEG here (with TYPE_S32) in place of the OP_AND. That may allow the negation to be embedded as a modifier into a later use. Alternatively, make the OP_SET write a TYPE_U8 result to FILE_PREDICATE and then OP_CVT the predicate, which will come out as a 1, IIRC. However, I think I like the OP_NEG better -- predicates are apparently slow, and it's a lot more likely for this value to get used in arithmetic (hence the NEG embedding being possible). Otherwise this is Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> > + } > + default: { > + assert(false); > + return NULL; > + } > + } > +} > + > static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) > { > switch (su->tex.target.getEnum()) { > @@ -1817,8 +1856,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction > *tex) > Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); > Value *ind = tex->getIndirectR(); > > - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), > tex->tex.bindless); > - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), > tex->tex.bindless); > + Value *ms_x = loadMsAdjInfo32(tex->tex.target, 0, slot, ind, > tex->tex.bindless); > + Value *ms_y = loadMsAdjInfo32(tex->tex.target, 1, slot, ind, > tex->tex.bindless); > > bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); > bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > index 5dbb3e4f00..4136b1ecfe 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > @@ -148,7 +148,7 @@ protected: > void handlePIXLD(Instruction *); > > void checkPredicate(Instruction *); > - Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); > + Value *loadMsAdjInfo32(TexInstruction::Target targ, uint32_t index, int > slot, Value *ind, bool bindless); > > virtual bool visit(Instruction *); > > @@ -161,6 +161,7 @@ private: > Value
*loadResInfo32(Value *ptr, uint32_t off, uint16_t base); > Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base); > Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base); > + Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); > Value *loadBufInfo64(Value *ptr, uint32_t off); > Value *loadBufLength32(Value *ptr, uint32_t off); > Value *loadUboInfo64(Value *ptr, uint32_t off); > -- > 2.17.1 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev