At a slightly higher instruction cost, but with simpler logic, you could also retrieve the number of samples from the TIC; there's a query for that. Not necessarily a good idea, though -- just a thought.
On Fri, Jul 20, 2018 at 10:56 AM, Rhys Perry <pendingchao...@gmail.com> wrote: > NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+ and it's too small > anyway. > > This adds a new array, NVC0_CB_AUX_SU_MS_INFO, which aliases it and > provides the needed information. > > Signed-off-by: Rhys Perry <pendingchao...@gmail.com> > --- > .../drivers/nouveau/codegen/nv50_ir_driver.h | 1 + > .../nouveau/codegen/nv50_ir_lowering_gm107.cpp | 4 +- > .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 49 > +++++++++++++++++++++- > .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 32 +------------- > src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 5 ++- > src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 + > src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 28 +++++++++++++ > src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 26 +++++++++++- > 8 files changed, 110 insertions(+), 37 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h > index 7c835ceab8..0045ef729d 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h > @@ -188,6 +188,7 @@ struct nv50_ir_prog_info > uint8_t msInfoCBSlot; /* cX[] used for multisample info */ > uint16_t msInfoBase; /* base address for multisample info */ > uint16_t uboInfoBase; /* base address for compute UBOs (gk104+) */ > + uint16_t suMsInfoBase; /* base address for surface multisample > info (gm107+) */ > } io; > > /* driver callback to assign input/output locations */ > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp > index c7436e2e29..d8eeaf4d09 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp > @@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq) > > if (mask & 0x1) > 
bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0), > - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), > suq->tex.bindless)); > + loadSuMsInfo32(handle, 0)); > if (mask & 0x2) { > int d = util_bitcount(mask & 0x1); > bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d), > - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), > suq->tex.bindless)); > + loadSuMsInfo32(handle, 1)); > } > } > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > index 1410cf26c8..e782b5bef9 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > @@ -1712,6 +1712,35 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t > off) > mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), > ptr); > } > > +/* On nvc0, surface info is obtained via the surface binding points passed > + * to the SULD/SUST instructions. > + * On nve4, surface info is stored in c[] and is used by various special > + * instructions, e.g. for clamping coordinates or generating an address. > + * They couldn't just have added an equivalent to TIC now, couldn't they ? 
> + */ > +#define NVC0_SU_INFO_ADDR 0x00 > +#define NVC0_SU_INFO_FMT 0x04 > +#define NVC0_SU_INFO_DIM_X 0x08 > +#define NVC0_SU_INFO_PITCH 0x0c > +#define NVC0_SU_INFO_DIM_Y 0x10 > +#define NVC0_SU_INFO_ARRAY 0x14 > +#define NVC0_SU_INFO_DIM_Z 0x18 > +#define NVC0_SU_INFO_UNK1C 0x1c > +#define NVC0_SU_INFO_WIDTH 0x20 > +#define NVC0_SU_INFO_HEIGHT 0x24 > +#define NVC0_SU_INFO_DEPTH 0x28 > +#define NVC0_SU_INFO_TARGET 0x2c > +#define NVC0_SU_INFO_BSIZE 0x30 > +#define NVC0_SU_INFO_RAW_X 0x34 > +#define NVC0_SU_INFO_MS_X 0x38 > +#define NVC0_SU_INFO_MS_Y 0x3c > + > +#define NVC0_SU_INFO__STRIDE 0x40 > + > +#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8) > +#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4) > +#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4) > + > inline Value * > NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool > bindless) > { > @@ -1732,6 +1761,15 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, > uint32_t off, bool bindless > prog->driver->io.suInfoBase); > } > > +inline Value * > +NVC0LoweringPass::loadSuMsInfo32(Value *handle, uint32_t index) > +{ > + Value *ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), handle, > bld.mkImm(2047)); > + ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(3)); > + > + return loadResInfo32(ptr, index * 4, prog->driver->io.suMsInfoBase); > +} > + > static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) > { > switch (su->tex.target.getEnum()) { > @@ -1817,8 +1855,15 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction > *tex) > Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); > Value *ind = tex->getIndirectR(); > > - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), > tex->tex.bindless); > - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), > tex->tex.bindless); > + Value *ms_x, *ms_y; > + if (targ->getChipset() >= NVISA_GM107_CHIPSET) { > + Value *handle = tex->tex.bindless ? 
ind : loadTexHandle(ind, slot + > 32); > + ms_x = loadSuMsInfo32(handle, 0); > + ms_y = loadSuMsInfo32(handle, 1); > + } else { > + ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless); > + ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless); > + } > > bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); > bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > index 8724c09afd..5b3918d906 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > @@ -23,35 +23,6 @@ > #include "codegen/nv50_ir.h" > #include "codegen/nv50_ir_build_util.h" > > -/* On nvc0, surface info is obtained via the surface binding points passed > - * to the SULD/SUST instructions. > - * On nve4, surface info is stored in c[] and is used by various special > - * instructions, e.g. for clamping coordinates or generating an address. > - * They couldn't just have added an equivalent to TIC now, couldn't they ? 
> - */ > -#define NVC0_SU_INFO_ADDR 0x00 > -#define NVC0_SU_INFO_FMT 0x04 > -#define NVC0_SU_INFO_DIM_X 0x08 > -#define NVC0_SU_INFO_PITCH 0x0c > -#define NVC0_SU_INFO_DIM_Y 0x10 > -#define NVC0_SU_INFO_ARRAY 0x14 > -#define NVC0_SU_INFO_DIM_Z 0x18 > -#define NVC0_SU_INFO_UNK1C 0x1c > -#define NVC0_SU_INFO_WIDTH 0x20 > -#define NVC0_SU_INFO_HEIGHT 0x24 > -#define NVC0_SU_INFO_DEPTH 0x28 > -#define NVC0_SU_INFO_TARGET 0x2c > -#define NVC0_SU_INFO_BSIZE 0x30 > -#define NVC0_SU_INFO_RAW_X 0x34 > -#define NVC0_SU_INFO_MS_X 0x38 > -#define NVC0_SU_INFO_MS_Y 0x3c > - > -#define NVC0_SU_INFO__STRIDE 0x40 > - > -#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8) > -#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4) > -#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4) > - > namespace nv50_ir { > > class NVC0LegalizeSSA : public Pass > @@ -148,7 +119,7 @@ protected: > void handlePIXLD(Instruction *); > > void checkPredicate(Instruction *); > - Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); > + Value *loadSuMsInfo32(Value *handle, uint32_t index); > > virtual bool visit(Instruction *); > > @@ -161,6 +132,7 @@ private: > Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base); > Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base); > Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base); > + Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); > Value *loadBufInfo64(Value *ptr, uint32_t off); > Value *loadBufLength32(Value *ptr, uint32_t off); > Value *loadUboInfo64(Value *ptr, uint32_t off); > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > index 77237a3c0a..8aecfd8f6d 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h > @@ -149,9 +149,12 @@ > /* 1 64-bits address and 1 32-bits sequence */ > #define NVC0_CB_AUX_MP_INFO 0x6a0 > #define NVC0_CB_AUX_MP_SIZE 3 * 4 > -/* 512 64-byte blocks for 
bindless image handles */ > +/* 512 64-byte blocks for bindless image handles (NVE4 only) */ > #define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4 > #define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4) > +/* 2048 8-byte blocks for image multisampling info (GM107+) */ > +#define NVC0_CB_AUX_SU_MS_INFO(i) 0x6b0 + (i) * 2 * 4 > +#define NVC0_CB_AUX_SU_MS_SIZE (NVC0_TIC_MAX_ENTRIES * 2 * 4) > /* 4 32-bits floats for the vertex runout, put at the end */ > #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6) > > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c > index 57d98753f4..10e9815ccc 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c > @@ -607,6 +607,8 @@ nvc0_program_translate(struct nvc0_program *prog, > uint16_t chipset, > info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO; > info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0); > } > + if (info->target >= NVISA_GM107_CHIPSET) > + info->io.suMsInfoBase = NVC0_CB_AUX_SU_MS_INFO(0); > > if (prog->type == PIPE_SHADER_COMPUTE) { > if (info->target >= NVISA_GK104_CHIPSET) { > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > index f40600e48a..4b4359c889 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c > @@ -1258,6 +1258,17 @@ gm107_validate_surfaces(struct nvc0_context *nvc0, > BEGIN_NVC0(push, NVC0_3D(CB_POS), 2); > PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32)); > PUSH_DATA (push, tic->id); > + > + /* upload multisampling info */ > + if (view->resource->target == PIPE_TEXTURE_2D || > + view->resource->target == PIPE_TEXTURE_2D_ARRAY) { > + struct nv50_miptree *mt = nv50_miptree(view->resource); > + > + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3); > + PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id)); > + PUSH_DATA (push, mt->ms_x); > 
+ PUSH_DATA (push, mt->ms_y); > + } > } > > static inline void > @@ -1398,6 +1409,7 @@ gm107_create_image_handle(struct pipe_context *pipe, > struct pipe_sampler_view *sview = > gm107_create_texture_view_from_image(pipe, view); > struct nv50_tic_entry *tic = nv50_tic_entry(sview); > + int s; > > if (tic == NULL) > goto fail; > @@ -1415,6 +1427,22 @@ gm107_create_image_handle(struct pipe_context *pipe, > > nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); > > + if (view->resource->target == PIPE_TEXTURE_2D || > + view->resource->target == PIPE_TEXTURE_2D_ARRAY) { > + struct nv50_miptree *mt = nv50_miptree(view->resource); > + > + for (s = 0; s < 6; s++) { > + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); > + PUSH_DATA (push, NVC0_CB_AUX_SIZE); > + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + > NVC0_CB_AUX_INFO(s)); > + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + > NVC0_CB_AUX_INFO(s)); > + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3); > + PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id)); > + PUSH_DATA (push, mt->ms_x); > + PUSH_DATA (push, mt->ms_y); > + } > + } > + > return 0x100000000ULL | tic->id; > > fail: > diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c > b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c > index 28460f8cbe..7d436d1980 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c > +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c > @@ -251,6 +251,23 @@ gm107_compute_validate_surfaces(struct nvc0_context > *nvc0, > PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); > PUSH_DATA (push, tic->id); > > + /* upload multisampling info */ > + if (view->resource->target == PIPE_TEXTURE_2D || > + view->resource->target == PIPE_TEXTURE_2D_ARRAY) { > + struct nv50_miptree *mt = nv50_miptree(view->resource); > + > + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); > + PUSH_DATAh(push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id)); > + PUSH_DATA (push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id)); > + 
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); > + PUSH_DATA (push, 8); > + PUSH_DATA (push, 0x1); > + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 3); > + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); > + PUSH_DATA (push, mt->ms_x); > + PUSH_DATA (push, mt->ms_y); > + } > + > BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); > PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); > } > @@ -558,6 +575,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, > { > const struct nvc0_screen *screen = nvc0->screen; > const struct nvc0_program *cp = nvc0->compprog; > + int cb_size; > > nve4_cp_launch_desc_init_default(desc); > > @@ -586,8 +604,12 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, > nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, > NVC0_CB_USR_INFO(5), 1 << 16); > } > + > + cb_size = 1 << 12; > + if (nvc0->screen->compute->oclass >= GM107_COMPUTE_CLASS) > + cb_size = 1 << 15; // make room for NVC0_CB_AUX_SU_MS_INFO > nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, > - NVC0_CB_AUX_INFO(5), 1 << 11); > + NVC0_CB_AUX_INFO(5), cb_size); > } > > static void > @@ -625,7 +647,7 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, > NVC0_CB_USR_INFO(5), 1 << 16); > } > gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, > - NVC0_CB_AUX_INFO(5), 1 << 11); > + NVC0_CB_AUX_INFO(5), 1 << 15); > } > > static inline void * > -- > 2.14.4 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev