NVC0_CB_AUX_BINDLESS_INFO isn't written to on Maxwell+, and with only 512 entries it is too small anyway for image handles that can index any of the 2048 TIC entries.
This adds a new array, NVC0_CB_AUX_SU_MS_INFO, which aliases it and provides the needed information. Signed-off-by: Rhys Perry <pendingchao...@gmail.com> --- .../drivers/nouveau/codegen/nv50_ir_driver.h | 1 + .../nouveau/codegen/nv50_ir_lowering_gm107.cpp | 4 +- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 49 +++++++++++++++++++++- .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 32 +------------- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 5 ++- src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 + src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 28 +++++++++++++ src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 26 +++++++++++- 8 files changed, 110 insertions(+), 37 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 7c835ceab8..0045ef729d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -188,6 +188,7 @@ struct nv50_ir_prog_info uint8_t msInfoCBSlot; /* cX[] used for multisample info */ uint16_t msInfoBase; /* base address for multisample info */ uint16_t uboInfoBase; /* base address for compute UBOs (gk104+) */ + uint16_t suMsInfoBase; /* base address for surface multisample info (gm107+) */ } io; /* driver callback to assign input/output locations */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp index c7436e2e29..d8eeaf4d09 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq) if (mask & 0x1) bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless)); + loadSuMsInfo32(handle, 0)); if (mask & 0x2) { int d = util_bitcount(mask & 0x1); bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), 
suq->getDef(d), - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless)); + loadSuMsInfo32(handle, 1)); } } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 1410cf26c8..e782b5bef9 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1712,6 +1712,35 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); } +/* On nvc0, surface info is obtained via the surface binding points passed + * to the SULD/SUST instructions. + * On nve4, surface info is stored in c[] and is used by various special + * instructions, e.g. for clamping coordinates or generating an address. + * They couldn't just have added an equivalent to TIC now, couldn't they ? + */ +#define NVC0_SU_INFO_ADDR 0x00 +#define NVC0_SU_INFO_FMT 0x04 +#define NVC0_SU_INFO_DIM_X 0x08 +#define NVC0_SU_INFO_PITCH 0x0c +#define NVC0_SU_INFO_DIM_Y 0x10 +#define NVC0_SU_INFO_ARRAY 0x14 +#define NVC0_SU_INFO_DIM_Z 0x18 +#define NVC0_SU_INFO_UNK1C 0x1c +#define NVC0_SU_INFO_WIDTH 0x20 +#define NVC0_SU_INFO_HEIGHT 0x24 +#define NVC0_SU_INFO_DEPTH 0x28 +#define NVC0_SU_INFO_TARGET 0x2c +#define NVC0_SU_INFO_BSIZE 0x30 +#define NVC0_SU_INFO_RAW_X 0x34 +#define NVC0_SU_INFO_MS_X 0x38 +#define NVC0_SU_INFO_MS_Y 0x3c + +#define NVC0_SU_INFO__STRIDE 0x40 + +#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8) +#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4) +#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4) + inline Value * NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless) { @@ -1732,6 +1761,15 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless prog->driver->io.suInfoBase); } +inline Value * +NVC0LoweringPass::loadSuMsInfo32(Value *handle, uint32_t index) +{ + Value *ptr = bld.mkOp2v(OP_AND, TYPE_U32, 
bld.getSSA(), handle, bld.mkImm(2047)); + ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(3)); + + return loadResInfo32(ptr, index * 4, prog->driver->io.suMsInfoBase); +} + static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) { switch (su->tex.target.getEnum()) { @@ -1817,8 +1855,15 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); Value *ind = tex->getIndirectR(); - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless); - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless); + Value *ms_x, *ms_y; + if (targ->getChipset() >= NVISA_GM107_CHIPSET) { + Value *handle = tex->tex.bindless ? ind : loadTexHandle(ind, slot + 32); + ms_x = loadSuMsInfo32(handle, 0); + ms_y = loadSuMsInfo32(handle, 1); + } else { + ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless); + ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless); + } bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 8724c09afd..5b3918d906 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -23,35 +23,6 @@ #include "codegen/nv50_ir.h" #include "codegen/nv50_ir_build_util.h" -/* On nvc0, surface info is obtained via the surface binding points passed - * to the SULD/SUST instructions. - * On nve4, surface info is stored in c[] and is used by various special - * instructions, e.g. for clamping coordinates or generating an address. - * They couldn't just have added an equivalent to TIC now, couldn't they ? 
- */ -#define NVC0_SU_INFO_ADDR 0x00 -#define NVC0_SU_INFO_FMT 0x04 -#define NVC0_SU_INFO_DIM_X 0x08 -#define NVC0_SU_INFO_PITCH 0x0c -#define NVC0_SU_INFO_DIM_Y 0x10 -#define NVC0_SU_INFO_ARRAY 0x14 -#define NVC0_SU_INFO_DIM_Z 0x18 -#define NVC0_SU_INFO_UNK1C 0x1c -#define NVC0_SU_INFO_WIDTH 0x20 -#define NVC0_SU_INFO_HEIGHT 0x24 -#define NVC0_SU_INFO_DEPTH 0x28 -#define NVC0_SU_INFO_TARGET 0x2c -#define NVC0_SU_INFO_BSIZE 0x30 -#define NVC0_SU_INFO_RAW_X 0x34 -#define NVC0_SU_INFO_MS_X 0x38 -#define NVC0_SU_INFO_MS_Y 0x3c - -#define NVC0_SU_INFO__STRIDE 0x40 - -#define NVC0_SU_INFO_DIM(i) (0x08 + (i) * 8) -#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4) -#define NVC0_SU_INFO_MS(i) (0x38 + (i) * 4) - namespace nv50_ir { class NVC0LegalizeSSA : public Pass @@ -148,7 +119,7 @@ protected: void handlePIXLD(Instruction *); void checkPredicate(Instruction *); - Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); + Value *loadSuMsInfo32(Value *handle, uint32_t index); virtual bool visit(Instruction *); @@ -161,6 +132,7 @@ private: Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base); Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base); Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base); + Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); Value *loadBufInfo64(Value *ptr, uint32_t off); Value *loadBufLength32(Value *ptr, uint32_t off); Value *loadUboInfo64(Value *ptr, uint32_t off); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 77237a3c0a..8aecfd8f6d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -149,9 +149,12 @@ /* 1 64-bits address and 1 32-bits sequence */ #define NVC0_CB_AUX_MP_INFO 0x6a0 #define NVC0_CB_AUX_MP_SIZE 3 * 4 -/* 512 64-byte blocks for bindless image handles */ +/* 512 64-byte blocks for bindless image handles (NVE4 only) */ #define 
NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4 #define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4) +/* 2048 8-byte blocks for image multisampling info (GM107+) */ +#define NVC0_CB_AUX_SU_MS_INFO(i) 0x6b0 + (i) * 2 * 4 +#define NVC0_CB_AUX_SU_MS_SIZE (NVC0_TIC_MAX_ENTRIES * 2 * 4) /* 4 32-bits floats for the vertex runout, put at the end */ #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 57d98753f4..10e9815ccc 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -607,6 +607,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO; info->io.bindlessBase = NVC0_CB_AUX_BINDLESS_INFO(0); } + if (info->target >= NVISA_GM107_CHIPSET) + info->io.suMsInfoBase = NVC0_CB_AUX_SU_MS_INFO(0); if (prog->type == PIPE_SHADER_COMPUTE) { if (info->target >= NVISA_GK104_CHIPSET) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index f40600e48a..4b4359c889 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -1258,6 +1258,17 @@ gm107_validate_surfaces(struct nvc0_context *nvc0, BEGIN_NVC0(push, NVC0_3D(CB_POS), 2); PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32)); PUSH_DATA (push, tic->id); + + /* upload multisampling info */ + if (view->resource->target == PIPE_TEXTURE_2D || + view->resource->target == PIPE_TEXTURE_2D_ARRAY) { + struct nv50_miptree *mt = nv50_miptree(view->resource); + + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3); + PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id)); + PUSH_DATA (push, mt->ms_x); + PUSH_DATA (push, mt->ms_y); + } } static inline void @@ -1398,6 +1409,7 @@ gm107_create_image_handle(struct pipe_context *pipe, struct pipe_sampler_view *sview = 
gm107_create_texture_view_from_image(pipe, view); struct nv50_tic_entry *tic = nv50_tic_entry(sview); + int s; if (tic == NULL) goto fail; @@ -1415,6 +1427,22 @@ gm107_create_image_handle(struct pipe_context *pipe, nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + if (view->resource->target == PIPE_TEXTURE_2D || + view->resource->target == PIPE_TEXTURE_2D_ARRAY) { + struct nv50_miptree *mt = nv50_miptree(view->resource); + + for (s = 0; s < 6; s++) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 3); + PUSH_DATA (push, NVC0_CB_AUX_SU_MS_INFO(tic->id)); + PUSH_DATA (push, mt->ms_x); + PUSH_DATA (push, mt->ms_y); + } + } + return 0x100000000ULL | tic->id; fail: diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index 28460f8cbe..7d436d1980 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -251,6 +251,23 @@ gm107_compute_validate_surfaces(struct nvc0_context *nvc0, PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATA (push, tic->id); + /* upload multisampling info */ + if (view->resource->target == PIPE_TEXTURE_2D || + view->resource->target == PIPE_TEXTURE_2D_ARRAY) { + struct nv50_miptree *mt = nv50_miptree(view->resource); + + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id)); + PUSH_DATA (push, address + NVC0_CB_AUX_SU_MS_INFO(tic->id)); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 8); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 3); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + PUSH_DATA (push, mt->ms_x); + PUSH_DATA (push, mt->ms_y); + } + 
BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); } @@ -558,6 +575,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, { const struct nvc0_screen *screen = nvc0->screen; const struct nvc0_program *cp = nvc0->compprog; + int cb_size; nve4_cp_launch_desc_init_default(desc); @@ -586,8 +604,12 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, NVC0_CB_USR_INFO(5), 1 << 16); } + + cb_size = 1 << 12; + if (nvc0->screen->compute->oclass >= GM107_COMPUTE_CLASS) + cb_size = 1 << 15; // make room for NVC0_CB_AUX_SU_MS_INFO nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, - NVC0_CB_AUX_INFO(5), 1 << 11); + NVC0_CB_AUX_INFO(5), cb_size); } static void @@ -625,7 +647,7 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, NVC0_CB_USR_INFO(5), 1 << 16); } gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, - NVC0_CB_AUX_INFO(5), 1 << 11); + NVC0_CB_AUX_INFO(5), 1 << 15); } static inline void * -- 2.14.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev