Oh. That's the base relative to the bo offset, which is a u32. So it's actually OK.
On Fri, Aug 3, 2018 at 6:06 PM, Rhys Perry <pendingchao...@gmail.com> wrote: > Yeah > > "base" in nve4_cp_launch_desc_set_cb() and > gp100_cp_launch_desc_set_cb() are uint32_t too. They should probably > be updated > > On Fri, Aug 3, 2018 at 11:04 PM, Ilia Mirkin <imir...@alum.mit.edu> wrote: >> On Fri, Aug 3, 2018 at 5:57 PM, Rhys Perry <pendingchao...@gmail.com> wrote: >>> Previously, UBOs were implemented using global memory loads for compute on >>> Kepler+ because it only supported 8 constant buffers on compute. This >>> required bounds checking and expensive load instructions. >>> >>> However 6 of the constant buffer bindings were left unused, this uses them >>> instead of loading from global memory in the shader for the first 6 >>> non-user constant buffers when possible. >>> >>> total instructions in shared programs : 5787979 -> 5748677 (-0.68%) >>> total gprs used in shared programs : 669901 -> 669373 (-0.08%) >>> total shared used in shared programs : 548832 -> 548832 (0.00%) >>> total local used in shared programs : 21068 -> 21064 (-0.02%) >>> >>> local shared gpr inst bytes >>> helped 1 0 152 274 274 >>> hurt 0 0 0 0 0 >>> >>> Signed-off-by: Rhys Perry <pendingchao...@gmail.com> >>> --- >>> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 18 +++++++------- >>> src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 28 >>> ++++++++++++++++++++++ >>> 2 files changed, 36 insertions(+), 10 deletions(-) >>> >>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >>> index 1410cf26c8..0fba96f261 100644 >>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >>> @@ -2437,18 +2437,16 @@ NVC0LoweringPass::handleLDST(Instruction *i) >>> assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP >>> } >>> } else if (i->src(0).getFile() == FILE_MEMORY_CONST) { >>> + int8_t fileIndex = 
i->getSrc(0)->reg.fileIndex - 1; >>> + Value *ind = i->getIndirect(0, 1); >>> + >>> if (targ->getChipset() >= NVISA_GK104_CHIPSET && >>> - prog->getType() == Program::TYPE_COMPUTE) { >>> + prog->getType() == Program::TYPE_COMPUTE && >>> + (fileIndex >= 6 || ind)) { >>> // The launch descriptor only allows to set up 8 CBs, but OpenGL >>> - // requires at least 12 UBOs. To bypass this limitation, we store >>> the >>> - // addrs into the driver constbuf and we directly load from the >>> global >>> - // memory. >>> - int8_t fileIndex = i->getSrc(0)->reg.fileIndex - 1; >>> - Value *ind = i->getIndirect(0, 1); >>> - >>> - if (!ind && fileIndex == -1) >>> - return; >>> - >>> + // requires at least 12 UBOs. To bypass this limitation, for >>> constant >>> + // buffers 7+, we store the addrs into the driver constbuf and we >>> + // directly load from the global memory. >>> if (ind) { >>> // Clamp the UBO index when an indirect access is used to avoid >>> // loading information from the wrong place in the driver cb. 
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c >>> b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c >>> index 28460f8cbe..8aa8d4936f 100644 >>> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c >>> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c >>> @@ -551,6 +551,30 @@ nve4_compute_derive_cache_split(struct nvc0_context >>> *nvc0, uint32_t shared_size) >>> return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1; >>> } >>> >>> +static void >>> +nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void >>> *desc) >>> +{ >>> + // only user constant buffers 1-6 can be put in the descriptor, the >>> rest are >>> + // loaded through global memory >>> + for (int i = 1; i <= 6; i++) { >>> + if (nvc0->constbuf[5][i].user || !nvc0->constbuf[5][i].u.buf) >>> + continue; >>> + >>> + struct nv04_resource *res = >>> + nv04_resource(nvc0->constbuf[5][i].u.buf); >>> + >>> + uint32_t base = res->offset + nvc0->constbuf[5][i].offset; >> >> No compiler error on this one? Ouch. This obviously needs to be a uint64_t. 
>> >>> + uint32_t size = nvc0->constbuf[5][i].size; >>> + if (gp100) >>> + gp100_cp_launch_desc_set_cb(desc, i, res->bo, base, size); >>> + else >>> + nve4_cp_launch_desc_set_cb(desc, i, res->bo, base, size); >>> + } >>> + >>> + // there is no need to do FLUSH(NVE4_COMPUTE_FLUSH_CB) because >>> + // nve4_compute_upload_input() does it later >>> +} >>> + >>> static void >>> nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, >>> struct nve4_cp_launch_desc *desc, >>> @@ -588,6 +612,8 @@ nve4_compute_setup_launch_desc(struct nvc0_context >>> *nvc0, >>> } >>> nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, >>> NVC0_CB_AUX_INFO(5), 1 << 11); >>> + >>> + nve4_compute_setup_buf_cb(nvc0, false, desc); >>> } >>> >>> static void >>> @@ -626,6 +652,8 @@ gp100_compute_setup_launch_desc(struct nvc0_context >>> *nvc0, >>> } >>> gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, >>> NVC0_CB_AUX_INFO(5), 1 << 11); >>> + >>> + nve4_compute_setup_buf_cb(nvc0, true, desc); >>> } >>> >>> static inline void * >>> -- >>> 2.14.4 >>> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev