On Tue, Jan 8, 2013 at 4:51 PM, Jerome Glisse <j.gli...@gmail.com> wrote: > On Tue, Jan 8, 2013 at 10:15 AM, Marek Olšák <mar...@gmail.com> wrote: >> On Mon, Jan 7, 2013 at 9:30 PM, <j.gli...@gmail.com> wrote: >>> From: Jerome Glisse <jgli...@redhat.com> >>> >>> Signed-off-by: Jerome Glisse <jgli...@redhat.com> >>> --- >>> src/gallium/drivers/r300/r300_context.c | 2 +- >>> src/gallium/drivers/r600/r600_pipe.c | 2 +- >>> src/gallium/drivers/radeonsi/radeonsi_pipe.c | 2 +- >>> src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 2 +- >>> src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 104 >>> +++++++++++++++------- >>> src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 2 +- >>> src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 ++ >>> src/gallium/winsys/radeon/drm/radeon_winsys.h | 21 ++++- >>> 8 files changed, 100 insertions(+), 41 deletions(-) >>> >>> diff --git a/src/gallium/drivers/r300/r300_context.c >>> b/src/gallium/drivers/r300/r300_context.c >>> index b498454..f0d738e 100644 >>> --- a/src/gallium/drivers/r300/r300_context.c >>> +++ b/src/gallium/drivers/r300/r300_context.c >>> @@ -376,7 +376,7 @@ struct pipe_context* r300_create_context(struct >>> pipe_screen* screen, >>> sizeof(struct pipe_transfer), 64, >>> UTIL_SLAB_SINGLETHREADED); >>> >>> - r300->cs = rws->cs_create(rws); >>> + r300->cs = rws->cs_create(rws, RING_GFX); >>> if (r300->cs == NULL) >>> goto fail; >>> >>> diff --git a/src/gallium/drivers/r600/r600_pipe.c >>> b/src/gallium/drivers/r600/r600_pipe.c >>> index 29ef988..7c4ec44 100644 >>> --- a/src/gallium/drivers/r600/r600_pipe.c >>> +++ b/src/gallium/drivers/r600/r600_pipe.c >>> @@ -289,7 +289,7 @@ static struct pipe_context *r600_create_context(struct >>> pipe_screen *screen, void >>> goto fail; >>> } >>> >>> - rctx->cs = rctx->ws->cs_create(rctx->ws); >>> + rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX); >>> rctx->ws->cs_set_flush_callback(rctx->cs, r600_flush_from_winsys, >>> rctx); >>> >>> rctx->uploader = u_upload_create(&rctx->context, 1024 * 1024, 256, >>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c >>> b/src/gallium/drivers/radeonsi/radeonsi_pipe.c >>> index d66e30f..cfa1ff7 100644 >>> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c >>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c >>> @@ -222,7 +222,7 @@ static struct pipe_context *r600_create_context(struct >>> pipe_screen *screen, void >>> case TAHITI: >>> si_init_state_functions(rctx); >>> LIST_INITHEAD(&rctx->active_query_list); >>> - rctx->cs = rctx->ws->cs_create(rctx->ws); >>> + rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX); >>> rctx->max_db = 8; >>> si_init_config(rctx); >>> break; >>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c >>> b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c >>> index 897e962..6daafc3 100644 >>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c >>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c >>> @@ -453,7 +453,7 @@ static void *radeon_bo_map(struct >>> radeon_winsys_cs_handle *buf, >>> } else { >>> /* Try to avoid busy-waiting in radeon_bo_wait. */ >>> if (p_atomic_read(&bo->num_active_ioctls)) >>> - radeon_drm_cs_sync_flush(cs); >>> + radeon_drm_cs_sync_flush(rcs); >>> } >>> >>> radeon_bo_wait((struct pb_buffer*)bo, >>> RADEON_USAGE_READWRITE); >>> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c >>> b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c >>> index c5e7f1e..5e2c471 100644 >>> --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c >>> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c >>> @@ -90,6 +90,10 @@ >>> #define RADEON_CS_RING_COMPUTE 1 >>> #endif >>> >>> +#ifndef RADEON_CS_RING_DMA >>> +#define RADEON_CS_RING_DMA 2 >>> +#endif >>> + >>> #ifndef RADEON_CS_END_OF_FRAME >>> #define RADEON_CS_END_OF_FRAME 0x04 >>> #endif >>> @@ -161,7 +165,7 @@ static void radeon_destroy_cs_context(struct >>> radeon_cs_context *csc) >>> DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) >>> static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param); >>> >>> -static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys >>> *rws) >>> +static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys >>> *rws, enum ring_type ring_type) >>> { >>> struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); >>> struct radeon_drm_cs *cs; >>> @@ -189,6 +193,7 @@ static struct radeon_winsys_cs >>> *radeon_drm_cs_create(struct radeon_winsys *rws) >>> cs->csc = &cs->csc1; >>> cs->cst = &cs->csc2; >>> cs->base.buf = cs->csc->buf; >>> + cs->base.ring_type = ring_type; >>> >>> p_atomic_inc(&ws->num_cs); >>> if (cs->ws->num_cpus > 1 && debug_get_option_thread()) >>> @@ -246,24 +251,34 @@ int radeon_get_reloc(struct radeon_cs_context *csc, >>> struct radeon_bo *bo) >>> return -1; >>> } >>> >>> -static unsigned radeon_add_reloc(struct radeon_cs_context *csc, >>> +static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, >>> struct radeon_bo *bo, >>> enum radeon_bo_usage usage, >>> enum radeon_bo_domain domains, >>> enum radeon_bo_domain *added_domains) >>> { >>> + struct radeon_cs_context *csc = cs->csc; >>> struct drm_radeon_cs_reloc *reloc; >>> unsigned i; >>> unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1); >>> enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0; >>> enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0; >>> + bool update_hash = TRUE; >>> >>> if (csc->is_handle_added[hash]) { >>> i = csc->reloc_indices_hashlist[hash]; >>> reloc = &csc->relocs[i]; >>> if (reloc->handle == bo->handle) { >>> + /* do not update the hash table if it's dma ring, so that >>> first hash always point >>> + * to first bo relocation which will the one used by the >>> kernel. Following relocation >>> + * will be ignore by the kernel memory placement (but still >>> use by the kernel to >>> + * update the cmd stream with proper buffer offset). >>> + */ >>> + update_hash = FALSE; >>> update_reloc_domains(reloc, rd, wd, added_domains); >>> - return i; >>> + if (cs->base.ring_type != RING_DMA) { >>> + return i; >>> + } >>> } >>> >>> /* Hash collision, look for the BO in the list of relocs linearly. >>> */ >>> @@ -271,11 +286,18 @@ static unsigned radeon_add_reloc(struct >>> radeon_cs_context *csc, >>> --i; >>> reloc = &csc->relocs[i]; >>> if (reloc->handle == bo->handle) { >>> + /* do not update the hash table if it's dma ring, so that >>> first hash always point >>> + * to first bo relocation which will the one used by the >>> kernel. Following relocation >>> + * will be ignore by the kernel memory placement (but >>> still use by the kernel to >>> + * update the cmd stream with proper buffer offset). >>> + */ >>> + update_hash = FALSE; >>> update_reloc_domains(reloc, rd, wd, added_domains); >>> - >>> csc->reloc_indices_hashlist[hash] = i; >>> /*printf("write_reloc collision, hash: %i, handle: %i\n", >>> hash, bo->handle);*/ >>> - return i; >>> + if (cs->base.ring_type != RING_DMA) { >>> + return i; >>> + } >> >> All the changes in radeon_add_reloc don't make any sense. The hash >> table has nothing to do with the kernel nor does it change the >> ordering of relocations. It's only an optimization for a quick lookup >> of relocation indices which were added to the list before. The >> function behaves exactly like a linear search, except that it's way >> faster than that. It needs no changes at all. >> >> The rest looks good. >> >> Marek >> > > It needs change for dma, for dma you have to reemit the relocation as > many time as the buffer is use. We can't use the dma nop packet to > store an index in the dma cmd stream. So the change i made are > necessary. I keep the hash for dma so that i can update the first > relocation of a bo and update properly the read/write domain with the > new read write domain if those were ever to be different in a same dma > cmd stream.
In that case, you should put what you just told me in the comment in radeon_add_reloc, so that it's not so confusing. Reviewed-by: Marek Olšák <mar...@gmail.com> Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev