From: Jerome Glisse <jgli...@redhat.com> Signed-off-by: Jerome Glisse <jgli...@redhat.com> --- src/gallium/drivers/r300/r300_context.c | 2 +- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/radeonsi/radeonsi_pipe.c | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 104 +++++++++++++++------- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 ++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 21 ++++- 8 files changed, 100 insertions(+), 41 deletions(-)
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index b498454..f0d738e 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -376,7 +376,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, sizeof(struct pipe_transfer), 64, UTIL_SLAB_SINGLETHREADED); - r300->cs = rws->cs_create(rws); + r300->cs = rws->cs_create(rws, RING_GFX); if (r300->cs == NULL) goto fail; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 29ef988..7c4ec44 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -289,7 +289,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void goto fail; } - rctx->cs = rctx->ws->cs_create(rctx->ws); + rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX); rctx->ws->cs_set_flush_callback(rctx->cs, r600_flush_from_winsys, rctx); rctx->uploader = u_upload_create(&rctx->context, 1024 * 1024, 256, diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c index d66e30f..cfa1ff7 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c @@ -222,7 +222,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case TAHITI: si_init_state_functions(rctx); LIST_INITHEAD(&rctx->active_query_list); - rctx->cs = rctx->ws->cs_create(rctx->ws); + rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX); rctx->max_db = 8; si_init_config(rctx); break; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 897e962..6daafc3 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -453,7 +453,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf, } else { /* Try to avoid busy-waiting in radeon_bo_wait. 
*/ if (p_atomic_read(&bo->num_active_ioctls)) - radeon_drm_cs_sync_flush(cs); + radeon_drm_cs_sync_flush(rcs); } radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index c5e7f1e..5e2c471 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -90,6 +90,10 @@ #define RADEON_CS_RING_COMPUTE 1 #endif +#ifndef RADEON_CS_RING_DMA +#define RADEON_CS_RING_DMA 2 +#endif + #ifndef RADEON_CS_END_OF_FRAME #define RADEON_CS_END_OF_FRAME 0x04 #endif @@ -161,7 +165,7 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc) DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param); -static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws) +static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws, enum ring_type ring_type) { struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); struct radeon_drm_cs *cs; @@ -189,6 +193,7 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws) cs->csc = &cs->csc1; cs->cst = &cs->csc2; cs->base.buf = cs->csc->buf; + cs->base.ring_type = ring_type; p_atomic_inc(&ws->num_cs); if (cs->ws->num_cpus > 1 && debug_get_option_thread()) @@ -246,24 +251,34 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) return -1; } -static unsigned radeon_add_reloc(struct radeon_cs_context *csc, +static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, struct radeon_bo *bo, enum radeon_bo_usage usage, enum radeon_bo_domain domains, enum radeon_bo_domain *added_domains) { + struct radeon_cs_context *csc = cs->csc; struct drm_radeon_cs_reloc *reloc; unsigned i; unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1); enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? 
domains : 0; enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0; + bool update_hash = TRUE; if (csc->is_handle_added[hash]) { i = csc->reloc_indices_hashlist[hash]; reloc = &csc->relocs[i]; if (reloc->handle == bo->handle) { + /* do not update the hash table if it's the dma ring, so that the first hash always points + * to the first bo relocation, which will be the one used by the kernel. Following relocations + * will be ignored by the kernel memory placement (but still used by the kernel to + * update the cmd stream with proper buffer offset). + */ + update_hash = FALSE; update_reloc_domains(reloc, rd, wd, added_domains); - return i; + if (cs->base.ring_type != RING_DMA) { + return i; + } } /* Hash collision, look for the BO in the list of relocs linearly. */ @@ -271,11 +286,18 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc, --i; reloc = &csc->relocs[i]; if (reloc->handle == bo->handle) { + /* do not update the hash table if it's the dma ring, so that the first hash always points + * to the first bo relocation, which will be the one used by the kernel. Following relocations + * will be ignored by the kernel memory placement (but still used by the kernel to + * update the cmd stream with proper buffer offset). 
+ */ + update_hash = FALSE; update_reloc_domains(reloc, rd, wd, added_domains); - csc->reloc_indices_hashlist[hash] = i; /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ - return i; + if (cs->base.ring_type != RING_DMA) { + return i; + } } } } @@ -305,7 +327,9 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc, reloc->flags = 0; csc->is_handle_added[hash] = TRUE; - csc->reloc_indices_hashlist[hash] = csc->crelocs; + if (update_hash) { + csc->reloc_indices_hashlist[hash] = csc->crelocs; + } csc->chunks[1].length_dw += RELOC_DWORDS; @@ -321,8 +345,7 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; enum radeon_bo_domain added_domains; - - unsigned index = radeon_add_reloc(cs->csc, bo, usage, domains, &added_domains); + unsigned index = radeon_add_reloc(cs, bo, usage, domains, &added_domains); if (added_domains & RADEON_DOMAIN_GTT) cs->csc->used_gart += bo->base.size; @@ -373,7 +396,6 @@ static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; - unsigned index = radeon_get_reloc(cs->csc, bo); if (index == -1) { @@ -425,8 +447,10 @@ static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param) return NULL; } -void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs) +void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs) { + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + /* Wait for any pending ioctl to complete. */ if (cs->thread && cs->flush_started) { pipe_semaphore_wait(&cs->flush_completed); @@ -445,7 +469,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) fprintf(stderr, "radeon: command stream overflowed\n"); } - radeon_drm_cs_sync_flush(cs); + radeon_drm_cs_sync_flush(rcs); /* Flip command streams. 
*/ tmp = cs->csc; @@ -453,8 +477,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) cs->cst = tmp; /* If the CS is not empty or overflowed, emit it in a separate thread. */ - if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && - !debug_get_option_noop()) { + if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) { unsigned i, crelocs = cs->cst->crelocs; cs->cst->chunks[0].length_dw = cs->base.cdw; @@ -464,28 +487,40 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); } - cs->cst->flags[0] = 0; - cs->cst->flags[1] = RADEON_CS_RING_GFX; - cs->cst->cs.num_chunks = 2; - if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { - cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; - cs->cst->cs.num_chunks = 3; - } - if (cs->ws->info.r600_virtual_address) { - cs->cst->flags[0] |= RADEON_CS_USE_VM; - cs->cst->cs.num_chunks = 3; - } - if (flags & RADEON_FLUSH_END_OF_FRAME) { - cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; - cs->cst->cs.num_chunks = 3; - } - if (flags & RADEON_FLUSH_COMPUTE) { - cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; + switch (cs->base.ring_type) { + case RING_DMA: + cs->cst->flags[0] = 0; + cs->cst->flags[1] = RADEON_CS_RING_DMA; cs->cst->cs.num_chunks = 3; + if (cs->ws->info.r600_virtual_address) { + cs->cst->flags[0] |= RADEON_CS_USE_VM; + } + break; + default: + case RING_GFX: + cs->cst->flags[0] = 0; + cs->cst->flags[1] = RADEON_CS_RING_GFX; + cs->cst->cs.num_chunks = 2; + if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { + cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; + cs->cst->cs.num_chunks = 3; + } + if (cs->ws->info.r600_virtual_address) { + cs->cst->flags[0] |= RADEON_CS_USE_VM; + cs->cst->cs.num_chunks = 3; + } + if (flags & RADEON_FLUSH_END_OF_FRAME) { + cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; + cs->cst->cs.num_chunks = 3; + } + if (flags & RADEON_FLUSH_COMPUTE) { + cs->cst->flags[1] = 
RADEON_CS_RING_COMPUTE; + cs->cst->cs.num_chunks = 3; + } + break; } - if (cs->thread && - (flags & RADEON_FLUSH_ASYNC)) { + if (cs->thread && (flags & RADEON_FLUSH_ASYNC)) { cs->flush_started = 1; pipe_semaphore_signal(&cs->flush_queued); } else { @@ -503,7 +538,8 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - radeon_drm_cs_sync_flush(cs); + + radeon_drm_cs_sync_flush(rcs); if (cs->thread) { cs->kill_thread = 1; pipe_semaphore_signal(&cs->flush_queued); @@ -525,6 +561,7 @@ static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs, void *user) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + cs->flush_cs = flush; cs->flush_data = user; } @@ -562,4 +599,5 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_flush = radeon_drm_cs_flush; ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; + ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 286eb6a..99d5fbb 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -118,7 +118,7 @@ radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) return bo->num_cs_references != 0; } -void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs); +void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs); void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws); #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index bcfb448..685af29 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -319,6 +319,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) break; } + /* 
Check for dma */ + ws->info.r600_has_dma = FALSE; + if (ws->info.chip_class >= R700 && ws->info.drm_minor >= 27) { + ws->info.r600_has_dma = TRUE; + } + /* Get GEM info. */ retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index d0c4822..1d159dc 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -138,12 +138,19 @@ enum chip_class { TAHITI, }; +enum ring_type { + RING_GFX = 0, + RING_DMA, + RING_LAST, +}; + struct winsys_handle; struct radeon_winsys_cs_handle; struct radeon_winsys_cs { - unsigned cdw; /* Number of used dwords. */ - uint32_t *buf; /* The command buffer. */ + unsigned cdw; /* Number of used dwords. */ + uint32_t *buf; /* The command buffer. */ + enum ring_type ring_type; }; struct radeon_info { @@ -170,6 +177,7 @@ struct radeon_info { uint32_t r600_max_pipes; boolean r600_backend_map_valid; boolean r600_virtual_address; + boolean r600_has_dma; }; enum radeon_feature_id { @@ -350,7 +358,7 @@ struct radeon_winsys { * * \param ws The winsys this function is called from. */ - struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws); + struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type); /** * Destroy a command stream. @@ -435,6 +443,13 @@ struct radeon_winsys { boolean enable); /** + * Make sure all asynchronous flushes of the cs have completed + * + * \param cs A command stream. + */ + void (*cs_sync_flush)(struct radeon_winsys_cs *cs); + + /** * Initialize surface * * \param ws The winsys this function is called from. -- 1.7.11.7 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev