This way we can let brw_bo_map() choose the best mapping type. Part of the patch inlines map_gtt() into brw_bo_map_gtt() (and removes map_gtt()). brw_bo_map_gtt() just wrapped map_gtt() with locking and a call to set_domain(). map_gtt() is called by brw_bo_map_unsynchronized() to avoid the call to set_domain(). With the MAP_ASYNC flag, we now have the same behavior previously provided by brw_bo_map_unsynchronized(). --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 91 ++++-------------------- src/mesa/drivers/dri/i965/brw_program_cache.c | 4 +- src/mesa/drivers/dri/i965/intel_buffer_objects.c | 27 +++---- 3 files changed, 25 insertions(+), 97 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index ec9611f..aec07e1 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -689,8 +689,9 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags) DBG("brw_bo_map_cpu: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_cpu); - set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU, - flags & MAP_WRITE ? I915_GEM_DOMAIN_CPU : 0); + if (!(flags & MAP_ASYNC)) + set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU, + flags & MAP_WRITE ? I915_GEM_DOMAIN_CPU : 0); bo_mark_mmaps_incoherent(bo); VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_cpu, bo->size)); @@ -700,15 +701,17 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags) } static void * -map_gtt(struct brw_bo *bo) +brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags) { struct brw_bufmgr *bufmgr = bo->bufmgr; + pthread_mutex_lock(&bufmgr->lock); + /* Get a mapping of the buffer if we haven't before. */ if (bo->map_gtt == NULL) { struct drm_i915_gem_mmap_gtt mmap_arg; - DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", + DBG("brw_bo_map_gtt: mmap %d (%s), map_count=%d\n", bo->gem_handle, bo->name, bo->map_count); memclear(mmap_arg); @@ -719,6 +722,7 @@ map_gtt(struct brw_bo *bo) if (ret != 0) { DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); + pthread_mutex_unlock(&bufmgr->lock); return NULL; } @@ -729,89 +733,24 @@ map_gtt(struct brw_bo *bo) bo->map_gtt = NULL; DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); + pthread_mutex_unlock(&bufmgr->lock); return NULL; } + bo->map_count++; } - DBG("bo_map_gtt: %d (%s) -> %p\n", bo->gem_handle, bo->name, + DBG("brw_bo_map_gtt: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_gtt); - bo->map_count++; - return bo->map_gtt; -} - -static void * -brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - - pthread_mutex_lock(&bufmgr->lock); - - void *map = map_gtt(bo); - if (map == NULL) { - pthread_mutex_unlock(&bufmgr->lock); - return NULL; - } - - /* Now move it to the GTT domain so that the GPU and CPU - * caches are flushed and the GPU isn't actively using the - * buffer. - * - * The pagefault handler does this domain change for us when - * it has unbound the BO from the GTT, but it's up to us to - * tell it when we're about to use things if we had done - * rendering and it still happens to be bound to the GTT. - */ - set_domain(brw, "GTT mapping", bo, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); + if (!(flags & MAP_ASYNC)) + set_domain(brw, "GTT mapping", bo, + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); bo_mark_mmaps_incoherent(bo); VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_gtt, bo->size)); pthread_mutex_unlock(&bufmgr->lock); - return map; -} - -/** - * Performs a mapping of the buffer object like the normal GTT - * mapping, but avoids waiting for the GPU to be done reading from or - * rendering to the buffer. - * - * This is used in the implementation of GL_ARB_map_buffer_range: The - * user asks to create a buffer, then does a mapping, fills some - * space, runs a drawing command, then asks to map it again without - * synchronizing because it guarantees that it won't write over the - * data that the GPU is busy using (or, more specifically, that if it - * does write over the data, it acknowledges that rendering is - * undefined). - */ - -void * -brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo) -{ - struct brw_bufmgr *bufmgr = bo->bufmgr; - - /* If the CPU cache isn't coherent with the GTT, then use a - * regular synchronized mapping. The problem is that we don't - * track where the buffer was last used on the CPU side in - * terms of brw_bo_map_cpu vs brw_bo_map_gtt, so - * we would potentially corrupt the buffer even when the user - * does reasonable things. - */ - if (!bufmgr->has_llc) - return brw_bo_map_gtt(brw, bo, MAP_READ | MAP_WRITE); - - pthread_mutex_lock(&bufmgr->lock); - - void *map = map_gtt(bo); - if (map != NULL) { - bo_mark_mmaps_incoherent(bo); - VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_gtt, bo->size)); - } - - pthread_mutex_unlock(&bufmgr->lock); - - return map; + return bo->map_gtt; } static bool diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c index ab03969..2fd989a 100644 --- a/src/mesa/drivers/dri/i965/brw_program_cache.c +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c @@ -220,7 +220,7 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) if (can_do_exec_capture(brw->screen)) new_bo->kflags = EXEC_OBJECT_CAPTURE; if (brw->has_llc) - llc_map = brw_bo_map_unsynchronized(brw, new_bo); + llc_map = brw_bo_map(brw, new_bo, MAP_READ | MAP_ASYNC); /* Copy any existing data that needs to be saved. */ if (cache->next_offset != 0) { @@ -417,7 +417,7 @@ brw_init_caches(struct brw_context *brw) if (can_do_exec_capture(brw->screen)) cache->bo->kflags = EXEC_OBJECT_CAPTURE; if (brw->has_llc) - cache->map = brw_bo_map_unsynchronized(brw, cache->bo); + cache->map = brw_bo_map(brw, cache->bo, MAP_READ | MAP_WRITE | MAP_ASYNC); } static void diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 5813989..a9ac29a 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -216,17 +216,13 @@ brw_buffer_subdata(struct gl_context *ctx, */ if (offset + size <= intel_obj->gpu_active_start || intel_obj->gpu_active_end <= offset) { - if (brw->has_llc) { - void *map = brw_bo_map_unsynchronized(brw, intel_obj->buffer); - memcpy(map + offset, data, size); - brw_bo_unmap(intel_obj->buffer); + void *map = brw_bo_map(brw, intel_obj->buffer, MAP_WRITE | MAP_ASYNC); + memcpy(map + offset, data, size); + brw_bo_unmap(intel_obj->buffer); - if (intel_obj->gpu_active_end > intel_obj->gpu_active_start) - intel_obj->prefer_stall_to_blit = true; - return; - } else { - perf_debug("BufferSubData could be unsynchronized, but !LLC doesn't support it yet\n"); - } + if (intel_obj->gpu_active_end > intel_obj->gpu_active_start) + intel_obj->prefer_stall_to_blit = true; + return; } busy = @@ -400,15 +396,8 @@ brw_map_buffer_range(struct gl_context *ctx, return obj->Mappings[index].Pointer; } - void *map; - if (access & GL_MAP_UNSYNCHRONIZED_BIT) { - if (!brw->has_llc && brw->perf_debug && - brw_bo_busy(intel_obj->buffer)) { - perf_debug("MapBufferRange with GL_MAP_UNSYNCHRONIZED_BIT stalling (it's actually synchronized on non-LLC platforms)\n"); - } - map = brw_bo_map_unsynchronized(brw, intel_obj->buffer); - } else { - map = brw_bo_map(brw, intel_obj->buffer, access); + void *map = brw_bo_map(brw, intel_obj->buffer, access); + if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { mark_buffer_inactive(intel_obj); } -- 2.10.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev