Improves citybench performance by 39.8638% +/- 2.26632% (n=10) at the default resolution, and by about the same amount at 320x240. --- src/mesa/drivers/dri/intel/intel_batchbuffer.c | 5 ----- src/mesa/drivers/dri/intel/intel_context.c | 21 +++++++++++++++++---- 2 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index d10e008..5631c19 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -211,11 +211,6 @@ _intel_batchbuffer_flush(struct intel_context *intel, if (intel->batch.used == 0) return 0; - if (intel->first_post_swapbuffers_batch == NULL) { - intel->first_post_swapbuffers_batch = intel->batch.bo; - drm_intel_bo_reference(intel->first_post_swapbuffers_batch); - } - if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, 4*intel->batch.used); diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 377bcbc..3d3feb6 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -419,18 +419,31 @@ intel_prepare_render(struct intel_context *intel) * We're using intelDRI2Flush (called from the loader before * swapbuffer) and glFlush (for front buffer rendering) as the * indicator that a frame is done and then throttle when we get - * here as we prepare to render the next frame. At this point for + * here as we prepare to render the next frame. At this point our * round trips for swap/copy and getting new buffers are done and - * we'll spend less time waiting on the GPU. + * we'll spend less time waiting on the GPU when we throttle here. * * Unfortunately, we don't have a handle to the batch containing * the swap, and getting our hands on that doesn't seem worth it, - * so we just us the first batch we emitted after the last swap. + * so we just use the first batch we emitted after the last swap. + * To bring the point we wait on closer to the swapbuffers, we make + * a tiny batchbuffer containing just a noop and flush it out. The + * overhead of it is worth the savings on apps that only emit one or + * a few batchbuffers per frame. 
*/ if (intel->need_throttle && intel->first_post_swapbuffers_batch) { drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); - intel->first_post_swapbuffers_batch = NULL; + + BEGIN_BATCH(1); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + + intel->first_post_swapbuffers_batch = intel->batch.bo; + drm_intel_bo_reference(intel->batch.bo); + + intel_batchbuffer_flush(intel); + intel->need_throttle = false; } } -- 1.7.9 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev