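Just to ease the next intermediate patch. Pass the struct brw_batch directly to intel_batchbuffer_save_state(), intel_batchbuffer_reset_to_saved(), intel_batchbuffer_space(), intel_batchbuffer_require_space() and intel_batchbuffer_emit_render_ring_prelude() instead of the brw_context, make USED_BATCH() take a pointer to the batch, and copy brw->gen into a new brw_batch.gen field in intel_batchbuffer_init() so that intel_batchbuffer_require_space() no longer needs the context. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> --- src/mesa/drivers/dri/i965/brw_batch.h | 1 + src/mesa/drivers/dri/i965/brw_blorp.cpp | 10 +++--- src/mesa/drivers/dri/i965/brw_compute.c | 6 ++-- src/mesa/drivers/dri/i965/brw_draw.c | 7 ++-- .../drivers/dri/i965/brw_performance_monitor.c | 11 +++--- src/mesa/drivers/dri/i965/brw_state_batch.c | 6 ++-- src/mesa/drivers/dri/i965/brw_urb.c | 4 +-- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 41 +++++++++++---------- src/mesa/drivers/dri/i965/intel_batchbuffer.h | 42 +++++++++++----------- src/mesa/drivers/dri/i965/intel_blit.c | 7 ++-- 10 files changed, 72 insertions(+), 63 deletions(-)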
diff --git a/src/mesa/drivers/dri/i965/brw_batch.h b/src/mesa/drivers/dri/i965/brw_batch.h index 1262751..3baaba9 100644 --- a/src/mesa/drivers/dri/i965/brw_batch.h +++ b/src/mesa/drivers/dri/i965/brw_batch.h @@ -61,6 +61,7 @@ typedef struct brw_batch { uint32_t state_batch_offset; enum brw_gpu_ring ring; bool needs_sol_reset; + int gen; bool no_batch_wrap; diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index f04b966..0b0cc8d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -222,10 +222,10 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params) brw_emit_mi_flush(brw); retry: - intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING); - intel_batchbuffer_save_state(brw); + intel_batchbuffer_require_space(&brw->batch, estimated_max_batch_usage, RENDER_RING); + intel_batchbuffer_save_state(&brw->batch); drm_intel_bo *saved_bo = brw->batch.bo; - uint32_t saved_used = USED_BATCH(brw->batch); + uint32_t saved_used = USED_BATCH(&brw->batch); uint32_t saved_state_batch_offset = brw->batch.state_batch_offset; switch (brw->gen) { @@ -244,7 +244,7 @@ retry: * reserved enough space that a wrap will never happen. 
*/ assert(brw->batch.bo == saved_bo); - assert((USED_BATCH(brw->batch) - saved_used) * 4 + + assert((USED_BATCH(&brw->batch) - saved_used) * 4 + (saved_state_batch_offset - brw->batch.state_batch_offset) < estimated_max_batch_usage); /* Shut up compiler warnings on release build */ @@ -259,7 +259,7 @@ retry: if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { if (!check_aperture_failed_once) { check_aperture_failed_once = true; - intel_batchbuffer_reset_to_saved(brw); + intel_batchbuffer_reset_to_saved(&brw->batch); brw_batch_flush(&brw->batch, NULL); goto retry; } else { diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index 9fd0d0d..4bff716 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -108,9 +108,9 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) * we've got validated state that needs to be in the same batch as the * primitives. */ - intel_batchbuffer_require_space(brw, estimated_buffer_space_needed, + intel_batchbuffer_require_space(&brw->batch, estimated_buffer_space_needed, RENDER_RING); - intel_batchbuffer_save_state(brw); + intel_batchbuffer_save_state(&brw->batch); retry: brw->batch.no_batch_wrap = true; @@ -122,7 +122,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { if (!fail_next) { - intel_batchbuffer_reset_to_saved(brw); + intel_batchbuffer_reset_to_saved(&brw->batch); brw_batch_flush(&brw->batch, NULL); fail_next = true; goto retry; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index ecc8bf6..c2e7bda 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -463,8 +463,9 @@ brw_try_draw_prims(struct gl_context *ctx, * we've got validated state that needs to be in the same batch as the * primitives. 
*/ - intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING); - intel_batchbuffer_save_state(brw); + intel_batchbuffer_require_space(&brw->batch, + estimated_max_prim_size, RENDER_RING); + intel_batchbuffer_save_state(&brw->batch); if (brw->num_instances != prims[i].num_instances || brw->basevertex != prims[i].basevertex) { @@ -518,7 +519,7 @@ retry: if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { if (!fail_next) { - intel_batchbuffer_reset_to_saved(brw); + intel_batchbuffer_reset_to_saved(&brw->batch); brw_batch_flush(&brw->batch, NULL); fail_next = true; goto retry; diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c index 3168613..dfc003f 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c +++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c @@ -704,9 +704,10 @@ emit_mi_report_perf_count(struct brw_context *brw, assert(offset_in_bytes % 64 == 0); /* Make sure the commands to take a snapshot fits in a single batch. */ - intel_batchbuffer_require_space(brw, MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4, + intel_batchbuffer_require_space(&brw->batch, + MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4, RENDER_RING); - int batch_used = USED_BATCH(brw->batch); + int batch_used = USED_BATCH(&brw->batch); /* Reports apparently don't always get written unless we flush first. */ brw_emit_mi_flush(brw); @@ -750,7 +751,7 @@ emit_mi_report_perf_count(struct brw_context *brw, brw_emit_mi_flush(brw); (void) batch_used; - assert(USED_BATCH(brw->batch) - batch_used <= MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4); + assert(USED_BATCH(&brw->batch) - batch_used <= MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4); } /** @@ -1128,7 +1129,7 @@ brw_begin_perf_monitor(struct gl_context *ctx, if (brw->perfmon.oa_users == 0) { /* Ensure the OACONTROL enable and snapshot land in the same batch. 
*/ int space = (MI_REPORT_PERF_COUNT_BATCH_DWORDS + 3) * 4; - intel_batchbuffer_require_space(brw, space, RENDER_RING); + intel_batchbuffer_require_space(&brw->batch, space, RENDER_RING); start_oa_counters(brw); } @@ -1388,7 +1389,7 @@ void brw_perf_monitor_new_batch(struct brw_context *brw) { assert(brw->batch.ring == RENDER_RING); - assert(brw->gen < 6 || USED_BATCH(brw->batch) == 0); + assert(brw->gen < 6 || USED_BATCH(&brw->batch) == 0); if (brw->perfmon.oa_users == 0) return; diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index fa0dc6f..b9fc38a 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -84,7 +84,7 @@ brw_annotate_aub(struct brw_context *brw) drm_intel_aub_annotation annotations[annotation_count]; int a = 0; make_annotation(&annotations[a++], AUB_TRACE_TYPE_BATCH, 0, - 4 * USED_BATCH(brw->batch)); + 4 * USED_BATCH(&brw->batch)); for (int i = brw->state_batch_count; i-- > 0; ) { uint32_t type = brw->state_batch_list[i].type; uint32_t start_offset = brw->state_batch_list[i].offset; @@ -133,8 +133,8 @@ __brw_state_batch(struct brw_context *brw, * space, then flush and try again. 
*/ if (batch->state_batch_offset < size || - offset < 4 * USED_BATCH(*batch) + batch->reserved_space) { - brw_batch_flush(&brw->batch, NULL); + offset < 4 * USED_BATCH(batch) + batch->reserved_space) { + brw_batch_flush(batch, NULL); offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment); } diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index f01bcfb..f4215c7 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -250,8 +250,8 @@ void brw_upload_urb_fence(struct brw_context *brw) uf.bits1.cs_fence = brw->urb.size; /* erratum: URB_FENCE must not cross a 64byte cacheline */ - if ((USED_BATCH(brw->batch) & 15) > 12) { - int pad = 16 - (USED_BATCH(brw->batch) & 15); + if ((USED_BATCH(&brw->batch) & 15) > 12) { + int pad = 16 - (USED_BATCH(&brw->batch) & 15); do *brw->batch.map_next++ = MI_NOOP; while (--pad); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index ffc1373..2b5acd1 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -44,6 +44,8 @@ intel_batchbuffer_reset(struct brw_context *brw); int intel_batchbuffer_init(struct brw_context *brw) { + brw->batch.gen = brw->gen; + intel_batchbuffer_reset(brw); if (!brw->has_llc) { @@ -97,21 +99,20 @@ intel_batchbuffer_reset(struct brw_context *brw) } void -intel_batchbuffer_save_state(struct brw_context *brw) +intel_batchbuffer_save_state(struct brw_batch *batch) { - brw->batch.saved.map_next = brw->batch.map_next; - brw->batch.saved.reloc_count = - drm_intel_gem_bo_get_reloc_count(brw->batch.bo); + batch->saved.map_next = batch->map_next; + batch->saved.reloc_count = drm_intel_gem_bo_get_reloc_count(batch->bo); } void -intel_batchbuffer_reset_to_saved(struct brw_context *brw) +intel_batchbuffer_reset_to_saved(struct brw_batch *batch) { - drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count); 
+ drm_intel_gem_bo_clear_relocs(batch->bo, batch->saved.reloc_count); - brw->batch.map_next = brw->batch.saved.map_next; - if (USED_BATCH(brw->batch) == 0) - brw->batch.ring = UNKNOWN_RING; + batch->map_next = batch->saved.map_next; + if (USED_BATCH(batch) == 0) + batch->ring = UNKNOWN_RING; } void @@ -143,7 +144,7 @@ do_batch_dump(struct brw_context *brw) drm_intel_decode_set_batch_pointer(decode, batch->bo->virtual, batch->bo->offset64, - USED_BATCH(*batch)); + USED_BATCH(batch)); } else { fprintf(stderr, "WARNING: failed to map batchbuffer (%s), " @@ -152,7 +153,7 @@ do_batch_dump(struct brw_context *brw) drm_intel_decode_set_batch_pointer(decode, batch->map, batch->bo->offset64, - USED_BATCH(*batch)); + USED_BATCH(batch)); } drm_intel_decode_set_output_file(decode, stderr); @@ -168,8 +169,10 @@ do_batch_dump(struct brw_context *brw) } void -intel_batchbuffer_emit_render_ring_prelude(struct brw_context *brw) +intel_batchbuffer_emit_render_ring_prelude(struct brw_batch *batch) { + struct brw_context *brw = container_of(batch, brw, batch); + /* We may need to enable and snapshot OA counters. 
*/ brw_perf_monitor_new_batch(brw); } @@ -241,7 +244,7 @@ do_flush_locked(struct brw_context *brw) if (brw->has_llc) { drm_intel_bo_unmap(batch->bo); } else { - ret = drm_intel_bo_subdata(batch->bo, 0, 4 * USED_BATCH(*batch), batch->map); + ret = drm_intel_bo_subdata(batch->bo, 0, 4 * USED_BATCH(batch), batch->map); if (ret == 0 && batch->state_batch_offset != batch->bo->size) { ret = drm_intel_bo_subdata(batch->bo, batch->state_batch_offset, @@ -267,11 +270,11 @@ do_flush_locked(struct brw_context *brw) brw_annotate_aub(brw); if (batch->hw_ctx == NULL || batch->ring != RENDER_RING) { - ret = drm_intel_bo_mrb_exec(batch->bo, 4 * USED_BATCH(*batch), + ret = drm_intel_bo_mrb_exec(batch->bo, 4 * USED_BATCH(batch), NULL, 0, 0, flags); } else { ret = drm_intel_gem_bo_context_exec(batch->bo, batch->hw_ctx, - 4 * USED_BATCH(*batch), flags); + 4 * USED_BATCH(batch), flags); } } @@ -295,14 +298,14 @@ brw_batch_flush(struct brw_batch *batch, struct perf_debug *info) struct brw_context *brw = container_of(batch, brw, batch); int ret; - if (USED_BATCH(brw->batch) == 0) + if (USED_BATCH(batch) == 0) return 0; if (brw->batch.throttle_batch[0] == NULL) brw->batch.throttle_batch[0] = brw_bo_get(brw->batch.bo); if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { - int bytes_for_commands = 4 * USED_BATCH(brw->batch); + int bytes_for_commands = 4 * USED_BATCH(batch); int bytes_for_state = brw->batch.bo->size - brw->batch.state_batch_offset; int total_bytes = bytes_for_commands + bytes_for_state; fprintf(stderr, "%s:%d: Batchbuffer flush with %4db (pkt) + " @@ -322,7 +325,7 @@ brw_batch_flush(struct brw_batch *batch, struct perf_debug *info) /* Mark the end of the buffer. */ intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END); - if (USED_BATCH(brw->batch) & 1) { + if (USED_BATCH(&brw->batch) & 1) { /* Round batchbuffer usage to 2 DWORDs. 
*/ intel_batchbuffer_emit_dword(brw, MI_NOOP); } @@ -397,7 +400,7 @@ intel_batchbuffer_data(struct brw_context *brw, const void *data, GLuint bytes, enum brw_gpu_ring ring) { assert((bytes & 3) == 0); - intel_batchbuffer_require_space(brw, bytes, ring); + intel_batchbuffer_require_space(&brw->batch, bytes, ring); memcpy(brw->batch.map_next, data, bytes); brw->batch.map_next += bytes >> 2; } diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 4a56a4c..9e06f4d 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -14,11 +14,13 @@ struct brw_batch; struct brw_context; enum brw_gpu_ring; -void intel_batchbuffer_emit_render_ring_prelude(struct brw_context *brw); int intel_batchbuffer_init(struct brw_context *brw); void intel_batchbuffer_free(struct brw_context *brw); -void intel_batchbuffer_save_state(struct brw_context *brw); -void intel_batchbuffer_reset_to_saved(struct brw_context *brw); + +void intel_batchbuffer_save_state(struct brw_batch *batch); +void intel_batchbuffer_reset_to_saved(struct brw_batch *batch); + +void intel_batchbuffer_emit_render_ring_prelude(struct brw_batch *batch); /* Unlike bmBufferData, this currently requires the buffer be mapped. * Consider it a convenience function wrapping multple @@ -41,7 +43,7 @@ uint64_t intel_batchbuffer_reloc64(struct brw_context *brw, uint32_t write_domain, uint32_t delta); -#define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map)) +#define USED_BATCH(batch) ((uintptr_t)((batch)->map_next - (batch)->map)) static inline uint32_t float_as_int(float f) { @@ -60,10 +62,10 @@ static inline uint32_t float_as_int(float f) * work... 
*/ static inline unsigned -intel_batchbuffer_space(struct brw_context *brw) +intel_batchbuffer_space(struct brw_batch *batch) { - return (brw->batch.state_batch_offset - brw->batch.reserved_space) - - USED_BATCH(brw->batch) * 4; + return (batch->state_batch_offset - batch->reserved_space) + - USED_BATCH(batch)*4; } @@ -71,7 +73,7 @@ static inline void intel_batchbuffer_emit_dword(struct brw_context *brw, GLuint dword) { #ifdef DEBUG - assert(intel_batchbuffer_space(brw) >= 4); + assert(intel_batchbuffer_space(&brw->batch) >= 4); #endif *brw->batch.map_next++ = dword; assert(brw->batch.ring != UNKNOWN_RING); @@ -84,38 +86,38 @@ intel_batchbuffer_emit_float(struct brw_context *brw, float f) } static inline void -intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, +intel_batchbuffer_require_space(struct brw_batch *batch, GLuint sz, enum brw_gpu_ring ring) { /* If we're switching rings, implicitly flush the batch. */ - if (unlikely(ring != brw->batch.ring) && brw->batch.ring != UNKNOWN_RING && - brw->gen >= 6) { - brw_batch_flush(&brw->batch, NULL); + if (unlikely(ring != batch->ring) && batch->ring != UNKNOWN_RING && + batch->gen >= 6) { + brw_batch_flush(batch, NULL); } #ifdef DEBUG assert(sz < BATCH_SZ - BATCH_RESERVED); #endif - if (intel_batchbuffer_space(brw) < sz) - brw_batch_flush(&brw->batch, NULL); + if (intel_batchbuffer_space(batch) < sz) + brw_batch_flush(batch, NULL); - enum brw_gpu_ring prev_ring = brw->batch.ring; + enum brw_gpu_ring prev_ring = batch->ring; /* The brw_batch_flush() calls above might have changed * brw->batch.ring to UNKNOWN_RING, so we need to set it here at the end. 
*/ - brw->batch.ring = ring; + batch->ring = ring; if (unlikely(prev_ring == UNKNOWN_RING && ring == RENDER_RING)) - intel_batchbuffer_emit_render_ring_prelude(brw); + intel_batchbuffer_emit_render_ring_prelude(batch); } static inline void intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring) { - intel_batchbuffer_require_space(brw, n * 4, ring); + intel_batchbuffer_require_space(&brw->batch, n * 4, ring); #ifdef DEBUG - brw->batch.emit = USED_BATCH(brw->batch); + brw->batch.emit = USED_BATCH(&brw->batch); brw->batch.total = n; #endif } @@ -125,7 +127,7 @@ intel_batchbuffer_advance(struct brw_context *brw) { #ifdef DEBUG brw_batch *batch = &brw->batch; - unsigned int _n = USED_BATCH(*batch) - batch->emit; + unsigned int _n = USED_BATCH(batch) - batch->emit; assert(batch->total != 0); if (_n != batch->total) { fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 750550d..22a5c5d 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -550,7 +550,7 @@ intelEmitCopyBlit(struct brw_context *brw, unsigned length = brw->gen >= 8 ? 10 : 8; - intel_batchbuffer_require_space(brw, length * 4, BLT_RING); + intel_batchbuffer_require_space(&brw->batch, length * 4, BLT_RING); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __func__, src_buffer, src_pitch, src_offset, src_x, src_y, @@ -728,8 +728,9 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); unsigned xy_setup_blt_length = brw->gen >= 8 ? 
10 : 8; - intel_batchbuffer_require_space(brw, (xy_setup_blt_length * 4) + - (3 * 4) + dwords * 4, BLT_RING); + intel_batchbuffer_require_space(&brw->batch, + (xy_setup_blt_length * 4) + + (3 * 4) + dwords * 4, BLT_RING); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) -- 2.5.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev