From: Kenneth Graunke <kenn...@whitecape.org>

Measured using an unoptimized variant of glamor that spends 50% of its CPU time in brw_draw_prims() (and hits the cache *very* frequently):
N Min Max Median Avg Stddev x 200 29200 40500 34900 34750 958.43256 + 200 31000 40300 34700 34622 916.35941 No difference proven at 95.0% confidence Similarly, no difference on GLB2.7 (also with cache hits): N Min Max Median Avg Stddev x 63 64.1 71.36 70.69 70.113175 1.6782026 + 63 63.6 71.18 70.75 70.223651 1.6044186 No difference proven at 95.0% confidence v2: Rebase on master (by anholt) Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> Signed-off-by: Eric Anholt <e...@anholt.net> Reviewed-by: Eric Anholt <e...@anholt.net> --- Before sending this out, I tried writing a lower-overhead CACHED_BATCH as a variation on the cached-batch-2 branch of my tree. It didn't help, either. I'm just sending it to the list in case anyone has any objection to it -- I'll push it soon if not. src/mesa/drivers/dri/i965/brw_cc.c | 2 +- src/mesa/drivers/dri/i965/brw_curbe.c | 2 +- src/mesa/drivers/dri/i965/brw_draw_upload.c | 4 +-- src/mesa/drivers/dri/i965/brw_misc_state.c | 8 ++--- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 42 --------------------------- src/mesa/drivers/dri/i965/intel_batchbuffer.h | 3 -- 6 files changed, 8 insertions(+), 53 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 4bc3b23..460dad6 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -247,7 +247,7 @@ static void upload_blend_constant_color(struct brw_context *brw) OUT_BATCH_F(ctx->Color.BlendColorUnclamped[1]); OUT_BATCH_F(ctx->Color.BlendColorUnclamped[2]); OUT_BATCH_F(ctx->Color.BlendColorUnclamped[3]); - CACHED_BATCH(); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_blend_constant_color = { diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 5da92cc..121b0aa 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -161,7 +161,7 @@ void brw_upload_cs_urb_state(struct brw_context *brw) 
assert(brw->urb.nr_cs_entries); OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries); } - CACHED_BATCH(); + ADVANCE_BATCH(); } static GLfloat fixed_plane[6][4] = { diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 2179a3a..aa6514d 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -643,7 +643,7 @@ static void brw_emit_vertices(struct brw_context *brw) (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); - CACHED_BATCH(); + ADVANCE_BATCH(); return; } @@ -808,7 +808,7 @@ static void brw_emit_vertices(struct brw_context *brw) OUT_BATCH(dw1); } - CACHED_BATCH(); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_vertices = { diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 3c908c8..57f9926 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -778,7 +778,7 @@ static void upload_polygon_stipple(struct brw_context *brw) for (i = 0; i < 32; i++) OUT_BATCH(ctx->PolygonStipple[i]); } - CACHED_BATCH(); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_polygon_stipple = { @@ -822,7 +822,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31); else OUT_BATCH(0); - CACHED_BATCH(); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_polygon_stipple_offset = { @@ -856,7 +856,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) /* use legacy aa line coverage computation */ OUT_BATCH(0); OUT_BATCH(0); - CACHED_BATCH(); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_aa_line_parameters = { @@ -901,7 +901,7 @@ static void upload_line_stipple(struct brw_context *brw) OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor); } - 
CACHED_BATCH(); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_line_stipple = { diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index d9b6c15..64e0298 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -432,48 +432,6 @@ intel_batchbuffer_data(struct brw_context *brw, brw->batch.used += bytes >> 2; } -void -intel_batchbuffer_cached_advance(struct brw_context *brw) -{ - struct cached_batch_item **prev = &brw->batch.cached_items, *item; - uint32_t sz = (brw->batch.used - brw->batch.emit) * sizeof(uint32_t); - uint32_t *start = brw->batch.map + brw->batch.emit; - uint16_t op = *start >> 16; - - while (*prev) { - uint32_t *old; - - item = *prev; - old = brw->batch.map + item->header; - if (op == *old >> 16) { - if (item->size == sz && memcmp(old, start, sz) == 0) { - if (prev != &brw->batch.cached_items) { - *prev = item->next; - item->next = brw->batch.cached_items; - brw->batch.cached_items = item; - } - brw->batch.used = brw->batch.emit; - assert(brw->batch.used > 0); - return; - } - - goto emit; - } - prev = &item->next; - } - - item = malloc(sizeof(struct cached_batch_item)); - if (item == NULL) - return; - - item->next = brw->batch.cached_items; - brw->batch.cached_items = item; - -emit: - item->size = sz; - item->header = brw->batch.emit; -} - /** * Restriction [DevSNB, DevIVB]: * diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 80cd571..652a45b 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -163,8 +163,6 @@ intel_batchbuffer_advance(struct brw_context *brw) #endif } -void intel_batchbuffer_cached_advance(struct brw_context *brw); - #define BEGIN_BATCH(n) intel_batchbuffer_begin(brw, n, RENDER_RING) #define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, BLT_RING) #define OUT_BATCH(d) 
intel_batchbuffer_emit_dword(brw, d) @@ -175,7 +173,6 @@ void intel_batchbuffer_cached_advance(struct brw_context *brw); } while (0) #define ADVANCE_BATCH() intel_batchbuffer_advance(brw); -#define CACHED_BATCH() intel_batchbuffer_cached_advance(brw); #ifdef __cplusplus } -- 1.8.5.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev