This avoids some snooping overhead between EUs processing separate shaders (so VS versus FS).
Improves performance of a minecraft trace with shader_time by 28.9% +/- 18.3% (n=7), and performance of my old GLSL demo by 93.7% +/- 0.8% (n=4). --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_program.c | 2 +- src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 8ce3954..dd3baa9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -621,7 +621,7 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type, fs_reg offset_mrf = fs_reg(MRF, base_mrf); offset_mrf.type = BRW_REGISTER_TYPE_UD; - emit(MOV(offset_mrf, fs_reg(shader_time_index * 4))); + emit(MOV(offset_mrf, fs_reg(shader_time_index * 64))); fs_reg time_mrf = fs_reg(MRF, base_mrf + 1); time_mrf.type = BRW_REGISTER_TYPE_UD; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 75eb6bc..c1aeefc 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -409,7 +409,7 @@ brw_collect_shader_time(struct brw_context *brw) uint32_t *times = brw->shader_time.bo->virtual; for (int i = 0; i < brw->shader_time.num_entries; i++) { - brw->shader_time.cumulative[i] += times[i]; + brw->shader_time.cumulative[i] += times[i * 16]; } /* Zero the BO out to clear it out for our next collection. diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index f319f32..b87d62b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1225,7 +1225,7 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, dst_reg offset_mrf = dst_reg(MRF, base_mrf); offset_mrf.type = BRW_REGISTER_TYPE_UD; - emit(MOV(offset_mrf, src_reg(shader_time_index * 4))); + emit(MOV(offset_mrf, src_reg(shader_time_index * 64))); dst_reg time_mrf = dst_reg(MRF, base_mrf + 1); time_mrf.type = BRW_REGISTER_TYPE_UD; -- 1.7.10.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev