On 05/24/2011 04:00 PM, Eric Anholt wrote:
No net code size change overall, but the unit update code is down 0.8% in size.
---
src/mesa/drivers/dri/i965/brw_context.h | 15 +++++++++++++--
src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++--
src/mesa/drivers/dri/i965/brw_wm.c | 2 +-
src/mesa/drivers/dri/i965/brw_wm_state.c | 4 ++--
4 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1d2ef06..621b6f8 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -231,8 +231,8 @@ struct brw_wm_prog_data {
GLuint first_curbe_grf;
GLuint first_curbe_grf_16;
- GLuint total_grf;
- GLuint total_grf_16;
+ GLuint reg_blocks;
+ GLuint reg_blocks_16;
GLuint total_scratch;
GLuint nr_params; /**< number of float params/constants */
@@ -863,6 +863,17 @@ float convert_param(enum param_conversion conversion,
float param)
}
}
+/**
+ * Pre-gen6, the register file of the EUs was shared between threads,
+ * and each thread used some subset allocated on a 16-register block
+ * granularity. The unit states wanted these block counts.
+ */
+static inline int
+brw_register_blocks(int reg_count)
+{
+ return ALIGN(reg_count, 16) / 16 - 1;
+}
+
I was a little concerned that the comment states that this is only
necessary pre-Gen6, yet most of this code runs on Gen6+ as well.
Presumably it calculates the number of register blocks used regardless
of generation...but Gen6 doesn't /use/ that information.
So, basically, this moves a few instructions from the Gen4/5 3DSTATE_WM
update code (which runs frequently, since it is used in drawing) to the
compile path (which runs rarely). That might help Gen4/5 performance ever
so slightly, but it won't help Gen6 at all.
It was nice having the Gen4/5-specific code at the point where it was
needed, but I suppose I'm okay with this.
Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>
GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 1586048..6ff692a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4170,9 +4170,9 @@ fs_visitor::run()
generate_code();
if (c->dispatch_width == 8) {
- c->prog_data.total_grf = grf_used;
+ c->prog_data.reg_blocks = brw_register_blocks(grf_used);
} else {
- c->prog_data.total_grf_16 = grf_used;
+ c->prog_data.reg_blocks_16 = brw_register_blocks(grf_used);
c->prog_data.prog_offset_16 = prog_offset_16;
/* Make sure we didn't try to sneak in an extra uniform */
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 9079762..3478ff1 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -115,7 +115,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_pass2(c);
/* how many general-purpose registers are used */
- c->prog_data.total_grf = c->max_wm_grf;
+ c->prog_data.reg_blocks = brw_register_blocks(c->max_wm_grf);
/* Emit GEN4 code.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index a356711..ef98f81 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -91,8 +91,8 @@ brw_prepare_wm_unit(struct brw_context *brw)
}
/* CACHE_NEW_WM_PROG */
- wm->thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
- wm->wm9.grf_reg_count_2 = ALIGN(brw->wm.prog_data->total_grf_16, 16) / 16 - 1;
+ wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks;
+ wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16;
wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
/* reloc */
wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev