Hi Eric, the BSpec says that VS and WM should fall back to a single thread to avoid races when scratch space is used.
Thanks Zou Nanhai >>-----Original Message----- >>From: mesa-dev-bounces+nanhai.zou=intel....@lists.freedesktop.org >>[mailto:mesa-dev-bounces+nanhai.zou=intel....@lists.freedesktop.org] On >>Behalf Of Eric Anholt >>Sent: 2011年4月15日 14:53 >>To: mesa-dev@lists.freedesktop.org >>Subject: [Mesa-dev] [PATCH 1/2] i965/fs: Add gen6 register spilling support. >> >>Most of this is code movement to get the scratch space allocated in a >>shared location. Other than that, the only real changes are that the >>old oword block messages now operate on oword-aligned areas (with new >>messages for unaligned access, which we don't do), and that the >>caching control is in the SFID part of the descriptor instead of >>message control. >> >>Fixes glsl-fs-convolution-1. >>--- >> src/mesa/drivers/dri/i965/brw_eu_emit.c | 45 >>+++++++++++++++++---- >> src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 - >> src/mesa/drivers/dri/i965/brw_wm.c | 15 +++++++ >> src/mesa/drivers/dri/i965/brw_wm_state.c | 20 --------- >> src/mesa/drivers/dri/i965/gen6_wm_state.c | 7 +++- >> 5 files changed, 58 insertions(+), 31 deletions(-) >> >>diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c >>b/src/mesa/drivers/dri/i965/brw_eu_emit.c >>index 57313a5..2d654e7 100644 >>--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c >>+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c >>@@ -496,6 +496,8 @@ static void brw_set_dp_write_message( struct brw_context >>*brw, >> insn->bits3.dp_render_cache.response_length = response_length; >> insn->bits3.dp_render_cache.msg_length = msg_length; >> insn->bits3.dp_render_cache.end_of_thread = end_of_thread; >>+ >>+ /* We always use the render cache for write messages */ >> insn->header.destreg__conditionalmod = >>BRW_MESSAGE_TARGET_DATAPORT_WRITE; >> /* XXX really need below? 
*/ >> insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; >>@@ -539,6 +541,13 @@ brw_set_dp_read_message(struct brw_context *brw, >> brw_set_src1(insn, brw_imm_d(0)); >> >> if (intel->gen >= 6) { >>+ uint32_t target_function; >>+ >>+ if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE) >>+ target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */ >>+ else >>+ target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache >>*/ >>+ >> insn->bits3.dp_render_cache.binding_table_index = >>binding_table_index; >> insn->bits3.dp_render_cache.msg_control = msg_control; >> insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0; >>@@ -548,9 +557,9 @@ brw_set_dp_read_message(struct brw_context *brw, >> insn->bits3.dp_render_cache.response_length = response_length; >> insn->bits3.dp_render_cache.msg_length = msg_length; >> insn->bits3.dp_render_cache.end_of_thread = 0; >>- insn->header.destreg__conditionalmod = >>BRW_MESSAGE_TARGET_DATAPORT_READ; >>+ insn->header.destreg__conditionalmod = target_function; >> /* XXX really need below? 
*/ >>- insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; >>+ insn->bits2.send_gen5.sfid = target_function; >> insn->bits2.send_gen5.end_of_thread = 0; >> } else if (intel->gen == 5) { >> insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; >>@@ -1486,9 +1495,12 @@ void brw_oword_block_write_scratch(struct brw_compile >>*p, >> GLuint offset) >> { >> struct intel_context *intel = &p->brw->intel; >>- uint32_t msg_control; >>+ uint32_t msg_control, msg_type; >> int mlen; >> >>+ if (intel->gen >= 6) >>+ offset /= 16; >>+ >> mrf = retype(mrf, BRW_REGISTER_TYPE_UD); >> >> if (num_regs == 1) { >>@@ -1554,13 +1566,22 @@ void brw_oword_block_write_scratch(struct brw_compile >>*p, >> } >> >> brw_set_dest(p, insn, dest); >>- brw_set_src0(insn, brw_null_reg()); >>+ if (intel->gen >= 6) { >>+ brw_set_src0(insn, mrf); >>+ } else { >>+ brw_set_src0(insn, brw_null_reg()); >>+ } >>+ >>+ if (intel->gen >= 6) >>+ msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; >>+ else >>+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; >> >> brw_set_dp_write_message(p->brw, >> insn, >> 255, /* binding table index (255=stateless) */ >> msg_control, >>- BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* >>msg_type */ >>+ msg_type, >> mlen, >> GL_TRUE, /* header_present */ >> 0, /* pixel scoreboard */ >>@@ -1585,9 +1606,13 @@ brw_oword_block_read_scratch(struct brw_compile *p, >> int num_regs, >> GLuint offset) >> { >>+ struct intel_context *intel = &p->brw->intel; >> uint32_t msg_control; >> int rlen; >> >>+ if (intel->gen >= 6) >>+ offset /= 16; >>+ >> mrf = retype(mrf, BRW_REGISTER_TYPE_UD); >> dest = retype(dest, BRW_REGISTER_TYPE_UW); >> >>@@ -1624,14 +1649,18 @@ brw_oword_block_read_scratch(struct brw_compile *p, >> insn->header.destreg__conditionalmod = mrf.nr; >> >> brw_set_dest(p, insn, dest); /* UW? 
*/ >>- brw_set_src0(insn, brw_null_reg()); >>+ if (intel->gen >= 6) { >>+ brw_set_src0(insn, mrf); >>+ } else { >>+ brw_set_src0(insn, brw_null_reg()); >>+ } >> >> brw_set_dp_read_message(p->brw, >> insn, >> 255, /* binding table index (255=stateless) */ >> msg_control, >> BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* >> msg_type >>*/ >>- 1, /* target cache (render/scratch) */ >>+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE, >> 1, /* msg_length */ >> rlen); >> } >>@@ -1839,7 +1868,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, >> bind_table_index, >> BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, >> msg_type, >>- 0, /* source cache = data cache */ >>+ BRW_DATAPORT_READ_TARGET_DATA_CACHE, >> 2, /* msg_length */ >> 1); /* response_length */ >> } >>diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp >>b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp >>index 479a914..67f29ce 100644 >>--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp >>+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp >>@@ -228,8 +228,6 @@ fs_visitor::assign_regs() >> >> if (reg == -1) { >> fail("no register to spill\n"); >>- } else if (intel->gen >= 6) { >>- fail("no spilling support on gen6 yet\n"); >> } else { >> spill_reg(reg); >> } >>diff --git a/src/mesa/drivers/dri/i965/brw_wm.c >>b/src/mesa/drivers/dri/i965/brw_wm.c >>index 2dd28fd..ab731a8 100644 >>--- a/src/mesa/drivers/dri/i965/brw_wm.c >>+++ b/src/mesa/drivers/dri/i965/brw_wm.c >>@@ -185,6 +185,7 @@ static void do_wm_prog( struct brw_context *brw, >> struct brw_fragment_program *fp, >> struct brw_wm_prog_key *key) >> { >>+ struct intel_context *intel = &brw->intel; >> struct brw_wm_compile *c; >> const GLuint *program; >> GLuint program_size; >>@@ -238,12 +239,26 @@ static void do_wm_prog( struct brw_context *brw, >> >> /* Scratch space is used for register spilling */ >> if (c->last_scratch) { >>+ uint32_t total_scratch; >>+ >> /* Per-thread scratch space is power-of-two sized. 
*/ >> for (c->prog_data.total_scratch = 1024; >> c->prog_data.total_scratch <= c->last_scratch; >> c->prog_data.total_scratch *= 2) { >> /* empty */ >> } >>+ total_scratch = c->prog_data.total_scratch * brw->wm_max_threads; >>+ >>+ if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) { >>+ drm_intel_bo_unreference(brw->wm.scratch_bo); >>+ brw->wm.scratch_bo = NULL; >>+ } >>+ if (brw->wm.scratch_bo == NULL) { >>+ brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, >>+ "wm scratch", >>+ total_scratch, >>+ 4096); >>+ } >> } >> else { >> c->prog_data.total_scratch = 0; >>diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c >>b/src/mesa/drivers/dri/i965/brw_wm_state.c >>index 5b5afc4..be4b260 100644 >>--- a/src/mesa/drivers/dri/i965/brw_wm_state.c >>+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c >>@@ -278,30 +278,10 @@ wm_unit_create_from_key(struct brw_context *brw, struct >>brw_wm_unit_key *key, >> >> static void upload_wm_unit( struct brw_context *brw ) >> { >>- struct intel_context *intel = &brw->intel; >> struct brw_wm_unit_key key; >> drm_intel_bo *reloc_bufs[3]; >> wm_unit_populate_key(brw, &key); >> >>- /* Allocate the necessary scratch space if we haven't already. Don't >>- * bother reducing the allocation later, since we use scratch so >>- * rarely. 
>>- */ >>- if (key.total_scratch) { >>- GLuint total = key.total_scratch * brw->wm_max_threads; >>- >>- if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { >>- drm_intel_bo_unreference(brw->wm.scratch_bo); >>- brw->wm.scratch_bo = NULL; >>- } >>- if (brw->wm.scratch_bo == NULL) { >>- brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, >>- "wm scratch", >>- total, >>- 4096); >>- } >>- } >>- >> reloc_bufs[0] = brw->wm.prog_bo; >> reloc_bufs[1] = brw->wm.scratch_bo; >> reloc_bufs[2] = brw->wm.sampler_bo; >>diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c >>b/src/mesa/drivers/dri/i965/gen6_wm_state.c >>index f4f0475..8215cb1 100644 >>--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c >>+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c >>@@ -184,7 +184,12 @@ upload_wm_state(struct brw_context *brw) >> OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); >> OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); >> OUT_BATCH(dw2); >>- OUT_BATCH(0); /* scratch space base offset */ >>+ if (brw->wm.prog_data->total_scratch) { >>+ OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, >>I915_GEM_DOMAIN_RENDER, >>+ ffs(brw->wm.prog_data->total_scratch) - 11); >>+ } else { >>+ OUT_BATCH(0); >>+ } >> OUT_BATCH(dw4); >> OUT_BATCH(dw5); >> OUT_BATCH(dw6); >>-- >>1.7.4.1 >> >>_______________________________________________ >>mesa-dev mailing list >>mesa-dev@lists.freedesktop.org >>http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev