On Mon, Aug 11, 2014 at 05:29:31PM -0700, Kristian Høgsberg wrote: > Right now we decide which kernels to use and the GRF start offsets in > one place and emit the kernel pointers later. The logic of how to map > 8, 16 and 32 kernels to kernel start pointers follows the same logic as which > GRF start offsets to use, so lets figure out these two things in one place. > > Signed-off-by: Kristian Høgsberg <k...@bitplanet.net>
Some nits inline, but it looks okay to me otherwise. Reviewed-by: Ben Widawsky <b...@bwidawsk.net> > --- > src/mesa/drivers/dri/i965/gen6_wm_state.c | 16 ++++++++-------- > src/mesa/drivers/dri/i965/gen7_wm_state.c | 18 ++++++++++-------- > src/mesa/drivers/dri/i965/gen8_ps_state.c | 14 ++++++++------ > 3 files changed, 26 insertions(+), 22 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c > b/src/mesa/drivers/dri/i965/gen6_wm_state.c > index 047e036..ebd8443 100644 > --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c > +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c > @@ -71,7 +71,7 @@ upload_wm_state(struct brw_context *brw) > struct gl_context *ctx = &brw->ctx; > const struct brw_fragment_program *fp = > brw_fragment_program_const(brw->fragment_program); > - uint32_t dw2, dw4, dw5, dw6; > + uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2; > > /* _NEW_BUFFERS */ > bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; > @@ -109,7 +109,7 @@ upload_wm_state(struct brw_context *brw) > ADVANCE_BATCH(); > } > > - dw2 = dw4 = dw5 = dw6 = 0; > + dw2 = dw4 = dw5 = dw6 = ksp2 = 0; > dw4 |= GEN6_WM_STATISTICS_ENABLE; > dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; > dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; > @@ -151,14 +151,18 @@ upload_wm_state(struct brw_context *brw) > GEN6_WM_DISPATCH_START_GRF_SHIFT_0); > dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > GEN6_WM_DISPATCH_START_GRF_SHIFT_2); > + ksp0 = brw->wm.base.prog_offset; > + ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16; > } else > dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > GEN6_WM_DISPATCH_START_GRF_SHIFT_0); > + ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16; > } > else { > dw5 |= GEN6_WM_8_DISPATCH_ENABLE; > dw4 |= (brw->wm.prog_data->base.dispatch_grf_start_reg << > GEN6_WM_DISPATCH_START_GRF_SHIFT_0); > + ksp0 = brw->wm.base.prog_offset; > } > > /* CACHE_NEW_WM_PROG | _NEW_COLOR */ > @@ -277,10 +281,7 @@ upload_wm_state(struct brw_context *brw) > > BEGIN_BATCH(9); > OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); > - if (brw->wm.prog_data->prog_offset_16 && min_inv_per_frag > 1) > - OUT_BATCH(brw->wm.base.prog_offset + > brw->wm.prog_data->prog_offset_16); > - else > - OUT_BATCH(brw->wm.base.prog_offset); > + OUT_BATCH(ksp0); > OUT_BATCH(dw2); > if (brw->wm.prog_data->total_scratch) { > OUT_RELOC(brw->wm.base.scratch_bo, > @@ -293,8 +294,7 @@ upload_wm_state(struct brw_context *brw) > OUT_BATCH(dw5); > OUT_BATCH(dw6); > OUT_BATCH(0); /* kernel 1 pointer */ > - /* kernel 2 pointer */ > - OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16); > + OUT_BATCH(ksp2); Is this change meant to be here? It seems like before the change this would never be 0, and now it can be (when you have 16 wide but min_inv_per_frag > 1). I think if you do ksp2 = brw->wm.base.prog_offset at the top, and then ksp2 += brw->wm.prog_data->prog_offset_16 in the above hunk, you get the identical behavior. The same goes for gen7... > ADVANCE_BATCH(); > } > > diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c > b/src/mesa/drivers/dri/i965/gen7_wm_state.c > index 27a3f44..c03d19d 100644 > --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c > @@ -139,11 +139,11 @@ static void > upload_ps_state(struct brw_context *brw) > { > struct gl_context *ctx = &brw->ctx; > - uint32_t dw2, dw4, dw5; > + uint32_t dw2, dw4, dw5, ksp0, ksp2; > const int max_threads_shift = brw->is_haswell ? > HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; > > - dw2 = dw4 = dw5 = 0; > + dw2 = dw4 = dw5 = ksp2 = 0; > > dw2 |= > (ALIGN(brw->wm.base.sampler_count, 4) / 4) << > GEN7_PS_SAMPLER_COUNT_SHIFT; > @@ -231,22 +231,24 @@ upload_ps_state(struct brw_context *brw) > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > GEN7_PS_DISPATCH_START_GRF_SHIFT_2); > - } else > + ksp0 = brw->wm.base.prog_offset; > + ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16; > + } else { > dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > + ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16; > + } > } > else { > dw4 |= GEN7_PS_8_DISPATCH_ENABLE; > dw5 |= (brw->wm.prog_data->base.dispatch_grf_start_reg << > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > + ksp0 = brw->wm.base.prog_offset; > } > > BEGIN_BATCH(8); > OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); > - if (brw->wm.prog_data->prog_offset_16 && min_inv_per_frag > 1) > - OUT_BATCH(brw->wm.base.prog_offset + > brw->wm.prog_data->prog_offset_16); > - else > - OUT_BATCH(brw->wm.base.prog_offset); > + OUT_BATCH(ksp0); > OUT_BATCH(dw2); > if (brw->wm.prog_data->total_scratch) { > OUT_RELOC(brw->wm.base.scratch_bo, > @@ -258,7 +260,7 @@ upload_ps_state(struct brw_context *brw) > OUT_BATCH(dw4); > OUT_BATCH(dw5); > OUT_BATCH(0); /* kernel 1 pointer */ > - OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16); > + OUT_BATCH(ksp2); > ADVANCE_BATCH(); > } > > diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c > b/src/mesa/drivers/dri/i965/gen8_ps_state.c > index 3d6d7f0..f58d49c 100644 > --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c > +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c > @@ -134,7 +134,7 @@ static void > upload_ps_state(struct brw_context *brw) > { > struct gl_context *ctx = &brw->ctx; > - uint32_t dw3 = 0, dw6 = 0, dw7 = 0; > + uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0; Should ksp0 and ksp2 be uint64_t? I realize the current code is broken anyway. (/me makes note for no reloc branch). > > /* Initialize the execution mask with VMask. Otherwise, derivatives are > * incorrect for subspans where some of the pixels are unlit. We believe > @@ -203,22 +203,24 @@ upload_ps_state(struct brw_context *brw) > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > GEN7_PS_DISPATCH_START_GRF_SHIFT_2); > + ksp0 = brw->wm.base.prog_offset; > + ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16; > } else { > dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > + > + ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16; > } > } else { > dw6 |= GEN7_PS_8_DISPATCH_ENABLE; > dw7 |= (brw->wm.prog_data->base.dispatch_grf_start_reg << > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > + ksp0 = brw->wm.base.prog_offset; > } > > BEGIN_BATCH(12); > OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2)); > - if (brw->wm.prog_data->prog_offset_16 && min_invocations_per_fragment > 1) > - OUT_BATCH(brw->wm.base.prog_offset + > brw->wm.prog_data->prog_offset_16); > - else > - OUT_BATCH(brw->wm.base.prog_offset); > + OUT_BATCH(ksp0); > OUT_BATCH(0); > OUT_BATCH(dw3); > if (brw->wm.prog_data->total_scratch) { > @@ -233,7 +235,7 @@ upload_ps_state(struct brw_context *brw) > OUT_BATCH(dw7); > OUT_BATCH(0); /* kernel 1 pointer */ > OUT_BATCH(0); > - OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16); > + OUT_BATCH(ksp2); > OUT_BATCH(0); > ADVANCE_BATCH(); > } lgtm otherwise -- Ben Widawsky, Intel Open Source Technology Center _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev