On Mon, Aug 11, 2014 at 07:53:11PM -0700, Ben Widawsky wrote: > On Mon, Aug 11, 2014 at 05:29:31PM -0700, Kristian Høgsberg wrote: > > Right now we decide which kernels to use and the GRF start offsets in > > one place and emit the kernel pointers later. The logic of how to map > > 8, 16 and 32 kernels to kernel start pointers follows the same logic as > > which > > GRF start offsets to use, so lets figure out these two things in one place. > > > > Signed-off-by: Kristian Høgsberg <k...@bitplanet.net> > > Some nits inline, but it looks okay to me otherwise. > Reviewed-by: Ben Widawsky <b...@bwidawsk.net> > > > --- > > src/mesa/drivers/dri/i965/gen6_wm_state.c | 16 ++++++++-------- > > src/mesa/drivers/dri/i965/gen7_wm_state.c | 18 ++++++++++-------- > > src/mesa/drivers/dri/i965/gen8_ps_state.c | 14 ++++++++------ > > 3 files changed, 26 insertions(+), 22 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c > > b/src/mesa/drivers/dri/i965/gen6_wm_state.c > > index 047e036..ebd8443 100644 > > --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c > > +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c > > @@ -71,7 +71,7 @@ upload_wm_state(struct brw_context *brw) > > struct gl_context *ctx = &brw->ctx; > > const struct brw_fragment_program *fp = > > brw_fragment_program_const(brw->fragment_program); > > - uint32_t dw2, dw4, dw5, dw6; > > + uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2; > > > > /* _NEW_BUFFERS */ > > bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; > > @@ -109,7 +109,7 @@ upload_wm_state(struct brw_context *brw) > > ADVANCE_BATCH(); > > } > > > > - dw2 = dw4 = dw5 = dw6 = 0; > > + dw2 = dw4 = dw5 = dw6 = ksp2 = 0; > > dw4 |= GEN6_WM_STATISTICS_ENABLE; > > dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; > > dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; > > @@ -151,14 +151,18 @@ upload_wm_state(struct brw_context *brw) > > GEN6_WM_DISPATCH_START_GRF_SHIFT_0); > > dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > > GEN6_WM_DISPATCH_START_GRF_SHIFT_2); > > + ksp0 = brw->wm.base.prog_offset; > > + ksp2 = brw->wm.base.prog_offset + > > brw->wm.prog_data->prog_offset_16; > > } else > > dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > > GEN6_WM_DISPATCH_START_GRF_SHIFT_0); > > + ksp0 = brw->wm.base.prog_offset + > > brw->wm.prog_data->prog_offset_16; > > } > > else { > > dw5 |= GEN6_WM_8_DISPATCH_ENABLE; > > dw4 |= (brw->wm.prog_data->base.dispatch_grf_start_reg << > > GEN6_WM_DISPATCH_START_GRF_SHIFT_0); > > + ksp0 = brw->wm.base.prog_offset; > > } > > > > /* CACHE_NEW_WM_PROG | _NEW_COLOR */ > > @@ -277,10 +281,7 @@ upload_wm_state(struct brw_context *brw) > > > > BEGIN_BATCH(9); > > OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); > > - if (brw->wm.prog_data->prog_offset_16 && min_inv_per_frag > 1) > > - OUT_BATCH(brw->wm.base.prog_offset + > > brw->wm.prog_data->prog_offset_16); > > - else > > - OUT_BATCH(brw->wm.base.prog_offset); > > + OUT_BATCH(ksp0); > > OUT_BATCH(dw2); > > if (brw->wm.prog_data->total_scratch) { > > OUT_RELOC(brw->wm.base.scratch_bo, > > @@ -293,8 +294,7 @@ upload_wm_state(struct brw_context *brw) > > OUT_BATCH(dw5); > > OUT_BATCH(dw6); > > OUT_BATCH(0); /* kernel 1 pointer */ > > - /* kernel 2 pointer */ > > - OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16); > > + OUT_BATCH(ksp2); > > Is this change meant to be here? It seems like before the change this > would never be 0, and now it can be (when you have 16 wide but > min_inv_per_frag > 1).
It is an intentional change. It doesn't matter what we set ksp2 if we're not enalbing sim16 dispatch. Before we would set ksp2 to brw->wm.base.prog_offset even when we were not using it, but I suspect that was just because it was easier and doesn't matter. > I think if you do ksp2 = brw->wm.base.prog_offset at the top, and then > ksp2 += brw->wm.prog_data->prog_offset_16 in the above hunk, you get the > identical behavior. > > The same goes for gen7... > > > ADVANCE_BATCH(); > > } > > > > diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c > > b/src/mesa/drivers/dri/i965/gen7_wm_state.c > > index 27a3f44..c03d19d 100644 > > --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c > > +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c > > @@ -139,11 +139,11 @@ static void > > upload_ps_state(struct brw_context *brw) > > { > > struct gl_context *ctx = &brw->ctx; > > - uint32_t dw2, dw4, dw5; > > + uint32_t dw2, dw4, dw5, ksp0, ksp2; > > const int max_threads_shift = brw->is_haswell ? > > HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; > > > > - dw2 = dw4 = dw5 = 0; > > + dw2 = dw4 = dw5 = ksp2 = 0; > > > > dw2 |= > > (ALIGN(brw->wm.base.sampler_count, 4) / 4) << > > GEN7_PS_SAMPLER_COUNT_SHIFT; > > @@ -231,22 +231,24 @@ upload_ps_state(struct brw_context *brw) > > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > > dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > > GEN7_PS_DISPATCH_START_GRF_SHIFT_2); > > - } else > > + ksp0 = brw->wm.base.prog_offset; > > + ksp2 = brw->wm.base.prog_offset + > > brw->wm.prog_data->prog_offset_16; > > + } else { > > dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > > + ksp0 = brw->wm.base.prog_offset + > > brw->wm.prog_data->prog_offset_16; > > + } > > } > > else { > > dw4 |= GEN7_PS_8_DISPATCH_ENABLE; > > dw5 |= (brw->wm.prog_data->base.dispatch_grf_start_reg << > > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > > + ksp0 = brw->wm.base.prog_offset; > > } > > > > BEGIN_BATCH(8); > > OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); > > - if (brw->wm.prog_data->prog_offset_16 && min_inv_per_frag > 1) > > - OUT_BATCH(brw->wm.base.prog_offset + > > brw->wm.prog_data->prog_offset_16); > > - else > > - OUT_BATCH(brw->wm.base.prog_offset); > > + OUT_BATCH(ksp0); > > OUT_BATCH(dw2); > > if (brw->wm.prog_data->total_scratch) { > > OUT_RELOC(brw->wm.base.scratch_bo, > > @@ -258,7 +260,7 @@ upload_ps_state(struct brw_context *brw) > > OUT_BATCH(dw4); > > OUT_BATCH(dw5); > > OUT_BATCH(0); /* kernel 1 pointer */ > > - OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16); > > + OUT_BATCH(ksp2); > > ADVANCE_BATCH(); > > } > > > > diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c > > b/src/mesa/drivers/dri/i965/gen8_ps_state.c > > index 3d6d7f0..f58d49c 100644 > > --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c > > +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c > > @@ -134,7 +134,7 @@ static void > > upload_ps_state(struct brw_context *brw) > > { > > struct gl_context *ctx = &brw->ctx; > > - uint32_t dw3 = 0, dw6 = 0, dw7 = 0; > > + uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0; > > Should ksp0 and ksp2 be uint64_t? I realize the current code is broken > anyway. (/me makes note for no reloc branch). Some day, I suppose. Today the offsets are still limited by the GTT size, right? Kristian > > > > /* Initialize the execution mask with VMask. Otherwise, derivatives are > > * incorrect for subspans where some of the pixels are unlit. We > > believe > > @@ -203,22 +203,24 @@ upload_ps_state(struct brw_context *brw) > > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > > dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > > GEN7_PS_DISPATCH_START_GRF_SHIFT_2); > > + ksp0 = brw->wm.base.prog_offset; > > + ksp2 = brw->wm.base.prog_offset + > > brw->wm.prog_data->prog_offset_16; > > } else { > > dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 << > > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > > + > > + ksp0 = brw->wm.base.prog_offset + > > brw->wm.prog_data->prog_offset_16; > > } > > } else { > > dw6 |= GEN7_PS_8_DISPATCH_ENABLE; > > dw7 |= (brw->wm.prog_data->base.dispatch_grf_start_reg << > > GEN7_PS_DISPATCH_START_GRF_SHIFT_0); > > + ksp0 = brw->wm.base.prog_offset; > > } > > > > BEGIN_BATCH(12); > > OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2)); > > - if (brw->wm.prog_data->prog_offset_16 && min_invocations_per_fragment > > > 1) > > - OUT_BATCH(brw->wm.base.prog_offset + > > brw->wm.prog_data->prog_offset_16); > > - else > > - OUT_BATCH(brw->wm.base.prog_offset); > > + OUT_BATCH(ksp0); > > OUT_BATCH(0); > > OUT_BATCH(dw3); > > if (brw->wm.prog_data->total_scratch) { > > @@ -233,7 +235,7 @@ upload_ps_state(struct brw_context *brw) > > OUT_BATCH(dw7); > > OUT_BATCH(0); /* kernel 1 pointer */ > > OUT_BATCH(0); > > - OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16); > > + OUT_BATCH(ksp2); > > OUT_BATCH(0); > > ADVANCE_BATCH(); > > } > > lgtm otherwise > > -- > Ben Widawsky, Intel Open Source Technology Center _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev