This is a straight port of the dispatch logic from gen7_wm_state.c; I haven't looked into whether Gen8 could keep both SIMD8 and SIMD16 dispatch enabled when per-sample shading is in use.
v2: Actually do it right. Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> --- src/mesa/drivers/dri/i965/gen8_ps_state.c | 38 ++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 57bf053..a834b85 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -183,10 +183,6 @@ upload_ps_state(struct brw_context *brw) if (brw->wm.prog_data->nr_params > 0) dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE; - dw6 |= GEN7_PS_8_DISPATCH_ENABLE; - if (brw->wm.prog_data->prog_offset_16) - dw6 |= GEN7_PS_16_DISPATCH_ENABLE; - /* From the documentation for this packet: * "If the PS kernel does not need the Position XY Offsets to * compute a Position Value, then this field should be programmed @@ -205,13 +201,39 @@ upload_ps_state(struct brw_context *brw) else dw6 |= GEN7_PS_POSOFFSET_NONE; - dw7 |= - brw->wm.prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 | - brw->wm.prog_data->first_curbe_grf_16<< GEN7_PS_DISPATCH_START_GRF_SHIFT_2; + /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16 + * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader + * is successfully compiled. In majority of the cases that bring us + * better performance than 'SIMD8 only' dispatch. 
+ */ + int min_invocations_per_fragment = + _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false); + assert(min_invocations_per_fragment >= 1); + + if (brw->wm.prog_data->prog_offset_16) { + dw6 |= GEN7_PS_16_DISPATCH_ENABLE; + if (min_invocations_per_fragment == 1) { + dw6 |= GEN7_PS_8_DISPATCH_ENABLE; + dw7 |= (brw->wm.prog_data->first_curbe_grf << + GEN7_PS_DISPATCH_START_GRF_SHIFT_0); + dw7 |= (brw->wm.prog_data->first_curbe_grf_16 << + GEN7_PS_DISPATCH_START_GRF_SHIFT_2); + } else { + dw7 |= (brw->wm.prog_data->first_curbe_grf_16 << + GEN7_PS_DISPATCH_START_GRF_SHIFT_0); + } + } else { + dw6 |= GEN7_PS_8_DISPATCH_ENABLE; + dw7 |= (brw->wm.prog_data->first_curbe_grf << + GEN7_PS_DISPATCH_START_GRF_SHIFT_0); + } BEGIN_BATCH(12); OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2)); - OUT_BATCH(brw->wm.base.prog_offset); + if (brw->wm.prog_data->prog_offset_16 && min_invocations_per_fragment > 1) + OUT_BATCH(brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16); + else + OUT_BATCH(brw->wm.base.prog_offset); OUT_BATCH(0); OUT_BATCH(dw3); if (brw->wm.prog_data->total_scratch) { -- 1.8.4.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev