On 22 May 2012 11:31, Kenneth Graunke <kenn...@whitecape.org> wrote: > On 05/11/2012 11:03 AM, Paul Berry wrote: > >> This patch modifies the "blorp" WM program so that it can be run in >> MSDISPMODE_PERSAMPLE (which means that every single sample of a >> multisampled render target is dispatched to the WM program, not just >> every pixel). >> >> Previously we were using the ugly hack of configuring multisampled >> destination surfaces as single-sampled, and generating sample indices >> other than zero by swizzling the pixel coordinates in the WM program. >> --- >> src/mesa/drivers/dri/i965/brw_**blorp.h | 12 ++++ >> src/mesa/drivers/dri/i965/brw_**blorp_blit.cpp | 87 >> +++++++++++++++++++------- >> src/mesa/drivers/dri/i965/**gen6_blorp.cpp | 5 +- >> src/mesa/drivers/dri/i965/**gen7_blorp.cpp | 10 ++- >> 4 files changed, 87 insertions(+), 27 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/**brw_blorp.h >> b/src/mesa/drivers/dri/i965/**brw_blorp.h >> index f14a5c7..b911356 100644 >> --- a/src/mesa/drivers/dri/i965/**brw_blorp.h >> +++ b/src/mesa/drivers/dri/i965/**brw_blorp.h >> @@ -132,6 +132,12 @@ const unsigned int BRW_BLORP_NUM_PUSH_CONST_REGS = >> struct brw_blorp_prog_data >> { >> unsigned int first_curbe_grf; >> + >> + /** >> + * True if the WM program should be run in MSDISPMODE_PERSAMPLE with >> more >> + * than one sample per pixel. >> + */ >> + bool persample_msaa_dispatch; >> }; >> >> class brw_blorp_params >> @@ -218,6 +224,12 @@ struct brw_blorp_blit_prog_key >> * pixels that are outside the destination rectangle. >> */ >> bool use_kill; >> + >> + /** >> + * True if the WM program should be run in MSDISPMODE_PERSAMPLE with >> more >> + * than one sample per pixel. 
>> + */ >> + bool persample_msaa_dispatch; >> }; >> >> class brw_blorp_blit_params : public brw_blorp_params >> diff --git a/src/mesa/drivers/dri/i965/**brw_blorp_blit.cpp >> b/src/mesa/drivers/dri/i965/**brw_blorp_blit.cpp >> index e985fad..07e9dd7 100644 >> --- a/src/mesa/drivers/dri/i965/**brw_blorp_blit.cpp >> +++ b/src/mesa/drivers/dri/i965/**brw_blorp_blit.cpp >> @@ -437,13 +437,14 @@ brw_blorp_blit_program::**compile(struct >> brw_context *brw, >> GLuint *program_size) >> { >> /* Sanity checks */ >> - if (key->dst_tiled_w) { >> - /* If the destination image is W tiled, then dst_samples must be 0. >> - * Otherwise, after conversion between W and Y tiling, there's no >> + if (key->dst_tiled_w&& key->rt_samples> 0) { >> >> + /* If the destination image is W tiled and multisampled, then the >> thread >> + * must be dispatched once per sample, not once per pixel. This is >> + * necessary because after conversion between W and Y tiling, >> there's no >> * guarantee that all samples corresponding to a single pixel will >> still >> * be together. >> */ >> - assert(key->rt_samples == 0); >> + assert(key->persample_msaa_**dispatch); >> } >> >> if (key->blend) { >> @@ -459,6 +460,17 @@ brw_blorp_blit_program::**compile(struct >> brw_context *brw, >> assert(key->tex_samples> 0); >> } >> >> + if (key->persample_msaa_dispatch) { >> + /* It only makes sense to do persample dispatch if the render >> target is >> + * configured as multisampled. 
>> + */ >> + assert(key->rt_samples> 0); >> + } >> + >> + /* Set up prog_data */ >> + memset(&prog_data, 0, sizeof(prog_data)); >> + prog_data.persample_msaa_dispatch = key->persample_msaa_dispatch; >> + >> brw_set_compression_control(&func, BRW_COMPRESSION_NONE); >> >> alloc_regs(); >> @@ -658,11 +670,29 @@ brw_blorp_blit_program::compute_frag_coords() >> */ >> brw_ADD(&func, Y, stride(suboffset(R1, 5), 2, 4, 0), >> brw_imm_v(0x11001100)); >> >> - /* Since we always run the WM in a mode that causes a single fragment >> - * dispatch per pixel, it's not meaningful to compute a sample value. >> Just >> - * set it to 0. >> - */ >> - s_is_zero = true; >> + if (key->persample_msaa_dispatch) { >> + /* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples> 0. >> + * Therefore, subspan 0 will represent sample 0, subspan 1 will >> + * represent sample 1, and so on. >> + * >> + * So we need to populate S with the sequence (0, 0, 0, 0, 1, 1, >> 1, 1, >> + * 2, 2, 2, 2, 3, 3, 3, 3). The easiest way to do this is to >> populate a >> + * temporary variable with the sequence (0, 1, 2, 3), and then >> copy from >> + * it using vstride=1, width=4, hstride=0. >> + * >> + * TODO: implement appropriate calculation for Gen7. >> + */ >> > > I think you mean: > > TODO: implement the necessary calculation for 8x multisampling. >
You're right. Good catch. > > Otherwise, > Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> > > + brw_MOV(&func, t1, brw_imm_v(0x3210)); >> + brw_MOV(&func, S, stride(t1, 1, 4, 0)); >> + s_is_zero = false; >> + } else { >> + /* Either the destination surface is single-sampled, or the WM >> will be >> + * run in MSDISPMODE_PERPIXEL (which causes a single fragment >> dispatch >> + * per pixel). In either case, it's not meaningful to compute a >> sample >> + * value. Just set it to 0. >> + */ >> + s_is_zero = true; >> + } >> } >> >> /** >> @@ -1065,22 +1095,23 @@ brw_blorp_blit_params::brw_**blorp_blit_params(struct >> intel_mipmap_tree *src_mt, >> use_wm_prog = true; >> memset(&wm_prog_key, 0, sizeof(wm_prog_key)); >> >> - if (dst.map_stencil_as_y_tiled) { >> - /* If the destination surface is a W-tiled stencil buffer that >> we're >> - * mapping as Y tiled, then we need to set up the surface state as >> - * single-sampled, because the memory layout of related samples >> doesn't >> - * match between W and Y tiling. >> + if (dst.map_stencil_as_y_tiled&& dst.num_samples> 0) { >> >> + /* If the destination surface is a W-tiled multisampled stencil >> buffer >> + * that we're mapping as Y tiled, then we need to arrange for the >> WM >> + * program to run once per sample rather than once per pixel, >> because >> + * the memory layout of related samples doesn't match between W >> and Y >> + * tiling. >> */ >> - dst.num_samples = 0; >> + wm_prog_key.persample_msaa_**dispatch = true; >> } >> >> - if (src_mt->num_samples> 0&& dst_mt->num_samples> 0) { >> + if (src.num_samples> 0&& dst.num_samples> 0) { >> >> /* We are blitting from a multisample buffer to a multisample >> buffer, so >> * we must preserve samples within a pixel. This means we have to >> - * configure the render target as single-sampled, so that the WM >> program >> - * generate each sample separately. >> + * arrange for the WM program to run once per sample rather than >> once >> + * per pixel. 
>> */ >> - dst.num_samples = 0; >> + wm_prog_key.persample_msaa_dispatch = true; >> } >> >> /* The render path must be configured to use the same number of >> samples as >> @@ -1138,12 +1169,22 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct >> intel_mipmap_tree *src_mt, >> * dimensions 64x64. We must also align it to a multiple of the >> tile >> * size, because the differences between W and Y tiling formats >> will >> * mean that pixels are scrambled within the tile. >> + * >> + * Note: if the destination surface is configured as an MSAA surface, >> then >> + * the effective tile size we need to align it to is smaller, >> because >> + * each pixel covers a 2x2 or a 4x2 block of samples. >> + * >> * TODO: what if this makes the coordinates too large? >> */ >> - x0 = (x0 * 2)& ~127; >> - y0 = (y0 / 2)& ~31; >> >> - x1 = ALIGN(x1 * 2, 128); >> - y1 = ALIGN(y1 / 2, 32); >> + unsigned x_align = 64, y_align = 64; >> + if (dst_mt->num_samples> 0) { >> + x_align /= (dst_mt->num_samples == 4 ?
2 : 4); >> + y_align /= 2; >> + } >> + x0 = (x0& ~(x_align - 1)) * 2; >> + y0 = (y0& ~(y_align - 1)) / 2; >> >> + x1 = ALIGN(x1, x_align) * 2; >> + y1 = ALIGN(y1, y_align) / 2; >> wm_prog_key.use_kill = true; >> } >> } >> diff --git a/src/mesa/drivers/dri/i965/**gen6_blorp.cpp >> b/src/mesa/drivers/dri/i965/**gen6_blorp.cpp >> index 8eed9dc..85a8ee6 100644 >> --- a/src/mesa/drivers/dri/i965/**gen6_blorp.cpp >> +++ b/src/mesa/drivers/dri/i965/**gen6_blorp.cpp >> @@ -742,7 +742,10 @@ gen6_blorp_emit_wm_config(**struct brw_context *brw, >> >> if (params->num_samples> 0) { >> dw6 |= GEN6_WM_MSRAST_ON_PATTERN; >> - dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; >> + if (prog_data&& prog_data->persample_msaa_**dispatch) >> >> + dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; >> + else >> + dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; >> } else { >> dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; >> dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; >> diff --git a/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> b/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> index fbb94df..e5b27dd 100644 >> --- a/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> +++ b/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> @@ -399,7 +399,8 @@ gen7_blorp_emit_sf_config(**struct brw_context *brw, >> */ >> static void >> gen7_blorp_emit_wm_config(**struct brw_context *brw, >> - const brw_blorp_params *params) >> + const brw_blorp_params *params, >> + brw_blorp_prog_data *prog_data) >> { >> struct intel_context *intel =&brw->intel; >> >> >> @@ -431,7 +432,10 @@ gen7_blorp_emit_wm_config(**struct brw_context *brw, >> >> if (params->num_samples> 0) { >> dw1 |= GEN7_WM_MSRAST_ON_PATTERN; >> - dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; >> + if (prog_data&& prog_data->persample_msaa_**dispatch) >> >> + dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; >> + else >> + dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; >> } else { >> dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; >> dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; >> @@ -757,7 +761,7 @@ gen7_blorp_exec(struct intel_context *intel, >> 
gen7_blorp_emit_streamout_disable(brw, params); >> gen6_blorp_emit_clip_disable(brw, params); >> gen7_blorp_emit_sf_config(brw, params); >> - gen7_blorp_emit_wm_config(brw, params); >> + gen7_blorp_emit_wm_config(brw, params, prog_data); >> if (params->use_wm_prog) { >> gen7_blorp_emit_binding_table_pointers_ps(brw, params, >> wm_bind_bo_offset); >> > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev