On 22 May 2013 12:30, Ian Romanick <i...@freedesktop.org> wrote: > On 05/21/2013 04:52 PM, Paul Berry wrote: > >> Since we defer allocation of the MCS miptree until the time of the >> fast clear operation, this patch also implements creation of the MCS >> miptree. >> >> In addition, this patch adds the field >> intel_mipmap_tree::fast_clear_**color_value, which holds the most recent >> fast color clear value, if any. We use it to set the SURFACE_STATE's >> clear color for render targets. >> --- >> src/mesa/drivers/dri/i965/brw_**blorp.cpp | 1 + >> src/mesa/drivers/dri/i965/brw_**blorp.h | 11 +- >> src/mesa/drivers/dri/i965/brw_**blorp_clear.cpp | 143 >> +++++++++++++++++++++- >> src/mesa/drivers/dri/i965/brw_**clear.c | 2 +- >> src/mesa/drivers/dri/i965/brw_**defines.h | 2 + >> src/mesa/drivers/dri/i965/**gen7_blorp.cpp | 18 ++- >> src/mesa/drivers/dri/i965/**gen7_wm_surface_state.c | 10 +- >> src/mesa/drivers/dri/intel/**intel_mipmap_tree.c | 47 +++++++ >> src/mesa/drivers/dri/intel/**intel_mipmap_tree.h | 13 ++ >> 9 files changed, 233 insertions(+), 14 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/**brw_blorp.cpp >> b/src/mesa/drivers/dri/i965/**brw_blorp.cpp >> index 20f7153..c6019d1 100644 >> --- a/src/mesa/drivers/dri/i965/**brw_blorp.cpp >> +++ b/src/mesa/drivers/dri/i965/**brw_blorp.cpp >> @@ -147,6 +147,7 @@ brw_blorp_params::brw_blorp_**params() >> y1(0), >> depth_format(0), >> hiz_op(GEN6_HIZ_OP_NONE), >> + fast_clear_op(GEN7_FAST_CLEAR_**OP_NONE), >> num_samples(0), >> use_wm_prog(false) >> { >> diff --git a/src/mesa/drivers/dri/i965/**brw_blorp.h >> b/src/mesa/drivers/dri/i965/**brw_blorp.h >> index 6360a62..687d7eb 100644 >> --- a/src/mesa/drivers/dri/i965/**brw_blorp.h >> +++ b/src/mesa/drivers/dri/i965/**brw_blorp.h >> @@ -46,7 +46,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel, >> bool mirror_x, bool mirror_y); >> >> bool >> -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer >> *fb); >> 
+brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer >> *fb, >> + bool partial_clear); >> >> #ifdef __cplusplus >> } /* end extern "C" */ >> @@ -195,6 +196,13 @@ struct brw_blorp_prog_data >> bool persample_msaa_dispatch; >> }; >> >> + >> +enum gen7_fast_clear_op { >> + GEN7_FAST_CLEAR_OP_NONE, >> + GEN7_FAST_CLEAR_OP_FAST_CLEAR, >> +}; >> + >> + >> class brw_blorp_params >> { >> public: >> @@ -212,6 +220,7 @@ public: >> brw_blorp_surface_info src; >> brw_blorp_surface_info dst; >> enum gen6_hiz_op hiz_op; >> + enum gen7_fast_clear_op fast_clear_op; >> unsigned num_samples; >> bool use_wm_prog; >> brw_blorp_wm_push_constants wm_push_consts; >> diff --git a/src/mesa/drivers/dri/i965/**brw_blorp_clear.cpp >> b/src/mesa/drivers/dri/i965/**brw_blorp_clear.cpp >> index 28d7ad0..675289b 100644 >> --- a/src/mesa/drivers/dri/i965/**brw_blorp_clear.cpp >> +++ b/src/mesa/drivers/dri/i965/**brw_blorp_clear.cpp >> @@ -49,7 +49,8 @@ public: >> brw_blorp_clear_params(struct brw_context *brw, >> struct gl_framebuffer *fb, >> struct gl_renderbuffer *rb, >> - GLubyte *color_mask); >> + GLubyte *color_mask, >> + bool partial_clear); >> >> virtual uint32_t get_wm_prog(struct brw_context *brw, >> brw_blorp_prog_data **prog_data) const; >> @@ -105,10 +106,49 @@ brw_blorp_clear_program::~brw_** >> blorp_clear_program() >> ralloc_free(mem_ctx); >> } >> >> + >> +/** >> + * Determine if fast color clear supports the given clear color. >> + * >> + * Fast color clear can only clear to color values of 1.0 or 0.0. At the >> + * moment we only support floating point buffers. >> + */ >> +static bool >> +is_color_fast_clear_**compatible(gl_format format, >> + const union gl_color_union *color) >> +{ >> + if (_mesa_is_format_integer_**color(format)) >> + return false; >> + >> + for (int i = 0; i < 4; i++) { >> + if (color->f[i] != 0.0 && color->f[i] != 1.0) >> + return false; >> > > Should this generate a perf debug message? 
Eric may have an opinion about > generating warnings for the non-fast path...
Sounds reasonable to me. We already have perf debug messages for other things that can inhibit fast clears (e.g. scissor preventing fast depth clear). I'll add it unless I hear an objection. > > + } >> + return true; >> +} >> + >> + >> +/** >> + * Convert the given color to a bitfield suitable for ORing into DWORD 7 >> of >> + * SURFACE_STATE. >> + */ >> +static uint32_t >> +compute_fast_clear_color_**bits(const union gl_color_union *color) >> +{ >> + uint32_t bits = 0; >> + for (int i = 0; i < 4; i++) { >> + if (color->f[i] != 0.0) >> + bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_**SHIFT + (3 - i)); >> + } >> + return bits; >> +} >> + >> + >> brw_blorp_clear_params::brw_**blorp_clear_params(struct brw_context >> *brw, >> struct gl_framebuffer >> *fb, >> struct gl_renderbuffer >> *rb, >> - GLubyte *color_mask) >> + GLubyte *color_mask, >> + bool partial_clear) >> { >> struct intel_context *intel = &brw->intel; >> struct gl_context *ctx = &intel->ctx; >> @@ -163,6 +203,56 @@ brw_blorp_clear_params::brw_**blorp_clear_params(struct >> brw_context *brw, >> wm_prog_key.use_simd16_**replicated_data = false; >> } >> } >> + >> + /* If we can do this as a fast color clear, do so. */ >> + if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear && >> + wm_prog_key.use_simd16_**replicated_data && >> + is_color_fast_clear_**compatible(format, >> &ctx->Color.ClearColor)) { >> + memset(push_consts, 0xff, 4*sizeof(float)); >> + fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR; >> + >> + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render >> + * Target(s)", beneath the "Fast Color Clear" bullet (p327): >> + * >> + * Clear pass must have a clear rectangle that must follow >> alignment >> + * rules in terms of pixels and lines as shown in the table >> + * below. Further, the clear-rectangle height and width must be >> + * multiple of the following dimensions. 
If the height and >> width of >> + * the render target being cleared do not meet these >> requirements, >> + * an MCS buffer can be created such that it follows the >> requirement >> + * and covers the RT. >> + * >> + * The alignment size in the table that follows is related to the >> + * alignment size returned by intel_get_non_msrt_mcs_**alignment(), >> but >> + * with X alignment multiplied by 16 and Y alignment multiplied by >> 32. >> + */ >> + unsigned x_align, y_align; >> + intel_get_non_msrt_mcs_**alignment(intel, irb->mt, &x_align, >> &y_align); >> + x_align *= 16; >> + y_align *= 32; >> + x0 = ROUND_DOWN_TO(x0, x_align); >> + y0 = ROUND_DOWN_TO(y0, y_align); >> + x1 = ALIGN(x1, x_align); >> + y1 = ALIGN(y1, y_align); >> + >> + /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render >> + * Target(s)", beneath the "Fast Color Clear" bullet (p327): >> + * >> + * In order to optimize the performance MCS buffer (when bound >> to 1X >> + * RT) clear similarly to MCS buffer clear for MSRT case, >> clear rect >> + * is required to be scaled by the following factors in the >> + * horizontal and vertical directions: >> + * >> + * The X and Y scale down factors in the table that follows are >> each >> + * equal to half the alignment value computed above. 
>> + */ >> + unsigned x_scaledown = x_align / 2; >> + unsigned y_scaledown = y_align / 2; >> + x0 /= x_scaledown; >> + y0 /= y_scaledown; >> + x1 /= x_scaledown; >> + y1 /= y_scaledown; >> + } >> } >> >> uint32_t >> @@ -266,7 +356,8 @@ brw_blorp_clear_program::**compile(struct >> brw_context *brw, >> >> extern "C" { >> bool >> -brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer >> *fb) >> +brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer >> *fb, >> + bool partial_clear) >> { >> struct gl_context *ctx = &intel->ctx; >> struct brw_context *brw = brw_context(ctx); >> @@ -288,6 +379,7 @@ brw_blorp_clear_color(struct intel_context *intel, >> struct gl_framebuffer *fb) >> >> for (unsigned buf = 0; buf < ctx->DrawBuffer->_**NumColorDrawBuffers; >> buf++) { >> struct gl_renderbuffer *rb = ctx->DrawBuffer->_** >> ColorDrawBuffers[buf]; >> + struct intel_renderbuffer *irb = intel_renderbuffer(rb); >> >> /* If this is an ES2 context or GL_ARB_ES2_compatibility is >> supported, >> * the framebuffer can be complete with some attachments missing. >> In >> @@ -296,8 +388,51 @@ brw_blorp_clear_color(struct intel_context *intel, >> struct gl_framebuffer *fb) >> if (rb == NULL) >> continue; >> >> - brw_blorp_clear_params params(brw, fb, rb, >> ctx->Color.ColorMask[buf]); >> + brw_blorp_clear_params params(brw, fb, rb, >> ctx->Color.ColorMask[buf], >> + partial_clear); >> + >> + bool is_fast_clear = >> + (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR)**; >> + if (is_fast_clear) { >> + /* Record the clear color in the miptree so that it will be >> + * programmed in SURFACE_STATE by later rendering and resolve >> + * operations. 
>> + */ >> + uint32_t new_color_value = >> + compute_fast_clear_color_bits(&ctx->Color.ClearColor); >> + if (irb->mt->fast_clear_color_value != new_color_value) { >> + irb->mt->fast_clear_color_value = new_color_value; >> + brw->state.dirty.brw |= BRW_NEW_SURFACES; >> + } >> + >> + /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear >> is >> + * redundant and can be skipped. >> + */ >> + if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR) >> + continue; >> + >> + /* If the MCS buffer hasn't been allocated yet, we need to >> allocate >> + * it now. >> + */ >> + if (!irb->mt->mcs_mt && >> + !intel_miptree_alloc_non_msrt_mcs(intel, irb->mt)) { >> + /* MCS allocation failed--probably this will only happen in >> + * out-of-memory conditions. But in any case, try to >> recover by >> + * falling back to a non-blorp clear technique. >> + */ >> + return false; >> + } >> + } >> + >> brw_blorp_exec(intel, &params); >> + >> + if (is_fast_clear) { >> + /* Now that the fast clear has occurred, put the buffer in >> + * INTEL_MCS_STATE_CLEAR so that we won't waste time doing >> redundant >> + * clears. >> + */ >> + irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR; >> + } >> } >> >> return true; >> diff --git a/src/mesa/drivers/dri/i965/brw_clear.c >> b/src/mesa/drivers/dri/i965/brw_clear.c >> index 2b999bf..80b7a0c 100644 >> --- a/src/mesa/drivers/dri/i965/brw_clear.c >> +++ b/src/mesa/drivers/dri/i965/brw_clear.c >> @@ -234,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask) >> /* BLORP is currently only supported on Gen6+.
*/ >> if (intel->gen >= 6) { >> if (mask & BUFFER_BITS_COLOR) { >> - if (brw_blorp_clear_color(intel, fb)) { >> + if (brw_blorp_clear_color(intel, fb, partial_clear)) { >> debug_mask("blorp color", mask & BUFFER_BITS_COLOR); >> mask &= ~BUFFER_BITS_COLOR; >> } >> diff --git a/src/mesa/drivers/dri/i965/**brw_defines.h >> b/src/mesa/drivers/dri/i965/**brw_defines.h >> index fedd78c..90b16ab 100644 >> --- a/src/mesa/drivers/dri/i965/**brw_defines.h >> +++ b/src/mesa/drivers/dri/i965/**brw_defines.h >> @@ -555,6 +555,7 @@ >> #define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3) >> >> /* Surface state DW7 */ >> +#define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28 >> #define GEN7_SURFACE_SCS_R_SHIFT 25 >> #define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) >> #define GEN7_SURFACE_SCS_G_SHIFT 22 >> @@ -1613,6 +1614,7 @@ enum brw_wm_barycentric_interp_mode { >> # define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11) >> # define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10) >> # define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9) >> +# define GEN7_PS_RENDER_TARGET_FAST_**CLEAR_ENABLE (1 << 8) >> # define GEN7_PS_DUAL_SOURCE_BLEND_**ENABLE (1 << 7) >> # define GEN7_PS_POSOFFSET_NONE (0 << 3) >> # define GEN7_PS_POSOFFSET_CENTROID (2 << 3) >> diff --git a/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> b/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> index 2d09c7f..5f7e10f 100644 >> --- a/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> +++ b/src/mesa/drivers/dri/i965/**gen7_blorp.cpp >> @@ -202,11 +202,13 @@ gen7_blorp_emit_surface_state(**struct brw_context >> *brw, >> is_render_target); >> } >> >> + surf[7] = surface->mt->fast_clear_color_**value; >> + >> if (intel->is_haswell) { >> - surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | >> - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | >> - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | >> - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); >> + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | >> + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | >> + 
SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | >> + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A)); >> } >> >> /* Emit relocation to surface contents */ >> @@ -587,6 +589,14 @@ gen7_blorp_emit_ps_config(struct brw_context *brw, >> dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; >> } >> >> + switch (params->fast_clear_op) { >> + case GEN7_FAST_CLEAR_OP_FAST_CLEAR: >> + dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE; >> + break; >> + default: >> + break; >> + } >> + >> BEGIN_BATCH(8); >> OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); >> OUT_BATCH(params->use_wm_prog ? prog_offset : 0); >> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c >> b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c >> index f5d2e43..fda4b2c 100644 >> --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c >> +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c >> @@ -589,11 +589,13 @@ gen7_update_renderbuffer_surface(struct >> brw_context *brw, >> irb->mt->mcs_mt, true /* is RT */); >> } >> >> + surf[7] = irb->mt->fast_clear_color_value; >> + >> if (intel->is_haswell) { >> - surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | >> - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | >> - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | >> - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); >> + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | >> + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | >> + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | >> + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A)); >> } >> >> drm_intel_bo_emit_reloc(brw->intel.batch.bo, >> diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c >> b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c >> index 9d1b91a..657532f 100644 >> --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c >> +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c >> @@ -1163,6 +1163,53 @@ intel_miptree_alloc_mcs(struct intel_context >> *intel, >> #endif >> } >>
>> + >> +bool >> +intel_miptree_alloc_non_msrt_**mcs(struct intel_context *intel, >> + struct intel_mipmap_tree *mt) >> +{ >> +#ifdef I915 >> + assert(!"MCS not supported on i915"); >> +#else >> + assert(mt->mcs_mt == NULL); >> + >> + /* The format of the MCS buffer is opaque to the driver; all that >> matters >> + * is that we get its size and pitch right. We'll pretend that the >> format >> + * is R32. Since an MCS tile covers 128 blocks horizontally, and a >> Y-tiled >> + * R32 buffer is 32 pixels across, we'll need to scale the width down >> by >> + * the block width and then a further factor of 4. Since an MCS tile >> + * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows >> high, >> + * we'll need to scale the height down by the block height and then a >> + * further factor of 8. >> + */ >> + const gl_format format = MESA_FORMAT_R_UINT32; >> + unsigned block_width_px; >> + unsigned block_height; >> + intel_get_non_msrt_mcs_**alignment(intel, mt, &block_width_px, >> &block_height); >> + unsigned width_divisor = block_width_px * 4; >> + unsigned height_divisor = block_height * 8; >> + unsigned mcs_width = >> + ALIGN(mt->logical_width0, width_divisor) / width_divisor; >> + unsigned mcs_height = >> + ALIGN(mt->logical_height0, height_divisor) / height_divisor; >> + assert(mt->logical_depth0 == 1); >> + mt->mcs_mt = intel_miptree_create(intel, >> + mt->target, >> + format, >> + mt->first_level, >> + mt->last_level, >> + mcs_width, >> + mcs_height, >> + mt->logical_depth0, >> + true, >> + 0 /* num_samples */, >> + true /* force_y_tiling */); >> + >> + return mt->mcs_mt; >> +#endif >> +} >> + >> + >> /** >> * Helper for intel_miptree_alloc_hiz() that sets >> * \c mt->level[level].slice[layer].**has_hiz. 
Return true if and only >> if >> diff --git a/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h >> b/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h >> index 5cd69cb..4c9ff94 100644 >> --- a/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h >> +++ b/src/mesa/drivers/dri/intel/**intel_mipmap_tree.h >> @@ -463,6 +463,15 @@ struct intel_mipmap_tree >> enum intel_mcs_state mcs_state; >> #endif >> >> + /** >> + * The SURFACE_STATE bits associated with the last fast color clear >> to this >> + * color mipmap tree, if any. >> + * >> + * This value will only ever contain ones in bits 28-31, so it is >> safe to >> + * OR into dword 7 of SURFACE_STATE. >> + */ >> + uint32_t fast_clear_color_value; >> + >> /* These are also refcounted: >> */ >> GLuint refcount; >> @@ -477,6 +486,10 @@ intel_get_non_msrt_mcs_**alignment(struct >> intel_context *intel, >> struct intel_mipmap_tree *mt, >> unsigned *width_px, unsigned *height); >> >> +bool >> +intel_miptree_alloc_non_msrt_**mcs(struct intel_context *intel, >> + struct intel_mipmap_tree *mt); >> + >> struct intel_mipmap_tree *intel_miptree_create(struct intel_context >> *intel, >> GLenum target, >> gl_format format, >> >> >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev