On Wed, May 31, 2017 at 8:43 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> On Wed, May 31, 2017 at 6:03 AM, Pohjolainen, Topi < > topi.pohjolai...@gmail.com> wrote: > >> On Fri, May 26, 2017 at 04:30:31PM -0700, Jason Ekstrand wrote: >> > This commit reworks the resolve tracking for CCS and MCS to use the new >> > isl_aux_state enum. This should provide much more accurate and easy to >> > reason about tracking. In order to understand, for instance, the >> > intel_miptree_prepare_ccs_access function, one only has to go look at >> > the giant comment for the isl_aux_state enum and follow the arrows. >> > Unfortunately, there's no good way to split this up without making a >> > real mess so there are a bunch of changes in here: >> > >> > 1) We now do partial resolves. I really have no idea how this ever >> > worked before. So far as I can tell, the only time the old code >> > ever did a partial resolve was when it was using CCS_D where a >> > partial resolve and a full resolve are the same thing. >> > >> > 2) We are now tracking 4 states instead of 3 for CCS_E. In particular, >> > we distinguish between compressed with clear and compressed without >> > clear. The end result is that you will never get two partial >> > resolves in a row. >> > >> > 3) The texture view rules are now more correct. Previously, we would >> > only bail if compression was not supported by the destination >> > format. However, this is not actually correct. Not all format >> > pairs are supported for texture views with CCS even if both support >> > CCS individually. Fortunately, ISL has a helper for this. >> > >> > 4) We are no longer using intel_resolve_map for tracking aux state but >> > are instead using a simple array of enum isl_aux_state indexed by >> > level and layer. This is because, now that we're tracking 4 >> > different states, it's no longer clear which should be the "default" >> > and array lookups are faster than linked list searches. >> > >> > 5) The new code is very assert-happy. 
Incorrect transitions will now >> > get caught by assertions rather than by rendering corruption. >> > --- >> > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 479 >> +++++++++++++++++--------- >> > src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 39 +-- >> > src/mesa/drivers/dri/i965/intel_resolve_map.h | 62 +--- >> > 3 files changed, 320 insertions(+), 260 deletions(-) >> > >> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> > index cae8358..3e30b2a 100644 >> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> > @@ -326,7 +326,7 @@ intel_miptree_create_layout(struct brw_context >> *brw, >> > mt->aux_disable |= INTEL_AUX_DISABLE_CCS; >> > mt->is_scanout = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) != 0; >> > exec_list_make_empty(&mt->hiz_map); >> > - exec_list_make_empty(&mt->color_resolve_map); >> > + mt->aux_state = NULL; >> > mt->cpp = _mesa_get_format_bytes(format); >> > mt->num_samples = num_samples; >> > mt->compressed = _mesa_is_format_compressed(format); >> > @@ -576,6 +576,46 @@ intel_lower_compressed_format(struct brw_context >> *brw, mesa_format format) >> > } >> > } >> > >> > +static enum isl_aux_state ** >> > +create_aux_state_map(struct intel_mipmap_tree *mt, >> > + enum isl_aux_state initial) >> > +{ >> > + const uint32_t levels = mt->last_level + 1; >> > + >> > + uint32_t total_slices = 0; >> > + for (uint32_t level = 0; level < levels; level++) >> > + total_slices += mt->level[level].depth; >> > + >> > + const size_t per_level_array_size = levels * sizeof(enum >> isl_aux_state *); >> > + >> > + /* We're going to allocate a single chunk of data for both the >> per-level >> > + * reference array and the arrays of aux_state. This makes cleanup >> > + * significantly easier. 
>> > + */ >> > + const size_t total_size = per_level_array_size + >> > + total_slices * sizeof(enum isl_aux_state); >> > + void *data = malloc(total_size); >> > + if (data == NULL) >> > + return NULL; >> > + >> > + enum isl_aux_state **per_level_arr = data; >> > + enum isl_aux_state *s = data + per_level_array_size; >> > + for (uint32_t level = 0; level < levels; level++) { >> > + per_level_arr[level] = s; >> > + for (uint32_t a = 0; a < mt->level[level].depth; a++) >> > + *(s++) = initial; >> > + } >> > + assert((void *)s == data + total_size); >> > + >> > + return per_level_arr; >> > +} >> > + >> > +static void >> > +free_aux_state_map(enum isl_aux_state **state) >> > +{ >> > + free(state); >> > +} >> > + >> > static struct intel_mipmap_tree * >> > miptree_create(struct brw_context *brw, >> > GLenum target, >> > @@ -935,7 +975,7 @@ intel_miptree_release(struct intel_mipmap_tree **mt) >> > free((*mt)->mcs_buf); >> > } >> > intel_resolve_map_clear(&(*mt)->hiz_map); >> > - intel_resolve_map_clear(&(*mt)->color_resolve_map); >> > + free_aux_state_map((*mt)->aux_state); >> > >> > intel_miptree_release(&(*mt)->plane[0]); >> > intel_miptree_release(&(*mt)->plane[1]); >> > @@ -1482,27 +1522,31 @@ intel_miptree_alloc_mcs(struct brw_context *brw, >> > unreachable("Unrecognized sample count in >> intel_miptree_alloc_mcs"); >> > }; >> > >> > + /* Multisampled miptrees are only supported for single level. 
*/ >> > + assert(mt->first_level == 0); >> > + enum isl_aux_state **aux_state = >> > + create_aux_state_map(mt, ISL_AUX_STATE_CLEAR); >> > + if (!aux_state) >> > + return false; >> > + >> > mt->mcs_buf = >> > intel_mcs_miptree_buf_create(brw, mt, >> > format, >> > mt->logical_width0, >> > mt->logical_height0, >> > MIPTREE_LAYOUT_ACCELERATED_UPLOAD); >> > - if (!mt->mcs_buf) >> > + if (!mt->mcs_buf) { >> > + free(aux_state); >> > return false; >> > + } >> > >> > - intel_miptree_init_mcs(brw, mt, 0xFF); >> > + mt->aux_state = aux_state; >> > >> > - /* Multisampled miptrees are only supported for single level. */ >> > - assert(mt->first_level == 0); >> > - intel_miptree_set_fast_clear_state(brw, mt, mt->first_level, 0, >> > - mt->logical_depth0, >> > - INTEL_FAST_CLEAR_STATE_CLEAR); >> > + intel_miptree_init_mcs(brw, mt, 0xFF); >> > >> > return true; >> > } >> > >> > - >> > bool >> > intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, >> > struct intel_mipmap_tree *mt, >> > @@ -1528,6 +1572,13 @@ intel_miptree_alloc_non_msrt_mcs(struct >> brw_context *brw, >> > if (!buf) >> > return false; >> > >> > + enum isl_aux_state **aux_state = >> > + create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH); >> > + if (!aux_state) { >> > + free(buf); >> > + return false; >> > + } >> > + >> > buf->size = temp_ccs_surf.size; >> > buf->pitch = temp_ccs_surf.row_pitch; >> > buf->qpitch = isl_surf_get_array_pitch_sa_rows(&temp_ccs_surf); >> > @@ -1549,10 +1600,12 @@ intel_miptree_alloc_non_msrt_mcs(struct >> brw_context *brw, >> > 1, I915_TILING_Y, &buf->pitch, >> alloc_flags); >> > if (!buf->bo) { >> > free(buf); >> > + free(aux_state); >> > return false; >> > } >> > >> > mt->mcs_buf = buf; >> > + mt->aux_state = aux_state; >> > >> > /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers are >> > * used for lossless compression which requires similar >> initialisation >> > @@ -1975,19 +2028,35 @@ intel_miptree_all_slices_resolve_depth(struct >> brw_context *brw, >> > 
BLORP_HIZ_OP_DEPTH_RESOLVE); >> > } >> > >> > -enum intel_fast_clear_state >> > -intel_miptree_get_fast_clear_state(const struct intel_mipmap_tree *mt, >> > - unsigned level, unsigned layer) >> > +bool >> > +intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt, >> > + unsigned start_level, unsigned >> num_levels, >> > + unsigned start_layer, unsigned >> num_layers) >> > { >> > - intel_miptree_check_level_layer(mt, level, layer); >> > + assert(_mesa_is_format_color_format(mt->format)); >> > >> > - const struct intel_resolve_map *item = >> > - intel_resolve_map_const_get(&mt->color_resolve_map, level, >> layer); >> > + if (!mt->mcs_buf) >> > + return false; >> > >> > - if (!item) >> > - return INTEL_FAST_CLEAR_STATE_RESOLVED; >> > + /* Clamp the level range to fit the miptree */ >> > + assert(start_level + num_levels >= start_level); >> >> This assert looks odd, and did you mean to have it after adjusting? >> > > No, this is a "does the sum overflow" assertion that we do right before we > do a sum. There's a bunch of places where we pass [0, UINT32_MAX] as the > range and if someone decided to pass [5, UINT32_MAX], it would overflow. > > That said, I think my intention was to do the more direct "if (num_levels > == INTEL_REMAINING_LEVELS)" check. I'll switch it to that. > Ok, I think I have something better now. Take a look at https://cgit.freedesktop.org/~jekstrand/mesa/commit/?h=wip/i965-resolve-rework-v3&id=7ae758d95bee414d677df845d4781254f9f334f3 If you like it, I'm happy to rebase a bit and scatter the changes through the series. 
> > + const uint32_t last_level = >> > + MIN2(mt->last_level, start_level + num_levels - 1); >> > + start_level = MAX2(mt->first_level, start_level); >> > + num_levels = last_level - start_level + 1; >> > + >> > + for (uint32_t level = start_level; level <= last_level; level++) { >> > + const uint32_t level_layers = MIN2(num_layers, >> mt->level[level].depth); >> > + for (unsigned a = 0; a < level_layers; a++) { >> > + enum isl_aux_state aux_state = >> > + intel_miptree_get_aux_state(mt, level, start_layer + a); >> > + assert(aux_state != ISL_AUX_STATE_AUX_INVALID); >> > + if (aux_state != ISL_AUX_STATE_PASS_THROUGH) >> > + return true; >> > + } >> > + } >> > >> > - return item->fast_clear_state; >> > + return false; >> > } >> > >> > static void >> > @@ -2014,135 +2083,188 @@ intel_miptree_check_color_resolve(const >> struct brw_context *brw, >> > (void)layer; >> > } >> > >> > -void >> > -intel_miptree_set_fast_clear_state(const struct brw_context *brw, >> > - struct intel_mipmap_tree *mt, >> > - unsigned level, >> > - unsigned first_layer, >> > - unsigned num_layers, >> > - enum intel_fast_clear_state >> new_state) >> > -{ >> > - /* Setting the state to resolved means removing the item from the >> list >> > - * altogether. 
>> > - */ >> > - assert(new_state != INTEL_FAST_CLEAR_STATE_RESOLVED); >> > +static enum blorp_fast_clear_op >> > +get_ccs_d_resolve_op(enum isl_aux_state aux_state, >> > + bool ccs_supported, bool fast_clear_supported) >> > +{ >> > + assert(ccs_supported == fast_clear_supported); >> > >> > - intel_miptree_check_color_resolve(brw, mt, level, first_layer); >> > + switch (aux_state) { >> > + case ISL_AUX_STATE_CLEAR: >> > + case ISL_AUX_STATE_COMPRESSED_CLEAR: >> > + if (!ccs_supported) >> > + return BLORP_FAST_CLEAR_OP_RESOLVE_FULL; >> > + else >> > + return BLORP_FAST_CLEAR_OP_NONE; >> > >> > - assert(first_layer + num_layers <= mt->physical_depth0); >> > + case ISL_AUX_STATE_PASS_THROUGH: >> > + return BLORP_FAST_CLEAR_OP_NONE; >> > >> > - for (unsigned i = 0; i < num_layers; i++) >> > - intel_resolve_map_set(&mt->color_resolve_map, level, >> > - first_layer + i, new_state); >> > -} >> > + case ISL_AUX_STATE_RESOLVED: >> > + case ISL_AUX_STATE_AUX_INVALID: >> > + case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: >> > + break; >> > + } >> > >> > -bool >> > -intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt, >> > - unsigned start_level, unsigned >> num_levels, >> > - unsigned start_layer, unsigned >> num_layers) >> > -{ >> > - return intel_resolve_map_find_any(&mt->color_resolve_map, >> > - start_level, num_levels, >> > - start_layer, num_layers) != NULL; >> > + unreachable("Invalid aux state for CCS_D"); >> > } >> > >> > -void >> > -intel_miptree_used_for_rendering(const struct brw_context *brw, >> > - struct intel_mipmap_tree *mt, >> unsigned level, >> > - unsigned start_layer, unsigned >> num_layers) >> > +static enum blorp_fast_clear_op >> > +get_ccs_e_resolve_op(enum isl_aux_state aux_state, >> > + bool ccs_supported, bool fast_clear_supported) >> > { >> > - const bool is_lossless_compressed = >> > - intel_miptree_is_lossless_compressed(brw, mt); >> > + switch (aux_state) { >> > + case ISL_AUX_STATE_CLEAR: >> > + case ISL_AUX_STATE_COMPRESSED_CLEAR: 
>> > + if (!ccs_supported) >> > + return BLORP_FAST_CLEAR_OP_RESOLVE_FULL; >> > + else if (!fast_clear_supported) >> > + return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL; >> > + else >> > + return BLORP_FAST_CLEAR_OP_NONE; >> > >> > - for (unsigned i = 0; i < num_layers; ++i) { >> > - const enum intel_fast_clear_state fast_clear_state = >> > - intel_miptree_get_fast_clear_state(mt, level, start_layer + >> i); >> > + case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: >> > + if (!ccs_supported) >> > + return BLORP_FAST_CLEAR_OP_RESOLVE_FULL; >> > + else >> > + return BLORP_FAST_CLEAR_OP_NONE; >> > >> > - /* If the buffer was previously in fast clear state, change it to >> > - * unresolved state, since it won't be guaranteed to be clear >> after >> > - * rendering occurs. >> > - */ >> > - if (is_lossless_compressed || >> > - fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR) { >> > - intel_miptree_set_fast_clear_state( >> > - brw, mt, level, start_layer + i, 1, >> > - INTEL_FAST_CLEAR_STATE_UNRESOLVED); >> > - } >> > + case ISL_AUX_STATE_PASS_THROUGH: >> > + return BLORP_FAST_CLEAR_OP_NONE; >> > + >> > + case ISL_AUX_STATE_RESOLVED: >> > + case ISL_AUX_STATE_AUX_INVALID: >> > + break; >> > } >> > + >> > + unreachable("Invalid aux state for CCS_E"); >> > } >> > >> > -static bool >> > -intel_miptree_needs_color_resolve(const struct brw_context *brw, >> > - const struct intel_mipmap_tree *mt, >> > - int flags) >> > +static void >> > +intel_miptree_prepare_ccs_access(struct brw_context *brw, >> > + struct intel_mipmap_tree *mt, >> > + uint32_t level, uint32_t layer, >> > + bool aux_supported, >> > + bool fast_clear_supported) >> > { >> > - if (mt->aux_disable & INTEL_AUX_DISABLE_CCS) >> > - return false; >> > + enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, >> level, layer); >> > >> > - const bool is_lossless_compressed = >> > - intel_miptree_is_lossless_compressed(brw, mt); >> > + enum blorp_fast_clear_op resolve_op; >> > + if (intel_miptree_is_lossless_compressed(brw, 
mt)) { >> > + resolve_op = get_ccs_e_resolve_op(aux_state, aux_supported, >> > + fast_clear_supported); >> > + } else { >> > + resolve_op = get_ccs_d_resolve_op(aux_state, aux_supported, >> > + fast_clear_supported); >> > + } >> > >> > - /* From gen9 onwards there is new compression scheme for single >> sampled >> > - * surfaces called "lossless compressed". These don't need to be >> always >> > - * resolved. >> > - */ >> > - if ((flags & INTEL_MIPTREE_IGNORE_CCS_E) && is_lossless_compressed) >> > - return false; >> > + if (resolve_op != BLORP_FAST_CLEAR_OP_NONE) { >> > + intel_miptree_check_color_resolve(brw, mt, level, layer); >> > + brw_blorp_resolve_color(brw, mt, level, layer, resolve_op); >> > >> > - /* Fast color clear resolves only make sense for non-MSAA buffers. >> */ >> > - if (mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE && >> !is_lossless_compressed) >> > - return false; >> > + switch (resolve_op) { >> > + case BLORP_FAST_CLEAR_OP_RESOLVE_FULL: >> > + /* The CCS full resolve operation destroys the CCS and sets >> it to the >> > + * pass-through state. (You can also think of this as being >> both a >> > + * resolve and an ambiguate in one operation.) 
>> > + */ >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, >> > + ISL_AUX_STATE_PASS_THROUGH); >> > + break; >> > >> > - return true; >> > + case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL: >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, >> > + ISL_AUX_STATE_COMPRESSED_NO_C >> LEAR); >> > + break; >> > + >> > + default: >> > + unreachable("Invalid resolve op"); >> > + } >> > + } >> > } >> > >> > -static bool >> > -intel_miptree_resolve_color(struct brw_context *brw, >> > - struct intel_mipmap_tree *mt, >> > - uint32_t start_level, uint32_t num_levels, >> > - uint32_t start_layer, uint32_t num_layers, >> > - int flags) >> > +static void >> > +intel_miptree_finish_ccs_write(struct brw_context *brw, >> > + struct intel_mipmap_tree *mt, >> > + uint32_t level, uint32_t layer, >> > + bool written_with_ccs) >> > { >> > - intel_miptree_check_color_resolve(brw, mt, start_level, >> start_layer); >> > + enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, >> level, layer); >> > >> > - if (!intel_miptree_needs_color_resolve(brw, mt, flags)) >> > - return false; >> > + if (intel_miptree_is_lossless_compressed(brw, mt)) { >> > + switch (aux_state) { >> > + case ISL_AUX_STATE_CLEAR: >> > + assert(written_with_ccs); >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, >> > + ISL_AUX_STATE_COMPRESSED_CLEAR); >> > + break; >> > >> > - enum blorp_fast_clear_op resolve_op; >> > - if (brw->gen >= 9) { >> > - if (intel_miptree_is_lossless_compressed(brw, mt)) { >> > - resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL; >> > - } else { >> > - resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL; >> > + case ISL_AUX_STATE_COMPRESSED_CLEAR: >> > + case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: >> > + assert(written_with_ccs); >> > + break; /* Nothing to do */ >> > + >> > + case ISL_AUX_STATE_PASS_THROUGH: >> > + if (written_with_ccs) { >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, >> > + ISL_AUX_STATE_COMPRESSED_NO_CL >> EAR); >> > + } else { >> > + /* 
Nothing to do */ >> > + } >> > + break; >> > + >> > + case ISL_AUX_STATE_RESOLVED: >> > + case ISL_AUX_STATE_AUX_INVALID: >> > + unreachable("Invalid aux state for CCS_E"); >> > } >> > } else { >> > - /* Broadwell and earlier do not have a partial resolve */ >> > - assert(!intel_miptree_is_lossless_compressed(brw, mt)); >> > - resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL; >> > - } >> > + /* CCS_D is a bit simpler */ >> > + switch (aux_state) { >> > + case ISL_AUX_STATE_CLEAR: >> > + assert(written_with_ccs); >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, >> > + ISL_AUX_STATE_COMPRESSED_CLEAR); >> > + break; >> > >> > - bool resolved = false; >> > - foreach_list_typed_safe(struct intel_resolve_map, map, link, >> > - &mt->color_resolve_map) { >> > - if (map->level < start_level || >> > - map->level >= (start_level + num_levels) || >> > - map->layer < start_layer || >> > - map->layer >= (start_layer + num_layers)) >> > - continue; >> > + case ISL_AUX_STATE_COMPRESSED_CLEAR: >> > + assert(written_with_ccs); >> > + break; /* Nothing to do */ >> > + >> > + case ISL_AUX_STATE_PASS_THROUGH: >> > + /* Nothing to do */ >> > + break; >> > >> > - /* Arrayed fast clear is only supported for gen8+. 
*/ >> > - assert(brw->gen >= 8 || map->level == 0); >> > + case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: >> > + case ISL_AUX_STATE_RESOLVED: >> > + case ISL_AUX_STATE_AUX_INVALID: >> > + unreachable("Invalid aux state for CCS_D"); >> > + } >> > + } >> > +} >> > >> > - intel_miptree_check_level_layer(mt, map->level, map->layer); >> > +static void >> > +intel_miptree_finish_mcs_write(struct brw_context *brw, >> > + struct intel_mipmap_tree *mt, >> > + uint32_t level, uint32_t layer, >> > + bool written_with_aux) >> > +{ >> > + switch (intel_miptree_get_aux_state(mt, level, layer)) { >> > + case ISL_AUX_STATE_CLEAR: >> > + assert(written_with_aux); >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, >> > + ISL_AUX_STATE_COMPRESSED_CLEAR); >> > + break; >> > >> > - assert(map->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVE >> D); >> > + case ISL_AUX_STATE_COMPRESSED_CLEAR: >> > + assert(written_with_aux); >> > + break; /* Nothing to do */ >> > >> > - brw_blorp_resolve_color(brw, mt, map->level, map->layer, >> resolve_op); >> > - intel_resolve_map_remove(map); >> > - resolved = true; >> > + case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: >> > + case ISL_AUX_STATE_RESOLVED: >> > + case ISL_AUX_STATE_PASS_THROUGH: >> > + case ISL_AUX_STATE_AUX_INVALID: >> > + unreachable("Invalid aux state for MCS"); >> > } >> > - >> > - return resolved; >> > } >> > >> > void >> > @@ -2152,17 +2274,29 @@ intel_miptree_prepare_access(struct >> brw_context *brw, >> > uint32_t start_layer, uint32_t num_layers, >> > bool aux_supported, bool >> fast_clear_supported) >> > { >> > + /* Clamp the level range to fit the miptree */ >> > + assert(start_level + num_levels >= start_level); >> >> This also looks odd. 
>> >> > + const uint32_t last_level = >> > + MIN2(mt->last_level, start_level + num_levels - 1); >> > + start_level = MAX2(mt->first_level, start_level); >> > + num_levels = last_level - start_level + 1; >> > + >> > if (_mesa_is_format_color_format(mt->format)) { >> > if (!mt->mcs_buf) >> > return; >> > >> > if (mt->num_samples > 1) { >> > /* Nothing to do for MSAA */ >> > + assert(aux_supported && fast_clear_supported); >> > } else { >> > - /* TODO: This is fairly terrible. We can do better. */ >> > - if (!aux_supported || !fast_clear_supported) { >> > - intel_miptree_resolve_color(brw, mt, start_level, >> num_levels, >> > - start_layer, num_layers, 0); >> > + for (uint32_t level = start_level; level <= last_level; >> level++) { >> > + const uint32_t level_layers = >> > + MIN2(num_layers, mt->level[level].depth); >> > + for (uint32_t a = 0; a < level_layers; a++) { >> > + intel_miptree_prepare_ccs_access(brw, mt, level, >> > + start_layer + a, >> aux_supported, >> > + fast_clear_supported); >> > + } >> > } >> > } >> > } else if (mt->format == MESA_FORMAT_S_UINT8) { >> > @@ -2197,12 +2331,18 @@ intel_miptree_finish_write(struct brw_context >> *brw, >> > assert(num_layers <= mt->level[level].depth - start_layer); >> > >> > if (_mesa_is_format_color_format(mt->format)) { >> > + if (!mt->mcs_buf) >> > + return; >> > + >> > if (mt->num_samples > 1) { >> > - /* Nothing to do for MSAA */ >> > + for (uint32_t a = 0; a < num_layers; a++) { >> > + intel_miptree_finish_mcs_write(brw, mt, level, >> start_layer + a, >> > + written_with_aux); >> > + } >> > } else { >> > - if (written_with_aux) { >> > - intel_miptree_used_for_rendering(brw, mt, level, >> > - start_layer, num_layers); >> > + for (uint32_t a = 0; a < num_layers; a++) { >> > + intel_miptree_finish_ccs_write(brw, mt, level, >> start_layer + a, >> > + written_with_aux); >> > } >> > } >> > } else if (mt->format == MESA_FORMAT_S_UINT8) { >> > @@ -2231,22 +2371,12 @@ enum isl_aux_state >> > 
intel_miptree_get_aux_state(const struct intel_mipmap_tree *mt, >> > uint32_t level, uint32_t layer) >> > { >> > + intel_miptree_check_level_layer(mt, level, layer); >> > + >> > if (_mesa_is_format_color_format(mt->format)) { >> > assert(mt->mcs_buf != NULL); >> > - if (mt->num_samples > 1) { >> > - return ISL_AUX_STATE_COMPRESSED_CLEAR; >> > - } else { >> > - switch (intel_miptree_get_fast_clear_state(mt, level, >> layer)) { >> > - case INTEL_FAST_CLEAR_STATE_RESOLVED: >> > - return ISL_AUX_STATE_RESOLVED; >> > - case INTEL_FAST_CLEAR_STATE_UNRESOLVED: >> > - return ISL_AUX_STATE_COMPRESSED_CLEAR; >> > - case INTEL_FAST_CLEAR_STATE_CLEAR: >> > - return ISL_AUX_STATE_CLEAR; >> > - default: >> > - unreachable("Invalid fast clear state"); >> > - } >> > - } >> > + assert(mt->num_samples <= 1 || mt->msaa_layout == >> INTEL_MSAA_LAYOUT_CMS); >> > + return mt->aux_state[level][layer]; >> > } else if (mt->format == MESA_FORMAT_S_UINT8) { >> > unreachable("Cannot get aux state for stencil"); >> > } else { >> > @@ -2277,19 +2407,20 @@ intel_miptree_set_aux_state(struct brw_context >> *brw, >> > num_layers = mt->level[level].depth - start_layer; >> > assert(num_layers <= mt->level[level].depth - start_layer); >> > >> > - /* Right now, this only applies to clears. 
*/ >> > - assert(aux_state == ISL_AUX_STATE_CLEAR); >> > - >> > if (_mesa_is_format_color_format(mt->format)) { >> > - if (mt->num_samples > 1) >> > - assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS); >> > + assert(mt->mcs_buf != NULL); >> > + assert(mt->num_samples <= 1 || mt->msaa_layout == >> INTEL_MSAA_LAYOUT_CMS); >> > >> > - assert(level == 0 && start_layer == 0 && num_layers == 1); >> > - intel_miptree_set_fast_clear_state(brw, mt, 0, 0, 1, >> > - INTEL_FAST_CLEAR_STATE_CLEAR) >> ; >> > + for (unsigned a = 0; a < num_layers; a++) >> > + mt->aux_state[level][start_layer + a] = aux_state; >> > } else if (mt->format == MESA_FORMAT_S_UINT8) { >> > - assert(!"Cannot set aux state for stencil"); >> > + unreachable("Cannot get aux state for stencil"); >> > } else { >> > + assert(mt->hiz_buf != NULL); >> > + >> > + /* Right now, this only applies to clears. */ >> > + assert(aux_state == ISL_AUX_STATE_CLEAR); >> > + >> > for (unsigned a = 0; a < num_layers; a++) { >> > intel_miptree_check_level_layer(mt, level, start_layer); >> > intel_resolve_map_set(&mt->hiz_map, level, start_layer + a, >> > @@ -2310,22 +2441,23 @@ intel_miptree_set_aux_state(struct brw_context >> *brw, >> > * set). 
>> > */ >> > static bool >> > -intel_texture_view_requires_resolve(struct brw_context *brw, >> > - struct intel_mipmap_tree *mt, >> > - mesa_format format) >> > +can_texture_with_ccs(struct brw_context *brw, >> > + struct intel_mipmap_tree *mt, >> > + mesa_format view_format) >> > { >> > - if (brw->gen < 9 || >> > - !intel_miptree_is_lossless_compressed(brw, mt)) >> > - return false; >> > + if (!intel_miptree_is_lossless_compressed(brw, mt)) >> > + return false; >> > >> > - const enum isl_format isl_format = brw_isl_format_for_mesa_format >> (format); >> > + enum isl_format isl_mt_format = brw_isl_format_for_mesa_format >> (mt->format); >> > + enum isl_format isl_view_format = brw_isl_format_for_mesa_format >> (view_format); >> > >> > - if (isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format)) >> > + if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo, >> > + isl_mt_format, >> isl_view_format)) { >> > + perf_debug("Incompatible sampling format (%s) for rbc (%s)\n", >> > + _mesa_get_format_name(view_format), >> > + _mesa_get_format_name(mt->format)); >> > return false; >> > - >> > - perf_debug("Incompatible sampling format (%s) for rbc (%s)\n", >> > - _mesa_get_format_name(format), >> > - _mesa_get_format_name(mt->format)); >> > + } >> > >> > return true; >> > } >> > @@ -2338,19 +2470,29 @@ intel_miptree_prepare_texture_slices(struct >> brw_context *brw, >> > uint32_t start_layer, uint32_t >> num_layers, >> > bool *aux_supported_out) >> > { >> > - bool aux_supported; >> > + bool aux_supported, clear_supported; >> > if (_mesa_is_format_color_format(mt->format)) { >> > - aux_supported = intel_miptree_is_lossless_compressed(brw, mt) && >> > - !intel_texture_view_requires_resolve(brw, mt, >> view_format); >> > + if (mt->num_samples > 1) { >> > + aux_supported = clear_supported = true; >> > + } else { >> > + aux_supported = can_texture_with_ccs(brw, mt, view_format); >> > + >> > + /* Clear color is specified as ints or floats and the >> conversion is >> > 
+ * done by the sampler. If we have a texture view, we would >> have to >> > + * perform the clear color conversion manually. Just disable >> clear >> > + * color. >> > + */ >> > + clear_supported = aux_supported && (mt->format == >> view_format); >> > + } >> > } else if (mt->format == MESA_FORMAT_S_UINT8) { >> > - aux_supported = false; >> > + aux_supported = clear_supported = false; >> > } else { >> > - aux_supported = intel_miptree_sample_with_hiz(brw, mt); >> > + aux_supported = clear_supported = intel_miptree_sample_with_hiz(brw, >> mt); >> > } >> > >> > intel_miptree_prepare_access(brw, mt, start_level, num_levels, >> > start_layer, num_layers, >> > - aux_supported, aux_supported); >> > + aux_supported, clear_supported); >> > if (aux_supported_out) >> > *aux_supported_out = aux_supported; >> > } >> > @@ -2488,7 +2630,8 @@ intel_miptree_make_shareable(struct brw_context >> *brw, >> > * execute any will likely crash due to the missing aux buffer. >> So let's >> > * delete all pending ops. >> > */ >> > - exec_list_make_empty(&mt->color_resolve_map); >> > + free(mt->aux_state); >> > + mt->aux_state = NULL; >> > } >> > >> > if (mt->hiz_buf) { >> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h >> > index 3f3a78d..528c32c 100644 >> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h >> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h >> > @@ -557,7 +557,14 @@ struct intel_mipmap_tree >> > * indicates state other than RESOLVED. >> > */ >> > struct exec_list hiz_map; /* List of intel_resolve_map. */ >> > - struct exec_list color_resolve_map; /* List of intel_resolve_map. */ >> > + >> > + /** >> > + * \brief Maps miptree slices to their current aux state >> > + * >> > + * This two-dimensional array is indexed as [level][layer] and >> stores an >> > + * aux state for each slice. >> > + */ >> > + enum isl_aux_state **aux_state; >> > >> > /** >> > * \brief Stencil miptree for depthstencil textures. 
>> > @@ -827,41 +834,11 @@ intel_miptree_all_slices_resolve_depth(struct >> brw_context *brw, >> > >> > /**\}*/ >> > >> > -enum intel_fast_clear_state >> > -intel_miptree_get_fast_clear_state(const struct intel_mipmap_tree *mt, >> > - unsigned level, unsigned layer); >> > - >> > -void >> > -intel_miptree_set_fast_clear_state(const struct brw_context *brw, >> > - struct intel_mipmap_tree *mt, >> > - unsigned level, >> > - unsigned first_layer, >> > - unsigned num_layers, >> > - enum intel_fast_clear_state >> new_state); >> > - >> > bool >> > intel_miptree_has_color_unresolved(const struct intel_mipmap_tree *mt, >> > unsigned start_level, unsigned >> num_levels, >> > unsigned start_layer, unsigned >> num_layers); >> > >> > -/** >> > - * Update the fast clear state for a miptree to indicate that it has >> been used >> > - * for rendering. >> > - */ >> > -void >> > -intel_miptree_used_for_rendering(const struct brw_context *brw, >> > - struct intel_mipmap_tree *mt, >> unsigned level, >> > - unsigned start_layer, unsigned >> num_layers); >> > - >> > -/** >> > - * Flag values telling color resolve pass which special types of >> buffers >> > - * can be ignored. >> > - * >> > - * INTEL_MIPTREE_IGNORE_CCS_E: Lossless compressed (single-sample >> > - * compression scheme since gen9) >> > - */ >> > -#define INTEL_MIPTREE_IGNORE_CCS_E (1 << 0) >> > - >> > >> > #define INTEL_REMAINING_LAYERS UINT32_MAX >> > #define INTEL_REMAINING_LEVELS UINT32_MAX >> > diff --git a/src/mesa/drivers/dri/i965/intel_resolve_map.h >> b/src/mesa/drivers/dri/i965/intel_resolve_map.h >> > index 17d3983..771d855 100644 >> > --- a/src/mesa/drivers/dri/i965/intel_resolve_map.h >> > +++ b/src/mesa/drivers/dri/i965/intel_resolve_map.h >> > @@ -33,63 +33,6 @@ extern "C" { >> > #endif >> > >> > /** >> > - * Enum for keeping track of the fast clear state of a buffer >> associated with >> > - * a miptree. 
>> > - * >> > - * Fast clear works by deferring the memory writes that would be used >> to clear >> > - * the buffer, so that instead of performing them at the time of the >> clear >> > - * operation, the hardware automatically performs them at the time >> that the >> > - * buffer is later accessed for rendering. The MCS buffer keeps track >> of >> > - * which regions of the buffer still have pending clear writes. >> > - * >> > - * This enum keeps track of the driver's knowledge of pending fast >> clears in >> > - * the MCS buffer. >> > - * >> > - * MCS buffers only exist on Gen7+. >> > - */ >> > -enum intel_fast_clear_state >> > -{ >> > - /** >> > - * No deferred clears are pending for this miptree, and the >> contents of the >> > - * color buffer are entirely correct. An MCS buffer may or may not >> exist >> > - * for this miptree. If it does exist, it is entirely in the "no >> deferred >> > - * clears pending" state. If it does not exist, it will be created >> the >> > - * first time a fast color clear is executed. >> > - * >> > - * In this state, the color buffer can be used for purposes other >> than >> > - * rendering without needing a render target resolve. >> > - * >> > - * Since there is no such thing as a "fast color clear resolve" for >> MSAA >> > - * buffers, an MSAA buffer will never be in this state. >> > - */ >> > - INTEL_FAST_CLEAR_STATE_RESOLVED, >> > - >> > - /** >> > - * An MCS buffer exists for this miptree, and deferred clears are >> pending >> > - * for some regions of the color buffer, as indicated by the MCS >> buffer. >> > - * The contents of the color buffer are only correct for the >> regions where >> > - * the MCS buffer doesn't indicate a deferred clear. >> > - * >> > - * If a single-sample buffer is in this state, a render target >> resolve must >> > - * be performed before it can be used for purposes other than >> rendering. 
>> > - */ >> > - INTEL_FAST_CLEAR_STATE_UNRESOLVED, >> > - >> > - /** >> > - * An MCS buffer exists for this miptree, and deferred clears are >> pending >> > - * for the entire color buffer, and the contents of the MCS buffer >> reflect >> > - * this. The contents of the color buffer are undefined. >> > - * >> > - * If a single-sample buffer is in this state, a render target >> resolve must >> > - * be performed before it can be used for purposes other than >> rendering. >> > - * >> > - * If the client attempts to clear a buffer which is already in >> this state, >> > - * the clear can be safely skipped, since the buffer is already >> clear. >> > - */ >> > - INTEL_FAST_CLEAR_STATE_CLEAR, >> > -}; >> > - >> > -/** >> > * \brief Map of miptree slices to needed resolves. >> > * >> > * The map is implemented as a linear doubly-linked list. >> > @@ -121,10 +64,7 @@ struct intel_resolve_map { >> > uint32_t level; >> > uint32_t layer; >> > >> > - union { >> > - enum blorp_hiz_op need; >> > - enum intel_fast_clear_state fast_clear_state; >> > - }; >> > + enum blorp_hiz_op need; >> > }; >> > >> > void >> > -- >> > 2.5.0.400.gff86faf >> > >> > _______________________________________________ >> > mesa-dev mailing list >> > mesa-dev@lists.freedesktop.org >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev