On Wed, May 31, 2017 at 11:05 AM, Pohjolainen, Topi < topi.pohjolai...@gmail.com> wrote:
> On Wed, May 31, 2017 at 10:01:30AM -0700, Jason Ekstrand wrote: > > On Wed, May 31, 2017 at 8:43 AM, Jason Ekstrand <ja...@jlekstrand.net> > > wrote: > > > > > On Wed, May 31, 2017 at 6:03 AM, Pohjolainen, Topi < > > > topi.pohjolai...@gmail.com> wrote: > > > > > >> On Fri, May 26, 2017 at 04:30:31PM -0700, Jason Ekstrand wrote: > > >> > This commit reworks the resolve tracking for CCS and MCS to use the > new > > >> > isl_aux_state enum. This should provide much more accurate and > easy to > > >> > reason about tracking. In order to understand, for instance, the > > >> > intel_miptree_prepare_ccs_access function, one only has to go look > at > > >> > the giant comment for the isl_aux_state enum and follow the arrows. > > >> > Unfortunately, there's no good way to split this up without making a > > >> > real mess so there are a bunch of changes in here: > > >> > > > >> > 1) We now do partial resolves. I really have no idea how this ever > > >> > worked before. So far as I can tell, the only time the old code > > >> > ever did a partial resolve was when it was using CCS_D where a > > >> > partial resolve and a full resolve are the same thing. > > >> > > > >> > 2) We are now tracking 4 states instead of 3 for CCS_E. In > particular, > > >> > we distinguish between compressed with clear and compressed > without > > >> > clear. The end result is that you will never get two partial > > >> > resolves in a row. > > >> > > > >> > 3) The texture view rules are now more correct. Previously, we > would > > >> > only bail if compression was not supported by the destination > > >> > format. However, this is not actually correct. Not all format > > >> > pairs are supported for texture views with CCS even if both > support > > >> > CCS individually. Fortunately, ISL has a helper for this. 
> > >> > > > >> > 4) We are no longer using intel_resolve_map for tracking aux state > but > > >> > are instead using a simple array of enum isl_aux_state indexed > by > > >> > level and layer. This is because, now that we're tracking 4 > > >> > different states, it's no longer clear which should be the > "default" > > >> > and array lookups are faster than linked list searches. > > >> > > > >> > 5) The new code is very assert-happy. Incorrect transitions will > now > > >> > get caught by assertions rather than by rendering corruption. > > >> > --- > > >> > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 479 > > >> +++++++++++++++++--------- > > >> > src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 39 +-- > > >> > src/mesa/drivers/dri/i965/intel_resolve_map.h | 62 +--- > > >> > 3 files changed, 320 insertions(+), 260 deletions(-) > > >> > > > >> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > >> > index cae8358..3e30b2a 100644 > > >> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > >> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > > >> > @@ -326,7 +326,7 @@ intel_miptree_create_layout(struct brw_context > > >> *brw, > > >> > mt->aux_disable |= INTEL_AUX_DISABLE_CCS; > > >> > mt->is_scanout = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) != > 0; > > >> > exec_list_make_empty(&mt->hiz_map); > > >> > - exec_list_make_empty(&mt->color_resolve_map); > > >> > + mt->aux_state = NULL; > > >> > mt->cpp = _mesa_get_format_bytes(format); > > >> > mt->num_samples = num_samples; > > >> > mt->compressed = _mesa_is_format_compressed(format); > > >> > @@ -576,6 +576,46 @@ intel_lower_compressed_format(struct > brw_context > > >> *brw, mesa_format format) > > >> > } > > >> > } > > >> > > > >> > +static enum isl_aux_state ** > > >> > +create_aux_state_map(struct intel_mipmap_tree *mt, > > >> > + enum isl_aux_state initial) > > >> > +{ > > >> > + const uint32_t levels = mt->last_level + 1; > > >> > 
+ > > >> > + uint32_t total_slices = 0; > > >> > + for (uint32_t level = 0; level < levels; level++) > > >> > + total_slices += mt->level[level].depth; > > >> > + > > >> > + const size_t per_level_array_size = levels * sizeof(enum > > >> isl_aux_state *); > > >> > + > > >> > + /* We're going to allocate a single chunk of data for both the > > >> per-level > > >> > + * reference array and the arrays of aux_state. This makes > cleanup > > >> > + * significantly easier. > > >> > + */ > > >> > + const size_t total_size = per_level_array_size + > > >> > + total_slices * sizeof(enum > isl_aux_state); > > >> > + void *data = malloc(total_size); > > >> > + if (data == NULL) > > >> > + return NULL; > > >> > + > > >> > + enum isl_aux_state **per_level_arr = data; > > >> > + enum isl_aux_state *s = data + per_level_array_size; > > >> > + for (uint32_t level = 0; level < levels; level++) { > > >> > + per_level_arr[level] = s; > > >> > + for (uint32_t a = 0; a < mt->level[level].depth; a++) > > >> > + *(s++) = initial; > > >> > + } > > >> > + assert((void *)s == data + total_size); > > >> > + > > >> > + return per_level_arr; > > >> > +} > > >> > + > > >> > +static void > > >> > +free_aux_state_map(enum isl_aux_state **state) > > >> > +{ > > >> > + free(state); > > >> > +} > > >> > + > > >> > static struct intel_mipmap_tree * > > >> > miptree_create(struct brw_context *brw, > > >> > GLenum target, > > >> > @@ -935,7 +975,7 @@ intel_miptree_release(struct intel_mipmap_tree > **mt) > > >> > free((*mt)->mcs_buf); > > >> > } > > >> > intel_resolve_map_clear(&(*mt)->hiz_map); > > >> > - intel_resolve_map_clear(&(*mt)->color_resolve_map); > > >> > + free_aux_state_map((*mt)->aux_state); > > >> > > > >> > intel_miptree_release(&(*mt)->plane[0]); > > >> > intel_miptree_release(&(*mt)->plane[1]); > > >> > @@ -1482,27 +1522,31 @@ intel_miptree_alloc_mcs(struct brw_context > *brw, > > >> > unreachable("Unrecognized sample count in > > >> intel_miptree_alloc_mcs"); > > >> > }; > > >> > > > 
>> > + /* Multisampled miptrees are only supported for single level. */ > > >> > + assert(mt->first_level == 0); > > >> > + enum isl_aux_state **aux_state = > > >> > + create_aux_state_map(mt, ISL_AUX_STATE_CLEAR); > > >> > + if (!aux_state) > > >> > + return false; > > >> > + > > >> > mt->mcs_buf = > > >> > intel_mcs_miptree_buf_create(brw, mt, > > >> > format, > > >> > mt->logical_width0, > > >> > mt->logical_height0, > > >> > MIPTREE_LAYOUT_ACCELERATED_ > UPLOAD); > > >> > - if (!mt->mcs_buf) > > >> > + if (!mt->mcs_buf) { > > >> > + free(aux_state); > > >> > return false; > > >> > + } > > >> > > > >> > - intel_miptree_init_mcs(brw, mt, 0xFF); > > >> > + mt->aux_state = aux_state; > > >> > > > >> > - /* Multisampled miptrees are only supported for single level. */ > > >> > - assert(mt->first_level == 0); > > >> > - intel_miptree_set_fast_clear_state(brw, mt, mt->first_level, 0, > > >> > - mt->logical_depth0, > > >> > - > INTEL_FAST_CLEAR_STATE_CLEAR); > > >> > + intel_miptree_init_mcs(brw, mt, 0xFF); > > >> > > > >> > return true; > > >> > } > > >> > > > >> > - > > >> > bool > > >> > intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, > > >> > struct intel_mipmap_tree *mt, > > >> > @@ -1528,6 +1572,13 @@ intel_miptree_alloc_non_msrt_mcs(struct > > >> brw_context *brw, > > >> > if (!buf) > > >> > return false; > > >> > > > >> > + enum isl_aux_state **aux_state = > > >> > + create_aux_state_map(mt, ISL_AUX_STATE_PASS_THROUGH); > > >> > + if (!aux_state) { > > >> > + free(buf); > > >> > + return false; > > >> > + } > > >> > + > > >> > buf->size = temp_ccs_surf.size; > > >> > buf->pitch = temp_ccs_surf.row_pitch; > > >> > buf->qpitch = isl_surf_get_array_pitch_sa_rows(&temp_ccs_surf); > > >> > @@ -1549,10 +1600,12 @@ intel_miptree_alloc_non_msrt_mcs(struct > > >> brw_context *brw, > > >> > 1, I915_TILING_Y, &buf->pitch, > > >> alloc_flags); > > >> > if (!buf->bo) { > > >> > free(buf); > > >> > + free(aux_state); > > >> > return false; > > >> > } > > >> > > > 
>> > mt->mcs_buf = buf; > > >> > + mt->aux_state = aux_state; > > >> > > > >> > /* From Gen9 onwards single-sampled (non-msrt) auxiliary > buffers are > > >> > * used for lossless compression which requires similar > > >> initialisation > > >> > @@ -1975,19 +2028,35 @@ intel_miptree_all_slices_ > resolve_depth(struct > > >> brw_context *brw, > > >> > > BLORP_HIZ_OP_DEPTH_RESOLVE); > > >> > } > > >> > > > >> > -enum intel_fast_clear_state > > >> > -intel_miptree_get_fast_clear_state(const struct intel_mipmap_tree > *mt, > > >> > - unsigned level, unsigned layer) > > >> > +bool > > >> > +intel_miptree_has_color_unresolved(const struct intel_mipmap_tree > *mt, > > >> > + unsigned start_level, unsigned > > >> num_levels, > > >> > + unsigned start_layer, unsigned > > >> num_layers) > > >> > { > > >> > - intel_miptree_check_level_layer(mt, level, layer); > > >> > + assert(_mesa_is_format_color_format(mt->format)); > > >> > > > >> > - const struct intel_resolve_map *item = > > >> > - intel_resolve_map_const_get(&mt->color_resolve_map, level, > > >> layer); > > >> > + if (!mt->mcs_buf) > > >> > + return false; > > >> > > > >> > - if (!item) > > >> > - return INTEL_FAST_CLEAR_STATE_RESOLVED; > > >> > + /* Clamp the level range to fit the miptree */ > > >> > + assert(start_level + num_levels >= start_level); > > >> > > >> This assert looks odd, and did you mean to have it after adjusting? > > >> > > > > > > No, this is a "does the sum overflow" assertion that we do right > before we > > > do a sum. There's a bunch of places where we pass [0, UINT32_MAX] as > the > > > range and if someone decided to pass [5, UINT32_MAX], it would > overflow. > > > > > > That said, I think my intention was to do the more direct "if > (num_levels > > > == INTEL_REMAINING_LEVELS)" check. I'll switch it to that. > > > > > > > Ok, I think I have something better now.
Take a look at > > > > https://cgit.freedesktop.org/~jekstrand/mesa/commit/?h=wip/ > i965-resolve-rework-v3&id=7ae758d95bee414d677df845d4781254f9f334f3 > > > > If you like it, I'm happy to rebase a bit and scatter the changes through > > the series. > > That looks nice. Just a small nit, you can pass miptree as const as it is > only > used for reading. > Done. > > > > > > > > + const uint32_t last_level = > > >> > + MIN2(mt->last_level, start_level + num_levels - 1); > > >> > + start_level = MAX2(mt->first_level, start_level); > > >> > + num_levels = last_level - start_level + 1; > > >> > + > > >> > + for (uint32_t level = start_level; level <= last_level; > level++) { > > >> > + const uint32_t level_layers = MIN2(num_layers, > > >> mt->level[level].depth); > > >> > + for (unsigned a = 0; a < level_layers; a++) { > > >> > + enum isl_aux_state aux_state = > > >> > + intel_miptree_get_aux_state(mt, level, start_layer + > a); > > >> > + assert(aux_state != ISL_AUX_STATE_AUX_INVALID); > > >> > + if (aux_state != ISL_AUX_STATE_PASS_THROUGH) > > >> > + return true; > > >> > + } > > >> > + } > > >> > > > >> > - return item->fast_clear_state; > > >> > + return false; > > >> > } > > >> > > > >> > static void > > >> > @@ -2014,135 +2083,188 @@ intel_miptree_check_color_resolve(const > > >> struct brw_context *brw, > > >> > (void)layer; > > >> > } > > >> > > > >> > -void > > >> > -intel_miptree_set_fast_clear_state(const struct brw_context *brw, > > >> > - struct intel_mipmap_tree *mt, > > >> > - unsigned level, > > >> > - unsigned first_layer, > > >> > - unsigned num_layers, > > >> > - enum intel_fast_clear_state > > >> new_state) > > >> > -{ > > >> > - /* Setting the state to resolved means removing the item from > the > > >> list > > >> > - * altogether. 
> > >> > - */ > > >> > - assert(new_state != INTEL_FAST_CLEAR_STATE_RESOLVED); > > >> > +static enum blorp_fast_clear_op > > >> > +get_ccs_d_resolve_op(enum isl_aux_state aux_state, > > >> > + bool ccs_supported, bool fast_clear_supported) > > >> > +{ > > >> > + assert(ccs_supported == fast_clear_supported); > > >> > > > >> > - intel_miptree_check_color_resolve(brw, mt, level, first_layer); > > >> > + switch (aux_state) { > > >> > + case ISL_AUX_STATE_CLEAR: > > >> > + case ISL_AUX_STATE_COMPRESSED_CLEAR: > > >> > + if (!ccs_supported) > > >> > + return BLORP_FAST_CLEAR_OP_RESOLVE_FULL; > > >> > + else > > >> > + return BLORP_FAST_CLEAR_OP_NONE; > > >> > > > >> > - assert(first_layer + num_layers <= mt->physical_depth0); > > >> > + case ISL_AUX_STATE_PASS_THROUGH: > > >> > + return BLORP_FAST_CLEAR_OP_NONE; > > >> > > > >> > - for (unsigned i = 0; i < num_layers; i++) > > >> > - intel_resolve_map_set(&mt->color_resolve_map, level, > > >> > - first_layer + i, new_state); > > >> > -} > > >> > + case ISL_AUX_STATE_RESOLVED: > > >> > + case ISL_AUX_STATE_AUX_INVALID: > > >> > + case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: > > >> > + break; > > >> > + } > > >> > > > >> > -bool > > >> > -intel_miptree_has_color_unresolved(const struct intel_mipmap_tree > *mt, > > >> > - unsigned start_level, unsigned > > >> num_levels, > > >> > - unsigned start_layer, unsigned > > >> num_layers) > > >> > -{ > > >> > - return intel_resolve_map_find_any(&mt->color_resolve_map, > > >> > - start_level, num_levels, > > >> > - start_layer, num_layers) != > NULL; > > >> > + unreachable("Invalid aux state for CCS_D"); > > >> > } > > >> > > > >> > -void > > >> > -intel_miptree_used_for_rendering(const struct brw_context *brw, > > >> > - struct intel_mipmap_tree *mt, > > >> unsigned level, > > >> > - unsigned start_layer, unsigned > > >> num_layers) > > >> > +static enum blorp_fast_clear_op > > >> > +get_ccs_e_resolve_op(enum isl_aux_state aux_state, > > >> > + bool ccs_supported, bool 
fast_clear_supported) > > >> > { > > >> > - const bool is_lossless_compressed = > > >> > - intel_miptree_is_lossless_compressed(brw, mt); > > >> > + switch (aux_state) { > > >> > + case ISL_AUX_STATE_CLEAR: > > >> > + case ISL_AUX_STATE_COMPRESSED_CLEAR: > > >> > + if (!ccs_supported) > > >> > + return BLORP_FAST_CLEAR_OP_RESOLVE_FULL; > > >> > + else if (!fast_clear_supported) > > >> > + return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL; > > >> > + else > > >> > + return BLORP_FAST_CLEAR_OP_NONE; > > >> > > > >> > - for (unsigned i = 0; i < num_layers; ++i) { > > >> > - const enum intel_fast_clear_state fast_clear_state = > > >> > - intel_miptree_get_fast_clear_state(mt, level, > start_layer + > > >> i); > > >> > + case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: > > >> > + if (!ccs_supported) > > >> > + return BLORP_FAST_CLEAR_OP_RESOLVE_FULL; > > >> > + else > > >> > + return BLORP_FAST_CLEAR_OP_NONE; > > >> > > > >> > - /* If the buffer was previously in fast clear state, change > it to > > >> > - * unresolved state, since it won't be guaranteed to be clear > > >> after > > >> > - * rendering occurs. 
> > >> > - */ > > >> > - if (is_lossless_compressed || > > >> > - fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR) { > > >> > - intel_miptree_set_fast_clear_state( > > >> > - brw, mt, level, start_layer + i, 1, > > >> > - INTEL_FAST_CLEAR_STATE_UNRESOLVED); > > >> > - } > > >> > + case ISL_AUX_STATE_PASS_THROUGH: > > >> > + return BLORP_FAST_CLEAR_OP_NONE; > > >> > + > > >> > + case ISL_AUX_STATE_RESOLVED: > > >> > + case ISL_AUX_STATE_AUX_INVALID: > > >> > + break; > > >> > } > > >> > + > > >> > + unreachable("Invalid aux state for CCS_E"); > > >> > } > > >> > > > >> > -static bool > > >> > -intel_miptree_needs_color_resolve(const struct brw_context *brw, > > >> > - const struct intel_mipmap_tree > *mt, > > >> > - int flags) > > >> > +static void > > >> > +intel_miptree_prepare_ccs_access(struct brw_context *brw, > > >> > + struct intel_mipmap_tree *mt, > > >> > + uint32_t level, uint32_t layer, > > >> > + bool aux_supported, > > >> > + bool fast_clear_supported) > > >> > { > > >> > - if (mt->aux_disable & INTEL_AUX_DISABLE_CCS) > > >> > - return false; > > >> > + enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, > > >> level, layer); > > >> > > > >> > - const bool is_lossless_compressed = > > >> > - intel_miptree_is_lossless_compressed(brw, mt); > > >> > + enum blorp_fast_clear_op resolve_op; > > >> > + if (intel_miptree_is_lossless_compressed(brw, mt)) { > > >> > + resolve_op = get_ccs_e_resolve_op(aux_state, aux_supported, > > >> > + fast_clear_supported); > > >> > + } else { > > >> > + resolve_op = get_ccs_d_resolve_op(aux_state, aux_supported, > > >> > + fast_clear_supported); > > >> > + } > > >> > > > >> > - /* From gen9 onwards there is new compression scheme for single > > >> sampled > > >> > - * surfaces called "lossless compressed". These don't need to be > > >> always > > >> > - * resolved. 
> > >> > - */ > > >> > - if ((flags & INTEL_MIPTREE_IGNORE_CCS_E) && > is_lossless_compressed) > > >> > - return false; > > >> > + if (resolve_op != BLORP_FAST_CLEAR_OP_NONE) { > > >> > + intel_miptree_check_color_resolve(brw, mt, level, layer); > > >> > + brw_blorp_resolve_color(brw, mt, level, layer, resolve_op); > > >> > > > >> > - /* Fast color clear resolves only make sense for non-MSAA > buffers. > > >> */ > > >> > - if (mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE && > > >> !is_lossless_compressed) > > >> > - return false; > > >> > + switch (resolve_op) { > > >> > + case BLORP_FAST_CLEAR_OP_RESOLVE_FULL: > > >> > + /* The CCS full resolve operation destroys the CCS and > sets > > >> it to the > > >> > + * pass-through state. (You can also think of this as > being > > >> both a > > >> > + * resolve and an ambiguate in one operation.) > > >> > + */ > > >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, > > >> > + ISL_AUX_STATE_PASS_THROUGH); > > >> > + break; > > >> > > > >> > - return true; > > >> > + case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL: > > >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, > > >> > + ISL_AUX_STATE_COMPRESSED_NO_C > > >> LEAR); > > >> > + break; > > >> > + > > >> > + default: > > >> > + unreachable("Invalid resolve op"); > > >> > + } > > >> > + } > > >> > } > > >> > > > >> > -static bool > > >> > -intel_miptree_resolve_color(struct brw_context *brw, > > >> > - struct intel_mipmap_tree *mt, > > >> > - uint32_t start_level, uint32_t > num_levels, > > >> > - uint32_t start_layer, uint32_t > num_layers, > > >> > - int flags) > > >> > +static void > > >> > +intel_miptree_finish_ccs_write(struct brw_context *brw, > > >> > + struct intel_mipmap_tree *mt, > > >> > + uint32_t level, uint32_t layer, > > >> > + bool written_with_ccs) > > >> > { > > >> > - intel_miptree_check_color_resolve(brw, mt, start_level, > > >> start_layer); > > >> > + enum isl_aux_state aux_state = intel_miptree_get_aux_state(mt, > > >> level, layer); > > 
>> > > > >> > - if (!intel_miptree_needs_color_resolve(brw, mt, flags)) > > >> > - return false; > > >> > + if (intel_miptree_is_lossless_compressed(brw, mt)) { > > >> > + switch (aux_state) { > > >> > + case ISL_AUX_STATE_CLEAR: > > >> > + assert(written_with_ccs); > > >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, > > >> > + ISL_AUX_STATE_COMPRESSED_ > CLEAR); > > >> > + break; > > >> > > > >> > - enum blorp_fast_clear_op resolve_op; > > >> > - if (brw->gen >= 9) { > > >> > - if (intel_miptree_is_lossless_compressed(brw, mt)) { > > >> > - resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL; > > >> > - } else { > > >> > - resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL; > > >> > + case ISL_AUX_STATE_COMPRESSED_CLEAR: > > >> > + case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: > > >> > + assert(written_with_ccs); > > >> > + break; /* Nothing to do */ > > >> > + > > >> > + case ISL_AUX_STATE_PASS_THROUGH: > > >> > + if (written_with_ccs) { > > >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, > > >> > + > ISL_AUX_STATE_COMPRESSED_NO_CL > > >> EAR); > > >> > + } else { > > >> > + /* Nothing to do */ > > >> > + } > > >> > + break; > > >> > + > > >> > + case ISL_AUX_STATE_RESOLVED: > > >> > + case ISL_AUX_STATE_AUX_INVALID: > > >> > + unreachable("Invalid aux state for CCS_E"); > > >> > } > > >> > } else { > > >> > - /* Broadwell and earlier do not have a partial resolve */ > > >> > - assert(!intel_miptree_is_lossless_compressed(brw, mt)); > > >> > - resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL; > > >> > - } > > >> > + /* CCS_D is a bit simpler */ > > >> > + switch (aux_state) { > > >> > + case ISL_AUX_STATE_CLEAR: > > >> > + assert(written_with_ccs); > > >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, > > >> > + ISL_AUX_STATE_COMPRESSED_ > CLEAR); > > >> > + break; > > >> > > > >> > - bool resolved = false; > > >> > - foreach_list_typed_safe(struct intel_resolve_map, map, link, > > >> > - &mt->color_resolve_map) { > > >> > - if (map->level < 
start_level || > > >> > - map->level >= (start_level + num_levels) || > > >> > - map->layer < start_layer || > > >> > - map->layer >= (start_layer + num_layers)) > > >> > - continue; > > >> > + case ISL_AUX_STATE_COMPRESSED_CLEAR: > > >> > + assert(written_with_ccs); > > >> > + break; /* Nothing to do */ > > >> > + > > >> > + case ISL_AUX_STATE_PASS_THROUGH: > > >> > + /* Nothing to do */ > > >> > + break; > > >> > > > >> > - /* Arrayed fast clear is only supported for gen8+. */ > > >> > - assert(brw->gen >= 8 || map->level == 0); > > >> > + case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: > > >> > + case ISL_AUX_STATE_RESOLVED: > > >> > + case ISL_AUX_STATE_AUX_INVALID: > > >> > + unreachable("Invalid aux state for CCS_D"); > > >> > + } > > >> > + } > > >> > +} > > >> > > > >> > - intel_miptree_check_level_layer(mt, map->level, map->layer); > > >> > +static void > > >> > +intel_miptree_finish_mcs_write(struct brw_context *brw, > > >> > + struct intel_mipmap_tree *mt, > > >> > + uint32_t level, uint32_t layer, > > >> > + bool written_with_aux) > > >> > +{ > > >> > + switch (intel_miptree_get_aux_state(mt, level, layer)) { > > >> > + case ISL_AUX_STATE_CLEAR: > > >> > + assert(written_with_aux); > > >> > + intel_miptree_set_aux_state(brw, mt, level, layer, 1, > > >> > + ISL_AUX_STATE_COMPRESSED_CLEAR); > > >> > + break; > > >> > > > >> > - assert(map->fast_clear_state != > INTEL_FAST_CLEAR_STATE_RESOLVE > > >> D); > > >> > + case ISL_AUX_STATE_COMPRESSED_CLEAR: > > >> > + assert(written_with_aux); > > >> > + break; /* Nothing to do */ > > >> > > > >> > - brw_blorp_resolve_color(brw, mt, map->level, map->layer, > > >> resolve_op); > > >> > - intel_resolve_map_remove(map); > > >> > - resolved = true; > > >> > + case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: > > >> > + case ISL_AUX_STATE_RESOLVED: > > >> > + case ISL_AUX_STATE_PASS_THROUGH: > > >> > + case ISL_AUX_STATE_AUX_INVALID: > > >> > + unreachable("Invalid aux state for MCS"); > > >> > } > > >> > - > > >> > - return resolved; 
> > >> > } > > >> > > > >> > void > > >> > @@ -2152,17 +2274,29 @@ intel_miptree_prepare_access(struct > > >> brw_context *brw, > > >> > uint32_t start_layer, uint32_t > num_layers, > > >> > bool aux_supported, bool > > >> fast_clear_supported) > > >> > { > > >> > + /* Clamp the level range to fit the miptree */ > > >> > + assert(start_level + num_levels >= start_level); > > >> > > >> This also looks odd. > > >> > > >> > + const uint32_t last_level = > > >> > + MIN2(mt->last_level, start_level + num_levels - 1); > > >> > + start_level = MAX2(mt->first_level, start_level); > > >> > + num_levels = last_level - start_level + 1; > > >> > + > > >> > if (_mesa_is_format_color_format(mt->format)) { > > >> > if (!mt->mcs_buf) > > >> > return; > > >> > > > >> > if (mt->num_samples > 1) { > > >> > /* Nothing to do for MSAA */ > > >> > + assert(aux_supported && fast_clear_supported); > > >> > } else { > > >> > - /* TODO: This is fairly terrible. We can do better. */ > > >> > - if (!aux_supported || !fast_clear_supported) { > > >> > - intel_miptree_resolve_color(brw, mt, start_level, > > >> num_levels, > > >> > - start_layer, num_layers, > 0); > > >> > + for (uint32_t level = start_level; level <= last_level; > > >> level++) { > > >> > + const uint32_t level_layers = > > >> > + MIN2(num_layers, mt->level[level].depth); > > >> > + for (uint32_t a = 0; a < level_layers; a++) { > > >> > + intel_miptree_prepare_ccs_access(brw, mt, level, > > >> > + start_layer + a, > > >> aux_supported, > > >> > + > fast_clear_supported); > > >> > + } > > >> > } > > >> > } > > >> > } else if (mt->format == MESA_FORMAT_S_UINT8) { > > >> > @@ -2197,12 +2331,18 @@ intel_miptree_finish_write(struct > brw_context > > >> *brw, > > >> > assert(num_layers <= mt->level[level].depth - start_layer); > > >> > > > >> > if (_mesa_is_format_color_format(mt->format)) { > > >> > + if (!mt->mcs_buf) > > >> > + return; > > >> > + > > >> > if (mt->num_samples > 1) { > > >> > - /* Nothing to do for MSAA */ > > >> > + 
for (uint32_t a = 0; a < num_layers; a++) { > > >> > + intel_miptree_finish_mcs_write(brw, mt, level, > > >> start_layer + a, > > >> > + written_with_aux); > > >> > + } > > >> > } else { > > >> > - if (written_with_aux) { > > >> > - intel_miptree_used_for_rendering(brw, mt, level, > > >> > - start_layer, > num_layers); > > >> > + for (uint32_t a = 0; a < num_layers; a++) { > > >> > + intel_miptree_finish_ccs_write(brw, mt, level, > > >> start_layer + a, > > >> > + written_with_aux); > > >> > } > > >> > } > > >> > } else if (mt->format == MESA_FORMAT_S_UINT8) { > > >> > @@ -2231,22 +2371,12 @@ enum isl_aux_state > > >> > intel_miptree_get_aux_state(const struct intel_mipmap_tree *mt, > > >> > uint32_t level, uint32_t layer) > > >> > { > > >> > + intel_miptree_check_level_layer(mt, level, layer); > > >> > + > > >> > if (_mesa_is_format_color_format(mt->format)) { > > >> > assert(mt->mcs_buf != NULL); > > >> > - if (mt->num_samples > 1) { > > >> > - return ISL_AUX_STATE_COMPRESSED_CLEAR; > > >> > - } else { > > >> > - switch (intel_miptree_get_fast_clear_state(mt, level, > > >> layer)) { > > >> > - case INTEL_FAST_CLEAR_STATE_RESOLVED: > > >> > - return ISL_AUX_STATE_RESOLVED; > > >> > - case INTEL_FAST_CLEAR_STATE_UNRESOLVED: > > >> > - return ISL_AUX_STATE_COMPRESSED_CLEAR; > > >> > - case INTEL_FAST_CLEAR_STATE_CLEAR: > > >> > - return ISL_AUX_STATE_CLEAR; > > >> > - default: > > >> > - unreachable("Invalid fast clear state"); > > >> > - } > > >> > - } > > >> > + assert(mt->num_samples <= 1 || mt->msaa_layout == > > >> INTEL_MSAA_LAYOUT_CMS); > > >> > + return mt->aux_state[level][layer]; > > >> > } else if (mt->format == MESA_FORMAT_S_UINT8) { > > >> > unreachable("Cannot get aux state for stencil"); > > >> > } else { > > >> > @@ -2277,19 +2407,20 @@ intel_miptree_set_aux_state(struct > brw_context > > >> *brw, > > >> > num_layers = mt->level[level].depth - start_layer; > > >> > assert(num_layers <= mt->level[level].depth - start_layer); > > >> > > > >> > - /* 
Right now, this only applies to clears. */ > > >> > - assert(aux_state == ISL_AUX_STATE_CLEAR); > > >> > - > > >> > if (_mesa_is_format_color_format(mt->format)) { > > >> > - if (mt->num_samples > 1) > > >> > - assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS); > > >> > + assert(mt->mcs_buf != NULL); > > >> > + assert(mt->num_samples <= 1 || mt->msaa_layout == > > >> INTEL_MSAA_LAYOUT_CMS); > > >> > > > >> > - assert(level == 0 && start_layer == 0 && num_layers == 1); > > >> > - intel_miptree_set_fast_clear_state(brw, mt, 0, 0, 1, > > >> > - > INTEL_FAST_CLEAR_STATE_CLEAR) > > >> ; > > >> > + for (unsigned a = 0; a < num_layers; a++) > > >> > + mt->aux_state[level][start_layer + a] = aux_state; > > >> > } else if (mt->format == MESA_FORMAT_S_UINT8) { > > >> > - assert(!"Cannot set aux state for stencil"); > > >> > + unreachable("Cannot get aux state for stencil"); > > >> > } else { > > >> > + assert(mt->hiz_buf != NULL); > > >> > + > > >> > + /* Right now, this only applies to clears. */ > > >> > + assert(aux_state == ISL_AUX_STATE_CLEAR); > > >> > + > > >> > for (unsigned a = 0; a < num_layers; a++) { > > >> > intel_miptree_check_level_layer(mt, level, start_layer); > > >> > intel_resolve_map_set(&mt->hiz_map, level, start_layer + > a, > > >> > @@ -2310,22 +2441,23 @@ intel_miptree_set_aux_state(struct > brw_context > > >> *brw, > > >> > * set). 
> > >> > */ > > >> > static bool > > >> > -intel_texture_view_requires_resolve(struct brw_context *brw, > > >> > - struct intel_mipmap_tree *mt, > > >> > - mesa_format format) > > >> > +can_texture_with_ccs(struct brw_context *brw, > > >> > + struct intel_mipmap_tree *mt, > > >> > + mesa_format view_format) > > >> > { > > >> > - if (brw->gen < 9 || > > >> > - !intel_miptree_is_lossless_compressed(brw, mt)) > > >> > - return false; > > >> > + if (!intel_miptree_is_lossless_compressed(brw, mt)) > > >> > + return false; > > >> > > > >> > - const enum isl_format isl_format = > brw_isl_format_for_mesa_format > > >> (format); > > >> > + enum isl_format isl_mt_format = brw_isl_format_for_mesa_format > > >> (mt->format); > > >> > + enum isl_format isl_view_format = brw_isl_format_for_mesa_format > > >> (view_format); > > >> > > > >> > - if (isl_format_supports_ccs_e(&brw->screen->devinfo, > isl_format)) > > >> > + if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo, > > >> > + isl_mt_format, > > >> isl_view_format)) { > > >> > + perf_debug("Incompatible sampling format (%s) for rbc > (%s)\n", > > >> > + _mesa_get_format_name(view_format), > > >> > + _mesa_get_format_name(mt->format)); > > >> > return false; > > >> > - > > >> > - perf_debug("Incompatible sampling format (%s) for rbc (%s)\n", > > >> > - _mesa_get_format_name(format), > > >> > - _mesa_get_format_name(mt->format)); > > >> > + } > > >> > > > >> > return true; > > >> > } > > >> > @@ -2338,19 +2470,29 @@ intel_miptree_prepare_texture_slices(struct > > >> brw_context *brw, > > >> > uint32_t start_layer, uint32_t > > >> num_layers, > > >> > bool *aux_supported_out) > > >> > { > > >> > - bool aux_supported; > > >> > + bool aux_supported, clear_supported; > > >> > if (_mesa_is_format_color_format(mt->format)) { > > >> > - aux_supported = intel_miptree_is_lossless_compressed(brw, > mt) && > > >> > - !intel_texture_view_requires_resolve(brw, > mt, > > >> view_format); > > >> > + if (mt->num_samples > 1) { > > 
>> > + aux_supported = clear_supported = true; > > >> > + } else { > > >> > + aux_supported = can_texture_with_ccs(brw, mt, > view_format); > > >> > + > > >> > + /* Clear color is specified as ints or floats and the > > >> conversion is > > >> > + * done by the sampler. If we have a texture view, we > would > > >> have to > > >> > + * perform the clear color conversion manually. Just > disable > > >> clear > > >> > + * color. > > >> > + */ > > >> > + clear_supported = aux_supported && (mt->format == > > >> view_format); > > >> > + } > > >> > } else if (mt->format == MESA_FORMAT_S_UINT8) { > > >> > - aux_supported = false; > > >> > + aux_supported = clear_supported = false; > > >> > } else { > > >> > - aux_supported = intel_miptree_sample_with_hiz(brw, mt); > > >> > + aux_supported = clear_supported = > intel_miptree_sample_with_hiz(brw, > > >> mt); > > >> > } > > >> > > > >> > intel_miptree_prepare_access(brw, mt, start_level, num_levels, > > >> > start_layer, num_layers, > > >> > - aux_supported, aux_supported); > > >> > + aux_supported, clear_supported); > > >> > if (aux_supported_out) > > >> > *aux_supported_out = aux_supported; > > >> > } > > >> > @@ -2488,7 +2630,8 @@ intel_miptree_make_shareable(struct > brw_context > > >> *brw, > > >> > * execute any will likely crash due to the missing aux > buffer. > > >> So let's > > >> > * delete all pending ops. > > >> > */ > > >> > - exec_list_make_empty(&mt->color_resolve_map); > > >> > + free(mt->aux_state); > > >> > + mt->aux_state = NULL; > > >> > } > > >> > > > >> > if (mt->hiz_buf) { > > >> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > >> > index 3f3a78d..528c32c 100644 > > >> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > >> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h > > >> > @@ -557,7 +557,14 @@ struct intel_mipmap_tree > > >> > * indicates state other than RESOLVED. 
> > >> > */ > > >> > struct exec_list hiz_map; /* List of intel_resolve_map. */ > > >> > - struct exec_list color_resolve_map; /* List of > intel_resolve_map. */ > > >> > + > > >> > + /** > > >> > + * \brief Maps miptree slices to their current aux state > > >> > + * > > >> > + * This two-dimensional array is indexed as [level][layer] and > > >> stores an > > >> > + * aux state for each slice. > > >> > + */ > > >> > + enum isl_aux_state **aux_state; > > >> > > > >> > /** > > >> > * \brief Stencil miptree for depthstencil textures. > > >> > @@ -827,41 +834,11 @@ intel_miptree_all_slices_resolve_depth(struct > > >> brw_context *brw, > > >> > > > >> > /**\}*/ > > >> > > > >> > -enum intel_fast_clear_state > > >> > -intel_miptree_get_fast_clear_state(const struct intel_mipmap_tree > *mt, > > >> > - unsigned level, unsigned layer); > > >> > - > > >> > -void > > >> > -intel_miptree_set_fast_clear_state(const struct brw_context *brw, > > >> > - struct intel_mipmap_tree *mt, > > >> > - unsigned level, > > >> > - unsigned first_layer, > > >> > - unsigned num_layers, > > >> > - enum intel_fast_clear_state > > >> new_state); > > >> > - > > >> > bool > > >> > intel_miptree_has_color_unresolved(const struct intel_mipmap_tree > *mt, > > >> > unsigned start_level, unsigned > > >> num_levels, > > >> > unsigned start_layer, unsigned > > >> num_layers); > > >> > > > >> > -/** > > >> > - * Update the fast clear state for a miptree to indicate that it > has > > >> been used > > >> > - * for rendering. > > >> > - */ > > >> > -void > > >> > -intel_miptree_used_for_rendering(const struct brw_context *brw, > > >> > - struct intel_mipmap_tree *mt, > > >> unsigned level, > > >> > - unsigned start_layer, unsigned > > >> num_layers); > > >> > - > > >> > -/** > > >> > - * Flag values telling color resolve pass which special types of > > >> buffers > > >> > - * can be ignored. 
> > >> > - * > > >> > - * INTEL_MIPTREE_IGNORE_CCS_E: Lossless compressed (single-sample > > >> > - * compression scheme since gen9) > > >> > - */ > > >> > -#define INTEL_MIPTREE_IGNORE_CCS_E (1 << 0) > > >> > - > > >> > > > >> > #define INTEL_REMAINING_LAYERS UINT32_MAX > > >> > #define INTEL_REMAINING_LEVELS UINT32_MAX > > >> > diff --git a/src/mesa/drivers/dri/i965/intel_resolve_map.h > > >> b/src/mesa/drivers/dri/i965/intel_resolve_map.h > > >> > index 17d3983..771d855 100644 > > >> > --- a/src/mesa/drivers/dri/i965/intel_resolve_map.h > > >> > +++ b/src/mesa/drivers/dri/i965/intel_resolve_map.h > > >> > @@ -33,63 +33,6 @@ extern "C" { > > >> > #endif > > >> > > > >> > /** > > >> > - * Enum for keeping track of the fast clear state of a buffer > > >> associated with > > >> > - * a miptree. > > >> > - * > > >> > - * Fast clear works by deferring the memory writes that would be > used > > >> to clear > > >> > - * the buffer, so that instead of performing them at the time of > the > > >> clear > > >> > - * operation, the hardware automatically performs them at the time > > >> that the > > >> > - * buffer is later accessed for rendering. The MCS buffer keeps > track > > >> of > > >> > - * which regions of the buffer still have pending clear writes. > > >> > - * > > >> > - * This enum keeps track of the driver's knowledge of pending fast > > >> clears in > > >> > - * the MCS buffer. > > >> > - * > > >> > - * MCS buffers only exist on Gen7+. > > >> > - */ > > >> > -enum intel_fast_clear_state > > >> > -{ > > >> > - /** > > >> > - * No deferred clears are pending for this miptree, and the > > >> contents of the > > >> > - * color buffer are entirely correct. An MCS buffer may or may > not > > >> exist > > >> > - * for this miptree. If it does exist, it is entirely in the > "no > > >> deferred > > >> > - * clears pending" state. If it does not exist, it will be > created > > >> the > > >> > - * first time a fast color clear is executed. 
> > >> > - * > > >> > - * In this state, the color buffer can be used for purposes > other > > >> than > > >> > - * rendering without needing a render target resolve. > > >> > - * > > >> > - * Since there is no such thing as a "fast color clear resolve" > for > > >> MSAA > > >> > - * buffers, an MSAA buffer will never be in this state. > > >> > - */ > > >> > - INTEL_FAST_CLEAR_STATE_RESOLVED, > > >> > - > > >> > - /** > > >> > - * An MCS buffer exists for this miptree, and deferred clears > are > > >> pending > > >> > - * for some regions of the color buffer, as indicated by the MCS > > >> buffer. > > >> > - * The contents of the color buffer are only correct for the > > >> regions where > > >> > - * the MCS buffer doesn't indicate a deferred clear. > > >> > - * > > >> > - * If a single-sample buffer is in this state, a render target > > >> resolve must > > >> > - * be performed before it can be used for purposes other than > > >> rendering. > > >> > - */ > > >> > - INTEL_FAST_CLEAR_STATE_UNRESOLVED, > > >> > - > > >> > - /** > > >> > - * An MCS buffer exists for this miptree, and deferred clears > are > > >> pending > > >> > - * for the entire color buffer, and the contents of the MCS > buffer > > >> reflect > > >> > - * this. The contents of the color buffer are undefined. > > >> > - * > > >> > - * If a single-sample buffer is in this state, a render target > > >> resolve must > > >> > - * be performed before it can be used for purposes other than > > >> rendering. > > >> > - * > > >> > - * If the client attempts to clear a buffer which is already in > > >> this state, > > >> > - * the clear can be safely skipped, since the buffer is already > > >> clear. > > >> > - */ > > >> > - INTEL_FAST_CLEAR_STATE_CLEAR, > > >> > -}; > > >> > - > > >> > -/** > > >> > * \brief Map of miptree slices to needed resolves. > > >> > * > > >> > * The map is implemented as a linear doubly-linked list. 
> > >> > @@ -121,10 +64,7 @@ struct intel_resolve_map { > > >> > uint32_t level; > > >> > uint32_t layer; > > >> > > > >> > - union { > > >> > - enum blorp_hiz_op need; > > >> > - enum intel_fast_clear_state fast_clear_state; > > >> > - }; > > >> > + enum blorp_hiz_op need; > > >> > }; > > >> > > > >> > void > > >> > -- > > >> > 2.5.0.400.gff86faf > > >> > > > >> > _______________________________________________ > > >> > mesa-dev mailing list > > >> > mesa-dev@lists.freedesktop.org > > >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > >> > > > > > > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev