On Mon, Nov 27, 2017 at 07:06:03PM -0800, Jason Ekstrand wrote: > This makes us start tracking two bits per level for aux to describe both > whether or not something is fast-cleared and whether or not it is > compressed as opposed to a single "needs resolve" bit. The previous > scheme only worked because we assumed that CCS_E compressed images would > always be read with CCS_E enabled and so they didn't need any sort of > resolve if there was no fast clear color. > > The assumptions of the previous scheme held because the one case where > we do need a full resolve when CCS_E is enabled is for window-system > images. Since we only ever allowed X-tiled window-system images, CCS > was entirely disabled on gen9+ and we never got CCS_E. With the advent > of Y-tiled window-system buffers, we now need to properly support doing > a full resolve images marked CCS_E. This requires us to track things > more granularly because, if the client does two back-to-back transitions > where we first do a partial resolve and then a full resolve, we need > both resolves to happen. > --- > src/intel/vulkan/anv_private.h | 10 +-- > src/intel/vulkan/genX_cmd_buffer.c | 158 > ++++++++++++++++++++++++++++++------- > 2 files changed, 135 insertions(+), 33 deletions(-) > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h > index f805246..e875705 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -2476,16 +2476,16 @@ anv_fast_clear_state_entry_size(const struct > anv_device *device) > { > assert(device); > /* Entry contents: > - * +--------------------------------------------+ > - * | clear value dword(s) | needs resolve dword | > - * +--------------------------------------------+ > + * +---------------------------------------------+ > + * | clear value dword(s) | needs resolve dwords | > + * +---------------------------------------------+ > */ > > - /* Ensure that the needs resolve dword is in fact dword-aligned to enable > + /* Ensure that the needs resolve dwords are in fact dword-aligned to > enable > * GPU memcpy operations. > */ > assert(device->isl_dev.ss.clear_value_size % 4 == 0); > - return device->isl_dev.ss.clear_value_size + 4; > + return device->isl_dev.ss.clear_value_size + 8; > } > > static inline struct anv_address > diff --git a/src/intel/vulkan/genX_cmd_buffer.c > b/src/intel/vulkan/genX_cmd_buffer.c > index 6aeffa3..2d47179 100644 > --- a/src/intel/vulkan/genX_cmd_buffer.c > +++ b/src/intel/vulkan/genX_cmd_buffer.c > @@ -404,6 +404,22 @@ transition_depth_buffer(struct anv_cmd_buffer > *cmd_buffer, > 0, 0, 1, hiz_op); > } > > +/** Bitfield representing the needed resolves. > + * > + * This bitfield represents the kinds of compression we may or may not have > in > + * a given image. The ANV_IMAGE_NEEDS_* can be ANDed with the ANV_IMAGE_HAS > + * bits to determine when a given resolve actually needs to happen. > + * > + * For convenience of dealing with MI_PREDICATE, we blow these two bits out > + * into two dwords in the image meatadata page.
s/meatadata/metadata/ > + */ > +enum anv_image_resolve_bits { > + ANV_IMAGE_HAS_FAST_CLEAR_BIT = 0x1, > + ANV_IMAGE_HAS_COMPRESSION_BIT = 0x2, > + ANV_IMAGE_NEEDS_PARTIAL_RESOLVE_BITS = 0x1, > + ANV_IMAGE_NEEDS_FULL_RESOLVE_BITS = 0x3, > +}; > + > #define MI_PREDICATE_SRC0 0x2400 > #define MI_PREDICATE_SRC1 0x2408 > > @@ -411,23 +427,62 @@ transition_depth_buffer(struct anv_cmd_buffer > *cmd_buffer, > * performed properly. > */ > static void > -set_image_needs_resolve(struct anv_cmd_buffer *cmd_buffer, > - const struct anv_image *image, > - VkImageAspectFlagBits aspect, > - unsigned level, bool needs_resolve) > +set_image_needs_resolve_bits(struct anv_cmd_buffer *cmd_buffer, > + const struct anv_image *image, > + VkImageAspectFlagBits aspect, > + unsigned level, > + enum anv_image_resolve_bits set_bits) > { > assert(cmd_buffer && image); > assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); > assert(level < anv_image_aux_levels(image, aspect)); > > - /* The HW docs say that there is no way to guarantee the completion of > - * the following command. We use it nevertheless because it shows no > - * issues in testing is currently being used in the GL driver. > - */ Why are we dropping this comment now? > - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { > - sdi.Address = anv_image_get_needs_resolve_addr(cmd_buffer->device, > - image, aspect, level); > - sdi.ImmediateData = needs_resolve; > + const struct anv_address resolve_flag_addr = > + anv_image_get_needs_resolve_addr(cmd_buffer->device, > + image, aspect, level); > + > + if (set_bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) { > + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { > + sdi.Address = resolve_flag_addr; > + sdi.ImmediateData = 1; > + } > + } > + if (set_bits & ANV_IMAGE_HAS_COMPRESSION_BIT) { > + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { > + sdi.Address = resolve_flag_addr; > + sdi.Address.offset += 4; > + sdi.ImmediateData = 1; > + } > + } > +} > + > +static void > +clear_image_needs_resolve_bits(struct anv_cmd_buffer *cmd_buffer, > + const struct anv_image *image, > + VkImageAspectFlagBits aspect, > + unsigned level, > + enum anv_image_resolve_bits clear_bits) > +{ > + assert(cmd_buffer && image); > + assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); > + assert(level < anv_image_aux_levels(image, aspect)); > + > + const struct anv_address resolve_flag_addr = > + anv_image_get_needs_resolve_addr(cmd_buffer->device, > + image, aspect, level); > + > + if (clear_bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) { > + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { > + sdi.Address = resolve_flag_addr; > + sdi.ImmediateData = 0; > + } > + } > + if (clear_bits & ANV_IMAGE_HAS_COMPRESSION_BIT) { > + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { > + sdi.Address = resolve_flag_addr; > + sdi.Address.offset += 4; > + sdi.ImmediateData = 0; > + } Do set_image_needs_resolve_bits() and clear_image_needs_resolve_bits() deviate in later patches? Now the only difference is the value set for "sdi.ImmediateData". I can't help thinking of having one function and the value as an extra argument. The argument could be an enum documenting for the reader if SET or CLEAR is wanted. Not a big issue, just thinking aloud. Either way the logic itself looks good: Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com> > } > } > > @@ -435,7 +490,8 @@ static void > load_needs_resolve_predicate(struct anv_cmd_buffer *cmd_buffer, > const struct anv_image *image, > VkImageAspectFlagBits aspect, > - unsigned level) > + unsigned level, > + enum anv_image_resolve_bits bits) > { > assert(cmd_buffer && image); > assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); > @@ -450,9 +506,27 @@ load_needs_resolve_predicate(struct anv_cmd_buffer > *cmd_buffer, > */ > emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 , 0); > emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0); > - emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 , 0); > - emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, > - resolve_flag_addr.bo, resolve_flag_addr.offset); > + > + /* Conditionally load the two dwords into the high and low portions of > + * MI_PREDICATE_SRC0. This effectively ANDs the bits passed into this > + * function with the logical bits stored in the metadata page. Because > + * they're split out with one bit per dword, we don't need to use any > + * sort of MI math. > + */ > + if (bits & ANV_IMAGE_HAS_FAST_CLEAR_BIT) { > + emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, > + resolve_flag_addr.bo, resolve_flag_addr.offset); > + } else { > + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0, 0); > + } > + > + if (bits & ANV_IMAGE_HAS_COMPRESSION_BIT) { > + emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, > + resolve_flag_addr.bo, resolve_flag_addr.offset + 4); > + } else { > + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0); > + } > + > anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { > mip.LoadOperation = LOAD_LOADINV; > mip.CombineOperation = COMBINE_SET; > @@ -467,6 +541,18 @@ genX(cmd_buffer_mark_image_written)(struct > anv_cmd_buffer *cmd_buffer, > enum isl_aux_usage aux_usage, > unsigned level) > { > + /* The only compression types with more than just fast-clears are MCS, > + * CCS_E, and HiZ. With HiZ we just trust the layout and don't actually > + * track the current fast-clear and compression state. This leaves us > + * with just MCS and CCS_E. > + */ > + if (aux_usage != ISL_AUX_USAGE_CCS_E && > + aux_usage != ISL_AUX_USAGE_MCS) > + return; > + > + set_image_needs_resolve_bits(cmd_buffer, image, > + VK_IMAGE_ASPECT_COLOR_BIT, level, > + ANV_IMAGE_HAS_COMPRESSION_BIT); > } > > static void > @@ -488,8 +574,11 @@ init_fast_clear_state_entry(struct anv_cmd_buffer > *cmd_buffer, > * to return incorrect data. The fast clear data in CCS_D buffers should > * be removed because CCS_D isn't enabled all the time. > */ > - set_image_needs_resolve(cmd_buffer, image, aspect, level, > - aux_usage == ISL_AUX_USAGE_NONE); > + if (aux_usage == ISL_AUX_USAGE_NONE) { > + set_image_needs_resolve_bits(cmd_buffer, image, aspect, level, ~0); > + } else { > + clear_image_needs_resolve_bits(cmd_buffer, image, aspect, level, ~0); > + } > > /* The fast clear value dword(s) will be copied into a surface state > object. > * Ensure that the restrictions of the fields in the dword(s) are > followed. > @@ -812,12 +901,25 @@ transition_color_buffer(struct anv_cmd_buffer > *cmd_buffer, > layer_count = MIN2(layer_count, anv_image_aux_layers(image, aspect, > level)); > } > > - load_needs_resolve_predicate(cmd_buffer, image, aspect, level); > + enum anv_image_resolve_bits resolve_bits; > + switch (resolve_op) { > + case ISL_AUX_OP_FULL_RESOLVE: > + resolve_bits = ANV_IMAGE_NEEDS_FULL_RESOLVE_BITS; > + break; > + case ISL_AUX_OP_PARTIAL_RESOLVE: > + resolve_bits = ANV_IMAGE_NEEDS_PARTIAL_RESOLVE_BITS; > + break; > + default: > + unreachable("Invalid resolve op"); > + } > + load_needs_resolve_predicate(cmd_buffer, image, aspect, level, > + resolve_bits); > > anv_image_ccs_op(cmd_buffer, image, aspect, level, > base_layer, layer_count, resolve_op, true); > > - set_image_needs_resolve(cmd_buffer, image, aspect, level, false); > + clear_image_needs_resolve_bits(cmd_buffer, image, aspect, > + level, resolve_bits); > } > > cmd_buffer->state.pending_pipe_bits |= > @@ -2992,15 +3094,15 @@ cmd_buffer_subpass_sync_fast_clear_values(struct > anv_cmd_buffer *cmd_buffer) > * will match what's in every RENDER_SURFACE_STATE object when > it's > * being used for sampling. > */ > - set_image_needs_resolve(cmd_buffer, iview->image, > - VK_IMAGE_ASPECT_COLOR_BIT, > - iview->planes[0].isl.base_level, > - false); > + clear_image_needs_resolve_bits(cmd_buffer, iview->image, > + VK_IMAGE_ASPECT_COLOR_BIT, > + iview->planes[0].isl.base_level, > + ANV_IMAGE_HAS_FAST_CLEAR_BIT); > } else { > - set_image_needs_resolve(cmd_buffer, iview->image, > - VK_IMAGE_ASPECT_COLOR_BIT, > - iview->planes[0].isl.base_level, > - true); > + set_image_needs_resolve_bits(cmd_buffer, iview->image, > + VK_IMAGE_ASPECT_COLOR_BIT, > + iview->planes[0].isl.base_level, > + ANV_IMAGE_HAS_FAST_CLEAR_BIT); > } > } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) { > /* The attachment may have been fast-cleared in a previous render > -- > 2.5.0.400.gff86faf > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev