On Tue, Nov 28, 2017 at 10:13:52AM -0800, Jason Ekstrand wrote: > On Tue, Nov 28, 2017 at 10:07 AM, Jason Ekstrand <ja...@jlekstrand.net> > wrote: > > > This patch causes a perf drop in sascha gears. I'm investigating. > > > > Found it! Read below. > > > > On Mon, Nov 27, 2017 at 7:06 PM, Jason Ekstrand <ja...@jlekstrand.net> > > wrote: > > > >> --- > >> src/intel/vulkan/genX_cmd_buffer.c | 187 +++++++++++++----------------- > >> ------- > >> 1 file changed, 65 insertions(+), 122 deletions(-) > >> > >> diff --git a/src/intel/vulkan/genX_cmd_buffer.c > >> b/src/intel/vulkan/genX_cmd_buffer.c > >> index 7901b0c..2c4ab38 100644 > >> --- a/src/intel/vulkan/genX_cmd_buffer.c > >> +++ b/src/intel/vulkan/genX_cmd_buffer.c > >> @@ -2982,120 +2982,6 @@ cmd_buffer_emit_depth_stencil(struct > >> anv_cmd_buffer *cmd_buffer) > >> cmd_buffer->state.hiz_enabled = info.hiz_usage == ISL_AUX_USAGE_HIZ; > >> } > >> > >> - > >> -/** > >> - * @brief Perform any layout transitions required at the beginning > >> and/or end > >> - * of the current subpass for depth buffers. > >> - * > >> - * TODO: Consider preprocessing the attachment reference array at render > >> pass > >> - * create time to determine if no layout transition is needed at > >> the > >> - * beginning and/or end of each subpass. > >> - * > >> - * @param cmd_buffer The command buffer the transition is happening > >> within. > >> - * @param subpass_end If true, marks that the transition is happening at > >> the > >> - * end of the subpass. > >> - */ > >> -static void > >> -cmd_buffer_subpass_transition_layouts(struct anv_cmd_buffer * const > >> cmd_buffer, > >> - const bool subpass_end) > >> -{ > >> - /* We need a non-NULL command buffer. */ > >> - assert(cmd_buffer); > >> - > >> - const struct anv_cmd_state * const cmd_state = &cmd_buffer->state; > >> - const struct anv_subpass * const subpass = cmd_state->subpass; > >> - > >> - /* This function must be called within a subpass. */ > >> - assert(subpass); > >> - > >> - /* If there are attachment references, the array shouldn't be NULL. > >> - */ > >> - if (subpass->attachment_count > 0) > >> - assert(subpass->attachments); > >> - > >> - /* Iterate over the array of attachment references. */ > >> - for (const VkAttachmentReference *att_ref = subpass->attachments; > >> - att_ref < subpass->attachments + subpass->attachment_count; > >> att_ref++) { > >> - > >> - /* If the attachment is unused, we can't perform a layout > >> transition. */ > >> - if (att_ref->attachment == VK_ATTACHMENT_UNUSED) > >> - continue; > >> - > >> - /* This attachment index shouldn't go out of bounds. */ > >> - assert(att_ref->attachment < cmd_state->pass->attachment_count); > >> - > >> - const struct anv_render_pass_attachment * const att_desc = > >> - &cmd_state->pass->attachments[att_ref->attachment]; > >> - struct anv_attachment_state * const att_state = > >> - &cmd_buffer->state.attachments[att_ref->attachment]; > >> - > >> - /* The attachment should not be used in a subpass after its last. > >> */ > >> - assert(att_desc->last_subpass_idx >= > >> anv_get_subpass_id(cmd_state)); > >> - > >> - if (subpass_end && anv_get_subpass_id(cmd_state) < > >> - att_desc->last_subpass_idx) { > >> - /* We're calling this function on a buffer twice in one subpass > >> and > >> - * this is not the last use of the buffer. The layout should > >> not have > >> - * changed from the first call and no transition is necessary. > >> - */ > >> - assert(att_state->current_layout == att_ref->layout || > >> - att_state->current_layout == > >> - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); > >> - continue; > >> - } > >> - > >> - /* The attachment index must be less than the number of attachments > >> - * within the framebuffer. > >> - */ > >> - assert(att_ref->attachment < cmd_state->framebuffer->attach > >> ment_count); > >> - > >> - const struct anv_image_view * const iview = > >> - cmd_state->framebuffer->attachments[att_ref->attachment]; > >> - const struct anv_image * const image = iview->image; > >> - > >> - /* Get the appropriate target layout for this attachment. */ > >> - VkImageLayout target_layout; > >> - > >> - /* A resolve is necessary before use as an input attachment if the > >> clear > >> - * color or auxiliary buffer usage isn't supported by the sampler. > >> - */ > >> - const bool input_needs_resolve = > >> - (att_state->fast_clear && !att_state->clear_color_is_zero_one) > >> || > >> - att_state->input_aux_usage != att_state->aux_usage; > >> - if (subpass_end) { > >> - target_layout = att_desc->final_layout; > >> - } else if (iview->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV > >> && > >> - !input_needs_resolve) { > >> - /* Layout transitions before the final only help to enable > >> sampling as > >> - * an input attachment. If the input attachment supports > >> sampling > >> - * using the auxiliary surface, we can skip such transitions by > >> making > >> - * the target layout one that is CCS-aware. > >> - */ > >> - target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; > >> - } else { > >> - target_layout = att_ref->layout; > >> - } > >> - > >> - /* Perform the layout transition. */ > >> - if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { > >> - transition_depth_buffer(cmd_buffer, image, > >> - att_state->current_layout, > >> target_layout); > >> - att_state->aux_usage = > >> - anv_layout_to_aux_usage(&cmd_buffer->device->info, image, > >> - VK_IMAGE_ASPECT_DEPTH_BIT, > >> target_layout); > >> - } else if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { > >> - assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); > >> - transition_color_buffer(cmd_buffer, image, > >> VK_IMAGE_ASPECT_COLOR_BIT, > >> - iview->planes[0].isl.base_level, 1, > >> - iview->planes[0].isl.base_array_layer, > >> - iview->planes[0].isl.array_len, > >> - att_state->current_layout, > >> target_layout); > >> - } > >> - > >> - att_state->current_layout = target_layout; > >> - } > >> -} > >> - > >> static void > >> cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, > >> uint32_t subpass_id) > >> @@ -3120,11 +3006,6 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer > >> *cmd_buffer, > >> cmd_buffer->state.pending_pipe_bits |= > >> cmd_buffer->state.pass->subpass_flushes[subpass_id]; > >> > >> - /* Perform transitions to the subpass layout before any writes have > >> - * occurred. > >> - */ > >> - cmd_buffer_subpass_transition_layouts(cmd_buffer, false); > >> - > >> VkRect2D render_area = cmd_buffer->state.render_area; > >> struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; > >> > >> @@ -3139,6 +3020,39 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer > >> *cmd_buffer, > >> struct anv_image_view *iview = fb->attachments[a]; > >> const struct anv_image *image = iview->image; > >> > >> + /* A resolve is necessary before use as an input attachment if the > >> clear > >> + * color or auxiliary buffer usage isn't supported by the sampler. > >> + */ > >> + const bool input_needs_resolve = > >> + (att_state->fast_clear && !att_state->clear_color_is_zero_one) > >> || > >> + att_state->input_aux_usage != att_state->aux_usage; > >> + > >> + VkImageLayout target_layout; > >> + if (iview->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV && > >> + !input_needs_resolve) { > >> + /* Layout transitions before the final only help to enable > >> sampling > >> + * as an input attachment. If the input attachment supports > >> sampling > >> + * using the auxiliary surface, we can skip such transitions by > >> + * making the target layout one that is CCS-aware. > >> + */ > >> + target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; > >> + } else { > >> + target_layout = subpass->attachments[i].layout; > >> + } > >> + > >> + if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { > >> + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); > >> + transition_color_buffer(cmd_buffer, image, > >> VK_IMAGE_ASPECT_COLOR_BIT, > >> + iview->planes[0].isl.base_level, 1, > >> + iview->planes[0].isl.base_array_layer, > >> + iview->planes[0].isl.array_len, > >> + att_state->current_layout, > >> target_layout); > >> + } else if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { > >> + transition_depth_buffer(cmd_buffer, image, > >> + att_state->current_layout, > >> target_layout); > >> > > > I accidentally dropped a bit here: > > + att_state->aux_usage = > + anv_layout_to_aux_usage(&cmd_buffer->device->info, image, > + VK_IMAGE_ASPECT_DEPTH_BIT, > target_layout); > > I've added it locally. > > > > + } > >> + att_state->current_layout = target_layout; > >> + > >> if (att_state->pending_clear_aspects & VK_IMAGE_ASPECT_COLOR_BIT) > >> { > >> assert(att_state->pending_clear_aspects == > >> VK_IMAGE_ASPECT_COLOR_BIT); > >> > >> @@ -3251,13 +3165,42 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer > >> *cmd_buffer, > >> static void > >> cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) > >> { > >> + struct anv_cmd_state *cmd_state = &cmd_buffer->state; > >> + struct anv_subpass *subpass = cmd_state->subpass; > >> uint32_t subpass_id = anv_get_subpass_id(&cmd_buffer->state); > >> > >> anv_cmd_buffer_resolve_subpass(cmd_buffer); > >> > >> - /* Perform transitions to the final layout after all writes have > >> occurred. > >> - */ > >> - cmd_buffer_subpass_transition_layouts(cmd_buffer, true); > >> + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; > >> + for (uint32_t i = 0; i < subpass->attachment_count; ++i) { > >> + const uint32_t a = subpass->attachments[i].attachment; > >> + if (a == VK_ATTACHMENT_UNUSED) > >> + continue; > >> + > >> + if (cmd_state->pass->attachments[a].last_subpass_idx != > >> subpass_id) > >> + continue; > >> + > >> + assert(a < cmd_state->pass->attachment_count); > >> + struct anv_attachment_state *att_state = > >> &cmd_state->attachments[a]; > >> + struct anv_image_view *iview = fb->attachments[a]; > >> + const struct anv_image *image = iview->image; > >> + > >> + /* Transition the image into the final layout for this render pass > >> */ > >> + VkImageLayout target_layout = > >> + cmd_state->pass->attachments[a].final_layout; > >> + > >> + if (image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { > >> + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); > >> + transition_color_buffer(cmd_buffer, image, > >> VK_IMAGE_ASPECT_COLOR_BIT, > >> + iview->planes[0].isl.base_level, 1, > >> + iview->planes[0].isl.base_array_layer, > >> + iview->planes[0].isl.array_len, > >> + att_state->current_layout, > >> target_layout); > >> + } else if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { > >> + transition_depth_buffer(cmd_buffer, image, > >> + att_state->current_layout, > >> target_layout);
It looks to me original had it in this case also? > >> + } > >> + } > >> > >> /* Accumulate any subpass flushes that need to happen after the > >> subpass. > >> * Yes, they do get accumulated twice in the NextSubpass case but > >> since > >> -- > >> 2.5.0.400.gff86faf > >> > >> > > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev