On 2016-11-19 11:40:08, Jason Ekstrand wrote: > --- > src/intel/vulkan/anv_blorp.c | 102 > ++++++++++++++++++++++++++++++++----- > src/intel/vulkan/anv_private.h | 3 ++ > src/intel/vulkan/genX_cmd_buffer.c | 100 ++++++++++++++++++++++++++++++------ > 3 files changed, 176 insertions(+), 29 deletions(-) > > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c > index 24b98ab..cab1906 100644 > --- a/src/intel/vulkan/anv_blorp.c > +++ b/src/intel/vulkan/anv_blorp.c > @@ -1178,16 +1178,35 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer > *cmd_buffer) > struct blorp_surf surf; > get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, > att_state->aux_usage, &surf); > + surf.clear_color = vk_to_isl_color(att_state->clear_value.color); > > const VkRect2D render_area = cmd_buffer->state.render_area; > > - blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle, > - iview->isl.base_level, > - iview->isl.base_array_layer, fb->layers, > - render_area.offset.x, render_area.offset.y, > - render_area.offset.x + render_area.extent.width, > - render_area.offset.y + render_area.extent.height, > - vk_to_isl_color(att_state->clear_value.color), NULL); > + if (att_state->fast_clear) { > + blorp_fast_clear(&batch, &surf, iview->isl.format, > + iview->isl.base_level, > + iview->isl.base_array_layer, fb->layers, > + render_area.offset.x, render_area.offset.y, > + render_area.offset.x + render_area.extent.width, > + render_area.offset.y + render_area.extent.height); > + > + /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": > + * > + * "After Render target fast clear, pipe-control with color cache > + * write-flush must be issued before sending any DRAW commands on > + * that render target." 
> + */ > + cmd_buffer->state.pending_pipe_bits |= > + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; > + } else { > + blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle, > + iview->isl.base_level, > + iview->isl.base_array_layer, fb->layers, > + render_area.offset.x, render_area.offset.y, > + render_area.offset.x + render_area.extent.width, > + render_area.offset.y + render_area.extent.height, > + surf.clear_color, NULL); > + } > > att_state->pending_clear_aspects = 0; > } > @@ -1298,10 +1317,12 @@ ccs_resolve_attachment(struct anv_cmd_buffer > *cmd_buffer, > struct anv_attachment_state *att_state = > &cmd_buffer->state.attachments[att]; > > - assert(att_state->aux_usage != ISL_AUX_USAGE_CCS_D); > - if (att_state->aux_usage != ISL_AUX_USAGE_CCS_E) > + if (att_state->aux_usage == ISL_AUX_USAGE_NONE) > return; /* Nothing to resolve */ > > + assert(att_state->aux_usage == ISL_AUX_USAGE_CCS_E || > + att_state->aux_usage == ISL_AUX_USAGE_CCS_D); > + > struct anv_render_pass *pass = cmd_buffer->state.pass; > struct anv_subpass *subpass = cmd_buffer->state.subpass; > unsigned subpass_idx = subpass - pass->subpasses; > @@ -1312,14 +1333,17 @@ ccs_resolve_attachment(struct anv_cmd_buffer > *cmd_buffer, > * of a particular attachment. That way we only resolve once but it's > * still hot in the cache. > */ > + bool found_draw = false; > + enum anv_subpass_usage usage = 0; > for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) { > - enum anv_subpass_usage usage = pass->attachments[att].subpass_usage[s]; > + usage |= pass->attachments[att].subpass_usage[s]; > > if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) { > /* We found another subpass that draws to this attachment. We'll > * wait to resolve until then. 
> */ > - return; > + found_draw = true; > + break; > } > } > > @@ -1327,12 +1351,60 @@ ccs_resolve_attachment(struct anv_cmd_buffer > *cmd_buffer, > const struct anv_image *image = iview->image; > assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); > > - if (image->aux_usage == ISL_AUX_USAGE_CCS_E) > + enum blorp_fast_clear_op resolve_op = BLORP_FAST_CLEAR_OP_NONE; > + if (!found_draw) { > + /* This is the last subpass that writes to this attachment so we need > to > + * resolve here. Ideally, we would like to only resolve if the storeOp > + * is set to VK_ATTACHMENT_STORE_OP_STORE. However, we need to ensure > + * that the CCS bits are set to "resolved" because there may be copy or > + * blit operations (which may ignore CCS) between now and the next time > + * we render and we need to ensure that anything they write will be > + * respected in the next render. Unfortunately, the hardware does not > + * provide us with any sort of "invalidate" pass that sets the CCS to > + * "resolved" without writing to the render target. > + */ > + if (iview->image->aux_usage != ISL_AUX_USAGE_CCS_E) { > + /* The image destination surface doesn't support compression outside > + * the render pass. We need a full resolve. > + */ > + resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL; > + } else if (att_state->fast_clear) { > + /* We don't know what to do with clear colors outside the render > + * pass. We need a partial resolve. > + */ > + resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL; > + } else { > + /* The image "natively" supports all the compression we care about > + * and we don't need to resolve at all. If this is the case, we > also > + * don't need to resolve for any of the input attachment cases > below. > + */ > + } > + } else if (usage & ANV_SUBPASS_USAGE_INPUT) { > + /* Input attachments are clear-color aware so, at least on Sky Lake, we > + * can frequently sample from them with no resolves at all. 
> + */ > + if (att_state->aux_usage != att_state->input_aux_usage) { > + assert(att_state->input_aux_usage == ISL_AUX_USAGE_NONE); > + resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL; > + } else if (!att_state->clear_color_is_zero_one) { > + /* Sky Lake PRM, Vol. 2d, RENDER_SURFACE_STATE::Red Clear Color: > + * > + * "If Number of Multisamples is MULTISAMPLECOUNT_1 AND if this > RT > + * is fast cleared with non-0/1 clear value, this RT must be > + * partially resolved (refer to Partial Resolve operation) before > + * binding this surface to Sampler." > + */ > + resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL; > + } > + } > + > + if (resolve_op == BLORP_FAST_CLEAR_OP_NONE) > return; > > struct blorp_surf surf; > get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, > att_state->aux_usage, &surf); > + surf.clear_color = vk_to_isl_color(att_state->clear_value.color); > > /* From the Sky Lake PRM Vol. 7, "Render Target Resolve": > * > @@ -1353,12 +1425,14 @@ ccs_resolve_attachment(struct anv_cmd_buffer > *cmd_buffer, > blorp_ccs_resolve(batch, &surf, > iview->isl.base_level, > iview->isl.base_array_layer + layer, > - iview->isl.format, > - BLORP_FAST_CLEAR_OP_RESOLVE_FULL); > + iview->isl.format, resolve_op); > } > > cmd_buffer->state.pending_pipe_bits |= > ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; > + > + /* Once we've done any sort of resolve, we're no longer fast-cleared */ > + att_state->fast_clear = false; > } > > void > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h > index 7b521b1..4e6049c 100644 > --- a/src/intel/vulkan/anv_private.h > +++ b/src/intel/vulkan/anv_private.h > @@ -1087,11 +1087,14 @@ void anv_dynamic_state_copy(struct anv_dynamic_state > *dest, > */ > struct anv_attachment_state { > enum isl_aux_usage aux_usage; > + enum isl_aux_usage input_aux_usage; > struct anv_state color_rt_state; > struct anv_state input_att_state; > > VkImageAspectFlags pending_clear_aspects; > + bool fast_clear; 
> VkClearValue clear_value; > + bool clear_color_is_zero_one; > }; > > /** State required while building cmd buffer */ > diff --git a/src/intel/vulkan/genX_cmd_buffer.c > b/src/intel/vulkan/genX_cmd_buffer.c > index 1ad28fd..38579ce 100644 > --- a/src/intel/vulkan/genX_cmd_buffer.c > +++ b/src/intel/vulkan/genX_cmd_buffer.c > @@ -191,23 +191,87 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer, > } > } > > -static enum isl_aux_usage > -fb_attachment_get_aux_usage(struct anv_device *device, > - struct anv_framebuffer *fb, > - uint32_t attachment) > +static bool > +color_is_zero_one(VkClearColorValue value, enum isl_format format) > { > - struct anv_image_view *iview = fb->attachments[attachment]; > + if (isl_format_has_int_channel(format)) { > + for (unsigned i = 0; i < 4; i++) { > + if (value.int32[i] != 0 && value.int32[i] != 1) > + return false; > + } > + } else { > + for (unsigned i = 0; i < 4; i++) { > + if (value.float32[i] != 0.0f && value.float32[i] != 1.0f) > + return false; > + } > + } > > - if (iview->image->aux_surface.isl.size == 0) > - return ISL_AUX_USAGE_NONE; /* No aux surface */ > + return true; > +} > + > +static void > +color_attachment_compute_aux_usage(struct anv_device *device, > + struct anv_attachment_state *att_state, > + struct anv_image_view *iview, > + VkRect2D render_area, > + union isl_color_value *fast_clear_color) > +{ > + if (iview->image->aux_surface.isl.size == 0) { > + att_state->aux_usage = ISL_AUX_USAGE_NONE; > + att_state->input_aux_usage = ISL_AUX_USAGE_NONE; > + att_state->fast_clear = false; > + return; > + } > > assert(iview->image->aux_surface.isl.usage & ISL_SURF_USAGE_CCS_BIT); > > - if (isl_format_supports_lossless_compression(&device->info, > - iview->isl.format)) > - return ISL_AUX_USAGE_CCS_E; > + att_state->clear_color_is_zero_one = > + color_is_zero_one(att_state->clear_value.color, iview->isl.format); > + > + if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { > + /* Start off assuming 
fast clears are possible */ > + att_state->fast_clear = true; > > - return ISL_AUX_USAGE_NONE; > + /* Potentially, we could do partial fast-clears but doing so has crazy > + * alignment restrictions. It's easier to just restrict to full size > + * fast clears for now. > + */ > + if (render_area.offset.x != 0 || > + render_area.offset.y != 0 || > + render_area.extent.width != iview->extent.width || > + render_area.extent.height != iview->extent.height) > + att_state->fast_clear = false; > + > + if (att_state->fast_clear) { > + memcpy(fast_clear_color->u32, att_state->clear_value.color.uint32, > + sizeof(fast_clear_color->u32));
Rather than open-coding this memcpy, maybe vk_to_isl_color from patch 2 should be made available to all of anv and used here? -Jordan > + } > + } else { > + att_state->fast_clear = false; > + } > + > + if (isl_format_supports_lossless_compression(&device->info, > + iview->isl.format)) { > + att_state->aux_usage = ISL_AUX_USAGE_CCS_E; > + att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E; > + } else if (att_state->fast_clear) { > + att_state->aux_usage = ISL_AUX_USAGE_CCS_D; > + /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode: > + * > + * "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D > + * setting is only allowed if Surface Format supported for Fast > + * Clear. In addition, if the surface is bound to the sampling > + * engine, Surface Format must be supported for Render Target > + * Compression for surfaces bound to the sampling engine." > + * > + * In other words, we can't sample from a fast-cleared image if it > + * doesn't also support color compression. > + */ > + att_state->input_aux_usage = ISL_AUX_USAGE_NONE; > + } else { > + att_state->aux_usage = ISL_AUX_USAGE_NONE; > + att_state->input_aux_usage = ISL_AUX_USAGE_NONE; > + } > } > > static bool > @@ -350,9 +414,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer > *cmd_buffer, > struct anv_image_view *iview = framebuffer->attachments[i]; > assert(iview->vk_format == att->format); > > + union isl_color_value clear_color = { .u32 = { 0, } }; > if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { > - state->attachments[i].aux_usage = > - fb_attachment_get_aux_usage(cmd_buffer->device, framebuffer, > i); > + color_attachment_compute_aux_usage(cmd_buffer->device, > + &state->attachments[i], > + iview, begin->renderArea, > + &clear_color); > > struct isl_view view = iview->isl; > view.usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; > @@ -362,6 +429,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer > *cmd_buffer, > .view = &view, > .aux_surf = &iview->image->aux_surface.isl, > .aux_usage = state->attachments[i].aux_usage, 
> + .clear_color = clear_color, > .mocs = cmd_buffer->device->default_mocs); > > add_image_view_relocs(cmd_buffer, iview, > @@ -369,6 +437,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer > *cmd_buffer, > state->attachments[i].color_rt_state); > } else { > state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE; > + state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE; > } > > if (need_input_attachment_state(&pass->attachments[i])) { > @@ -386,11 +455,12 @@ genX(cmd_buffer_setup_attachments)(struct > anv_cmd_buffer *cmd_buffer, > .surf = surf, > .view = &view, > .aux_surf = &iview->image->aux_surface.isl, > - .aux_usage = state->attachments[i].aux_usage, > + .aux_usage = > state->attachments[i].input_aux_usage, > + .clear_color = clear_color, > .mocs = cmd_buffer->device->default_mocs); > > add_image_view_relocs(cmd_buffer, iview, > - state->attachments[i].aux_usage, > + state->attachments[i].input_aux_usage, > state->attachments[i].input_att_state); > } > } > -- > 2.5.0.400.gff86faf > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev