Hi Dave, I'd appreciate if you could split all the ac stuff into a separate patch.
The big issue I see with the patch is that we don't support multisample image stores yet. In particular, you have to copy over the fmask value (and find a way to skip over the fmask lookup in the tex instruction) or clear the fmask just for the dst rect. - Bas On Fri, Mar 9, 2018 at 7:30 AM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > It appears its quite legal to do image copies on multisample > images, however due to a bug in our txf handling and incomplete > tests we never actually noticed we didn't do it properly in radv. > > This patch implements a compute shader to copy multiple samples > of an image to another image. It implements the nir txf_ms_mcs > opcode for getting the fmask value, and then uses that value > to either copy sample 0 to all samples, or iterate across > the valid samples copying them. > > The shader is inspired by one RE'd from AMDVLK. > > Fixes: > dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image* > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/amd/common/ac_nir_to_llvm.c | 54 ++++++--- > src/amd/vulkan/radv_meta_bufimage.c | 229 > ++++++++++++++++++++++++++++++++++-- > src/amd/vulkan/radv_meta_copy.c | 2 +- > src/amd/vulkan/radv_private.h | 2 +- > 4 files changed, 258 insertions(+), 29 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 9b850698608..071f54a5b61 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -2277,6 +2277,7 @@ static LLVMValueRef build_tex_intrinsic(struct > ac_nir_context *ctx, > switch (instr->op) { > case nir_texop_txf: > case nir_texop_txf_ms: > + case nir_texop_txf_ms_mcs: > case nir_texop_samples_identical: > args->opcode = lod_is_zero || > instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? > @@ -3417,6 +3418,24 @@ glsl_is_array_image(const struct glsl_type *type) > } > > > +static LLVMValueRef get_fmask_desc_valid(struct ac_llvm_context *ctx, > + LLVMValueRef fmask_desc_ptr) > +{ > + LLVMValueRef fmask_desc = > + LLVMBuildBitCast(ctx->builder, fmask_desc_ptr, > + ctx->v8i32, ""); > + > + LLVMValueRef fmask_word1 = > + LLVMBuildExtractElement(ctx->builder, fmask_desc, > + ctx->i32_1, ""); > + > + LLVMValueRef word1_is_nonzero = > + LLVMBuildICmp(ctx->builder, LLVMIntNE, > + fmask_word1, ctx->i32_0, ""); > + > + return word1_is_nonzero; > +} > + > /* Adjust the sample index according to FMASK. > * > * For uncompressed MSAA surfaces, FMASK should return 0x76543210, > @@ -3475,17 +3494,7 @@ static LLVMValueRef > adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, > /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK > * resource descriptor is 0 (invalid), > */ > - LLVMValueRef fmask_desc = > - LLVMBuildBitCast(ctx->builder, fmask_desc_ptr, > - ctx->v8i32, ""); > - > - LLVMValueRef fmask_word1 = > - LLVMBuildExtractElement(ctx->builder, fmask_desc, > - ctx->i32_1, ""); > - > - LLVMValueRef word1_is_nonzero = > - LLVMBuildICmp(ctx->builder, LLVMIntNE, > - fmask_word1, ctx->i32_0, ""); > + LLVMValueRef word1_is_nonzero = get_fmask_desc_valid(ctx, > fmask_desc_ptr); > > /* Replace the MSAA sample index. */ > sample_index = > @@ -3518,7 +3527,7 @@ static LLVMValueRef get_image_coords(struct > ac_nir_context *ctx, > bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == > GLSL_SAMPLER_DIM_1D; > count = image_type_to_components_count(dim, is_array); > > - if (is_ms) { > + if (is_ms && instr->intrinsic == nir_intrinsic_image_load) { > LLVMValueRef fmask_load_address[3]; > int chan; > > @@ -4899,7 +4908,7 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx, > if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) > *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, > *samp_ptr); > } > - if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || > + if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || > instr->op == nir_texop_txf_ms_mcs || > instr->op == > nir_texop_samples_identical)) > *fmask_ptr = get_sampler_desc(ctx, instr->texture, > AC_DESC_FMASK, instr, false, false); > } > @@ -5150,7 +5159,7 @@ static void visit_tex(struct ac_nir_context *ctx, > nir_tex_instr *instr) > /* Pack LOD */ > if (lod && ((instr->op == nir_texop_txl || instr->op == > nir_texop_txf) && !lod_is_zero)) { > address[count++] = lod; > - } else if (instr->op == nir_texop_txf_ms && sample_index) { > + } else if ((instr->op == nir_texop_txf_ms || instr->op == > nir_texop_txf) && sample_index) { > address[count++] = sample_index; > } else if(instr->op == nir_texop_txs) { > count = 0; > @@ -5165,7 +5174,8 @@ static void visit_tex(struct ac_nir_context *ctx, > nir_tex_instr *instr) > address[chan], ctx->ac.i32, > ""); > } > > - if (instr->op == nir_texop_samples_identical) { > + if (instr->op == nir_texop_samples_identical || > + instr->op == nir_texop_txf_ms_mcs) { > LLVMValueRef txf_address[4]; > struct ac_image_args txf_args = { 0 }; > unsigned txf_count = count; > @@ -5182,12 +5192,22 @@ static void visit_tex(struct ac_nir_context *ctx, > nir_tex_instr *instr) > result = build_tex_intrinsic(ctx, instr, false, &txf_args); > > result = LLVMBuildExtractElement(ctx->ac.builder, result, > ctx->ac.i32_0, ""); > - result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, > ctx->ac.i32_0); > + > + /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the > FMASK > + * resource descriptor is 0 (invalid), > + */ > + LLVMValueRef word1_is_nonzero = > get_fmask_desc_valid(&ctx->ac, fmask_ptr); > + result = LLVMBuildSelect(ctx->ac.builder, word1_is_nonzero, > + result, instr->op == > nir_texop_samples_identical ? ctx->ac.i32_0 : LLVMConstInt(ctx->ac.i32, > 0x76543210, false), ""); > + > + if (instr->op == nir_texop_samples_identical) > + result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, > ctx->ac.i32_0); > + > goto write_result; > } > > if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && > - instr->op != nir_texop_txs) { > + instr->op == nir_texop_txf_ms) { > unsigned sample_chan = instr->is_array ? 3 : 2; > address[sample_chan] = > adjust_sample_index_using_fmask(&ctx->ac, > > address[0], > diff --git a/src/amd/vulkan/radv_meta_bufimage.c > b/src/amd/vulkan/radv_meta_bufimage.c > index adf610a933e..534a39f032c 100644 > --- a/src/amd/vulkan/radv_meta_bufimage.c > +++ b/src/amd/vulkan/radv_meta_bufimage.c > @@ -478,6 +478,192 @@ radv_device_finish_meta_btoi_state(struct radv_device > *device) > state->btoi.pipeline_3d, &state->alloc); > } > > +/* > + * for multisample copies we need to copy per the fmask value in the src. > + * - fetch the fmask value using txf_ms_mcs. > + * - if it's 0 all samples are identical, write the 0 sample > + * into all samples. > + * - if it's a mask value have to iterate per sample > + * and write the sample to the correct dest sample. > + */ > +static nir_shader * > +build_nir_itoi_compute_shader_ms(struct radv_device *dev) > +{ > + nir_builder b; > + enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_MS; > + const struct glsl_type *buf_type = glsl_sampler_type(dim, > + false, > + false, > + GLSL_TYPE_FLOAT); > + const struct glsl_type *img_type = glsl_sampler_type(dim, > + false, > + false, > + GLSL_TYPE_FLOAT); > + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); > + b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_cs_ms"); > + > + b.shader->info.cs.local_size[0] = 16; > + b.shader->info.cs.local_size[1] = 16; > + b.shader->info.cs.local_size[2] = 1; > + nir_variable *input_img = nir_variable_create(b.shader, > nir_var_uniform, > + buf_type, "s_tex"); > + input_img->data.descriptor_set = 0; > + input_img->data.binding = 0; > + > + nir_variable *output_img = nir_variable_create(b.shader, > nir_var_uniform, > + img_type, "out_img"); > + output_img->data.descriptor_set = 0; > + output_img->data.binding = 1; > + > + nir_ssa_def *invoc_id = nir_load_system_value(&b, > nir_intrinsic_load_local_invocation_id, 0); > + nir_ssa_def *wg_id = nir_load_system_value(&b, > nir_intrinsic_load_work_group_id, 0); > + nir_ssa_def *block_size = nir_imm_ivec4(&b, > + > b.shader->info.cs.local_size[0], > + > b.shader->info.cs.local_size[1], > + > b.shader->info.cs.local_size[2], 0); > + > + nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, > block_size), invoc_id); > + > + nir_intrinsic_instr *src_offset = > nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); > + nir_intrinsic_set_base(src_offset, 0); > + nir_intrinsic_set_range(src_offset, 24); > + src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); > + src_offset->num_components = 3; > + nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 3, 32, > "src_offset"); > + nir_builder_instr_insert(&b, &src_offset->instr); > + > + nir_intrinsic_instr *dst_offset = > nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); > + nir_intrinsic_set_base(dst_offset, 0); > + nir_intrinsic_set_range(dst_offset, 24); > + dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12)); > + dst_offset->num_components = 3; > + nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 3, 32, > "dst_offset"); > + nir_builder_instr_insert(&b, &dst_offset->instr); > + > + nir_ssa_def *src_coord = nir_iadd(&b, global_id, > &src_offset->dest.ssa); > + > + nir_ssa_def *dst_coord = nir_iadd(&b, global_id, > &dst_offset->dest.ssa); > + > + nir_tex_instr *mcs_mask = nir_tex_instr_create(b.shader, 2); > + mcs_mask->sampler_dim = dim; > + mcs_mask->op = nir_texop_txf_ms_mcs; > + mcs_mask->src[0].src_type = nir_tex_src_coord; > + mcs_mask->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, > 0x3)); > + mcs_mask->src[1].src_type = nir_tex_src_ms_index; > + mcs_mask->src[1].src = nir_src_for_ssa(nir_channel(&b, src_coord, 2)); > + mcs_mask->dest_type = nir_type_uint32; > + mcs_mask->is_array = false; > + mcs_mask->coord_components = 2; > + mcs_mask->texture = nir_deref_var_create(mcs_mask, input_img); > + mcs_mask->sampler = NULL; > + > + nir_ssa_dest_init(&mcs_mask->instr, &mcs_mask->dest, 4, 32, "mcs"); > + nir_builder_instr_insert(&b, &mcs_mask->instr); > + > + nir_ssa_def *mask_is_zero = nir_ieq(&b, nir_channel(&b, > &mcs_mask->dest.ssa, 0), nir_imm_int(&b, 0)); > + nir_if *if_miz = nir_push_if(&b, mask_is_zero); > + > + /* if all samples are 0 - just load once */ > + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2); > + tex->sampler_dim = dim; > + tex->op = nir_texop_txf_ms; > + tex->src[0].src_type = nir_tex_src_coord; > + tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 0x3)); > + tex->src[1].src_type = nir_tex_src_ms_index; > + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); > + tex->dest_type = nir_type_float; > + tex->is_array = false; > + tex->coord_components = 2; > + tex->texture = nir_deref_var_create(tex, input_img); > + tex->sampler = NULL; > + > + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); > + nir_builder_instr_insert(&b, &tex->instr); > + > + /* src_offset[2] is num samples */ > + /* write out all the samples */ > + nir_variable *loop_count = nir_local_variable_create(b.impl, > glsl_int_type(), "loop_count"); > + nir_variable *sample_idx = nir_local_variable_create(b.impl, > glsl_uint_type(), "sample_idx"); > + nir_store_var(&b, loop_count, nir_channel(&b, &src_offset->dest.ssa, > 2), 0x1); > + nir_store_var(&b, sample_idx, nir_imm_int(&b, 0), 0x1); > + nir_loop *loop1 = nir_push_loop(&b); > + > + nir_ssa_def *current_sample_idx = nir_load_var(&b, sample_idx); > + nir_ssa_def *outval = &tex->dest.ssa; > + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, > nir_intrinsic_image_store); > + store->src[0] = nir_src_for_ssa(dst_coord); > + store->src[1] = nir_src_for_ssa(current_sample_idx); > + store->src[2] = nir_src_for_ssa(outval); > + store->variables[0] = nir_deref_var_create(store, output_img); > + nir_builder_instr_insert(&b, &store->instr); > + current_sample_idx = nir_iadd(&b, current_sample_idx, nir_imm_int(&b, > 1)); > + nir_store_var(&b, sample_idx, current_sample_idx, 0x1); > + > + /* decrement loop count */ > + nir_ssa_def *current_loop_count = nir_load_var(&b, loop_count); > + current_loop_count = nir_isub(&b, current_loop_count, nir_imm_int(&b, > 1)); > + nir_store_var(&b, loop_count, current_loop_count, 0x1); > + > + nir_ssa_def *lcmp = nir_ieq(&b, current_loop_count, nir_imm_int(&b, > 0)); > + nir_if *loop_over = nir_push_if(&b, lcmp); > + nir_jump(&b, nir_jump_break); > + nir_pop_if(&b, loop_over); > + > + nir_pop_loop(&b, loop1); > + > + nir_push_else(&b, if_miz); > + > + nir_variable *sample_mask_var = nir_local_variable_create(b.impl, > glsl_uint_type(), "sample_mask"); > + nir_store_var(&b, loop_count, nir_channel(&b, &src_offset->dest.ssa, > 2), 0x1); > + nir_store_var(&b, sample_mask_var, nir_channel(&b, > &mcs_mask->dest.ssa, 0), 0x1); > + > + nir_loop *loop2 = nir_push_loop(&b); > + nir_ssa_def *sample_mask = nir_load_var(&b, sample_mask_var); > + nir_ssa_def *sample_id = nir_iand(&b, sample_mask, nir_imm_int(&b, > 0xf)); > + tex = nir_tex_instr_create(b.shader, 2); > + tex->sampler_dim = dim; > + tex->op = nir_texop_txf; > + tex->src[0].src_type = nir_tex_src_coord; > + tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 0x3)); > + tex->src[1].src_type = nir_tex_src_ms_index; > + tex->src[1].src = nir_src_for_ssa(sample_id); > + tex->dest_type = nir_type_float; > + tex->is_array = false; > + tex->coord_components = 2; > + tex->texture = nir_deref_var_create(tex, input_img); > + tex->sampler = NULL; > + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); > + nir_builder_instr_insert(&b, &tex->instr); > + > + outval = &tex->dest.ssa; > + store = nir_intrinsic_instr_create(b.shader, > nir_intrinsic_image_store); > + store->src[0] = nir_src_for_ssa(dst_coord); > + store->src[1] = nir_src_for_ssa(sample_id); > + store->src[2] = nir_src_for_ssa(outval); > + store->variables[0] = nir_deref_var_create(store, output_img); > + > + nir_builder_instr_insert(&b, &store->instr); > + > + sample_mask = nir_ushr(&b, sample_mask, nir_imm_int(&b, 4)); > + nir_store_var(&b, sample_mask_var, sample_mask, 0x1); > + > + /* decrement loop count */ > + current_loop_count = nir_load_var(&b, loop_count); > + current_loop_count = nir_isub(&b, current_loop_count, nir_imm_int(&b, > 1)); > + nir_store_var(&b, loop_count, current_loop_count, 0x1); > + > + lcmp = nir_ieq(&b, current_loop_count, nir_imm_int(&b, 0)); > + loop_over = nir_push_if(&b, lcmp); > + nir_jump(&b, nir_jump_break); > + nir_pop_if(&b, loop_over); > + > + nir_pop_loop(&b, loop2); > + > + nir_pop_if(&b, if_miz); > + > + return b.shader; > +} > + > static nir_shader * > build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d) > { > @@ -568,8 +754,10 @@ radv_device_init_meta_itoi_state(struct radv_device > *device) > { > VkResult result; > struct radv_shader_module cs = { .nir = NULL }; > + struct radv_shader_module cs_ms = { .nir = NULL }; > struct radv_shader_module cs_3d = { .nir = NULL }; > cs.nir = build_nir_itoi_compute_shader(device, false); > + cs_ms.nir = build_nir_itoi_compute_shader_ms(device); > if (device->physical_device->rad_info.chip_class >= GFX9) > cs_3d.nir = build_nir_itoi_compute_shader(device, true); > /* > @@ -631,17 +819,33 @@ radv_device_init_meta_itoi_state(struct radv_device > *device) > .pSpecializationInfo = NULL, > }; > > - VkComputePipelineCreateInfo vk_pipeline_info = { > - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, > - .stage = pipeline_shader_stage, > - .flags = 0, > - .layout = device->meta_state.itoi.img_p_layout, > + VkPipelineShaderStageCreateInfo pipeline_shader_stage_ms = { > + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, > + .stage = VK_SHADER_STAGE_COMPUTE_BIT, > + .module = radv_shader_module_to_handle(&cs_ms), > + .pName = "main", > + .pSpecializationInfo = NULL, > + }; > + > + VkComputePipelineCreateInfo vk_pipeline_info[2] = { > + { > + .sType = > VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, > + .stage = pipeline_shader_stage, > + .flags = 0, > + .layout = device->meta_state.itoi.img_p_layout, > + }, > + { > + .sType = > VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, > + .stage = pipeline_shader_stage_ms, > + .flags = 0, > + .layout = device->meta_state.itoi.img_p_layout, > + } > }; > > result = radv_CreateComputePipelines(radv_device_to_handle(device), > > radv_pipeline_cache_to_handle(&device->meta_state.cache), > - 1, &vk_pipeline_info, NULL, > - > &device->meta_state.itoi.pipeline); > + 2, vk_pipeline_info, NULL, > + > device->meta_state.itoi.pipeline); > if (result != VK_SUCCESS) > goto fail; > > @@ -688,7 +892,9 @@ radv_device_finish_meta_itoi_state(struct radv_device > *device) > state->itoi.img_ds_layout, > &state->alloc); > radv_DestroyPipeline(radv_device_to_handle(device), > - state->itoi.pipeline, &state->alloc); > + state->itoi.pipeline[0], &state->alloc); > + radv_DestroyPipeline(radv_device_to_handle(device), > + state->itoi.pipeline[1], &state->alloc); > if (device->physical_device->rad_info.chip_class >= GFX9) > radv_DestroyPipeline(radv_device_to_handle(device), > state->itoi.pipeline_3d, &state->alloc); > @@ -1173,15 +1379,18 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer > *cmd_buffer, > unsigned num_rects, > struct radv_meta_blit2d_rect *rects) > { > - VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline; > + VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[0]; > struct radv_device *device = cmd_buffer->device; > struct radv_image_view src_view, dst_view; > + int num_samples = dst->image->info.samples; > > create_iview(cmd_buffer, src, &src_view); > create_iview(cmd_buffer, dst, &dst_view); > > itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view); > > + if (src->image->info.samples > 1) > + pipeline = cmd_buffer->device->meta_state.itoi.pipeline[1]; > if (device->physical_device->rad_info.chip_class >= GFX9 && > src->image->type == VK_IMAGE_TYPE_3D) > pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d; > @@ -1192,7 +1401,7 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer > *cmd_buffer, > unsigned push_constants[6] = { > rects[r].src_x, > rects[r].src_y, > - src->layer, > + num_samples > 1 ? num_samples : src->layer, > rects[r].dst_x, > rects[r].dst_y, > dst->layer, > diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c > index 2a3faa64f1c..d9a67659daf 100644 > --- a/src/amd/vulkan/radv_meta_copy.c > +++ b/src/amd/vulkan/radv_meta_copy.c > @@ -338,7 +338,7 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer, > uint32_t regionCount, > const VkImageCopy *pRegions) > { > - bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE; > + bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE || > dest_image->info.samples > 1; > struct radv_meta_saved_state saved_state; > > /* From the Vulkan 1.0 spec: > diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h > index 0f8ddb2e106..ab513e7dc9b 100644 > --- a/src/amd/vulkan/radv_private.h > +++ b/src/amd/vulkan/radv_private.h > @@ -483,7 +483,7 @@ struct radv_meta_state { > struct { > VkPipelineLayout img_p_layout; > VkDescriptorSetLayout img_ds_layout; > - VkPipeline pipeline; > + VkPipeline pipeline[2]; > VkPipeline pipeline_3d; > } itoi; > struct { > -- > 2.14.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev