On 21/06/18 17:55, Rostislav Pehlivanov wrote: > Signed-off-by: Rostislav Pehlivanov <atomnu...@gmail.com> > --- > configure | 1 + > libavfilter/Makefile | 1 + > libavfilter/allfilters.c | 1 + > libavfilter/vf_avgblur_vulkan.c | 343 ++++++++++++++++++++++++++++++++ > 4 files changed, 346 insertions(+) > create mode 100644 libavfilter/vf_avgblur_vulkan.c
This filter seems to always hang when run on current ANV? $ ./ffmpeg_g -v 55 -y -i in.mp4 -an -init_hw_device vulkan=v:'Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)' -filter_hw_device v -vf 'hwupload,avgblur_vulkan,hwdownload' -c:v libx264 -frames:v 1000 out.mp4 ... [Parsed_avgblur_vulkan_1 @ 0x55f1ba284ac0] Shader linked! Size: 3464 bytes INTEL-MESA: error: ../../../src/intel/vulkan/anv_device.c:2004: GPU hung on one of our command buffers (VK_ERROR_DEVICE_LOST) [AVHWDeviceContext @ 0x55f1b9219b00] Unable to submit command buffer: VK_ERROR_DEVICE_LOST with kernel log "[drm] GPU HANG: ecode 9:0:0x8ed9fff2, in ffmpeg_g [6451], reason: Hang on rcs0, action: reset". It runs on RADV and the output looks plausible, but it's nondeterministic somehow (checksums never match). I think that means the shader program must be racing or contain some undefined behaviour. > ... > + > +static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame > *in) > +{ > + int err; > + AvgBlurVulkanContext *s = avctx->priv; > + int planes = av_pix_fmt_count_planes(s->vkctx.output_format); > + > + VkCommandBufferBeginInfo cmd_start = { > + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, > + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, > + }; > + > + VkComponentMapping null_map = { > + .r = VK_COMPONENT_SWIZZLE_IDENTITY, > + .g = VK_COMPONENT_SWIZZLE_IDENTITY, > + .b = VK_COMPONENT_SWIZZLE_IDENTITY, > + .a = VK_COMPONENT_SWIZZLE_IDENTITY, > + }; > + > + for (int i = 0; i < planes; i++) { > + RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in, > + > ff_vk_plane_rep_fmt(s->vkctx.input_format, i), > + ff_vk_aspect_flags(s->vkctx.input_format, > i), > + null_map, NULL)); > + > + RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, > out, > + > ff_vk_plane_rep_fmt(s->vkctx.output_format, i), > + > ff_vk_aspect_flags(s->vkctx.output_format, i), > + null_map, NULL)); > + > + s->input_images[i].imageLayout = > 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; > + s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; > + } Approximately this fragment seems to be common to all of the filters - maybe it should be factored out into vulkan.c? > + > + ff_vk_update_descriptor_set(avctx, 0); > + > + vkBeginCommandBuffer(s->exec.buf, &cmd_start); > + > + { > + VkImageMemoryBarrier bar[2] = { > + { > + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, > + .srcAccessMask = 0, > + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, > + .oldLayout = in->layout, > + .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, > + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, > + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, > + .image = in->img, > + .subresourceRange.aspectMask = > ff_vk_aspect_flags(s->vkctx.input_format, -1), > + .subresourceRange.levelCount = 1, > + .subresourceRange.layerCount = 1, > + }, > + { > + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, > + .srcAccessMask = 0, > + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, > + .oldLayout = out->layout, > + .newLayout = VK_IMAGE_LAYOUT_GENERAL, > + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, > + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, > + .image = out->img, > + .subresourceRange.aspectMask = > ff_vk_aspect_flags(s->vkctx.output_format, -1), > + .subresourceRange.levelCount = 1, > + .subresourceRange.layerCount = 1, > + }, > + }; > + > + vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, > + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, > + 0, NULL, 0, NULL, 2, bar); > + > + in->layout = bar[0].newLayout; > + in->access = bar[0].dstAccessMask; > + > + out->layout = bar[1].newLayout; > + out->access = bar[1].dstAccessMask; > + } > + > + vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, > s->vkctx.pipeline); > + vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, > s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, > 0, 0); > + vkCmdDispatch(s->exec.buf, > 
+ FFALIGN(s->vkctx.output_width, > s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0], > + FFALIGN(s->vkctx.output_height, > s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1); > + > + vkEndCommandBuffer(s->exec.buf); > + > + VkSubmitInfo s_info = { > + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, > + .commandBufferCount = 1, > + .pCommandBuffers = &s->exec.buf, > + }; > + > + VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence); > + if (ret != VK_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", > + ff_vk_ret2str(ret)); > + return AVERROR_EXTERNAL; > + } else { > + vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, > UINT64_MAX); > + vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence); > + } > + > +fail: > + > + for (int i = 0; i < planes; i++) { > + ff_vk_destroy_imageview(avctx, s->input_images[i].imageView); > + ff_vk_destroy_imageview(avctx, s->output_images[i].imageView); > + } > + > + return err; > +} > + _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel