[FFmpeg-devel] [PATCH 3/6] lavfi: add a Vulkan avgblur filter
Mark Thompson
sw at jkqxz.net
Sun Sep 2 23:40:58 EEST 2018
On 21/06/18 17:55, Rostislav Pehlivanov wrote:
> Signed-off-by: Rostislav Pehlivanov <atomnuker at gmail.com>
> ---
> configure | 1 +
> libavfilter/Makefile | 1 +
> libavfilter/allfilters.c | 1 +
> libavfilter/vf_avgblur_vulkan.c | 343 ++++++++++++++++++++++++++++++++
> 4 files changed, 346 insertions(+)
> create mode 100644 libavfilter/vf_avgblur_vulkan.c
This filter seems to always hang when run on current ANV (the Mesa Intel Vulkan driver):
$ ./ffmpeg_g -v 55 -y -i in.mp4 -an -init_hw_device vulkan=v:'Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)' -filter_hw_device v -vf 'hwupload,avgblur_vulkan,hwdownload' -c:v libx264 -frames:v 1000 out.mp4
...
[Parsed_avgblur_vulkan_1 @ 0x55f1ba284ac0] Shader linked! Size: 3464 bytes
INTEL-MESA: error: ../../../src/intel/vulkan/anv_device.c:2004: GPU hung on one of our command buffers (VK_ERROR_DEVICE_LOST)
[AVHWDeviceContext @ 0x55f1b9219b00] Unable to submit command buffer: VK_ERROR_DEVICE_LOST
with kernel log "[drm] GPU HANG: ecode 9:0:0x8ed9fff2, in ffmpeg_g [6451], reason: Hang on rcs0, action: reset".
It runs on RADV and the output looks plausible, but the result is nondeterministic (checksums never match). I think that means the shader program must contain a race or some other undefined behaviour.
> ...
> +
> +static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame *in)
> +{
> + int err;
> + AvgBlurVulkanContext *s = avctx->priv;
> + int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
> +
> + VkCommandBufferBeginInfo cmd_start = {
> + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
> + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
> + };
> +
> + VkComponentMapping null_map = {
> + .r = VK_COMPONENT_SWIZZLE_IDENTITY,
> + .g = VK_COMPONENT_SWIZZLE_IDENTITY,
> + .b = VK_COMPONENT_SWIZZLE_IDENTITY,
> + .a = VK_COMPONENT_SWIZZLE_IDENTITY,
> + };
> +
> + for (int i = 0; i < planes; i++) {
> + RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in,
> + ff_vk_plane_rep_fmt(s->vkctx.input_format, i),
> + ff_vk_aspect_flags(s->vkctx.input_format, i),
> + null_map, NULL));
> +
> + RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
> + ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
> + ff_vk_aspect_flags(s->vkctx.output_format, i),
> + null_map, NULL));
> +
> + s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
> + s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
> + }
Roughly this same fragment appears in all of the filters - maybe it should be abstracted into a shared helper in vulkan.c?
> +
> + ff_vk_update_descriptor_set(avctx, 0);
> +
> + vkBeginCommandBuffer(s->exec.buf, &cmd_start);
> +
> + {
> + VkImageMemoryBarrier bar[2] = {
> + {
> + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
> + .srcAccessMask = 0,
> + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
> + .oldLayout = in->layout,
> + .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
> + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> + .image = in->img,
> + .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
> + .subresourceRange.levelCount = 1,
> + .subresourceRange.layerCount = 1,
> + },
> + {
> + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
> + .srcAccessMask = 0,
> + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
> + .oldLayout = out->layout,
> + .newLayout = VK_IMAGE_LAYOUT_GENERAL,
> + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> + .image = out->img,
> + .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.output_format, -1),
> + .subresourceRange.levelCount = 1,
> + .subresourceRange.layerCount = 1,
> + },
> + };
> +
> + vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
> + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
> + 0, NULL, 0, NULL, 2, bar);
> +
> + in->layout = bar[0].newLayout;
> + in->access = bar[0].dstAccessMask;
> +
> + out->layout = bar[1].newLayout;
> + out->access = bar[1].dstAccessMask;
> + }
> +
> + vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
> + vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
> + vkCmdDispatch(s->exec.buf,
> + FFALIGN(s->vkctx.output_width, s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
> + FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
> +
> + vkEndCommandBuffer(s->exec.buf);
> +
> + VkSubmitInfo s_info = {
> + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
> + .commandBufferCount = 1,
> + .pCommandBuffers = &s->exec.buf,
> + };
> +
> + VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
> + if (ret != VK_SUCCESS) {
> + av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
> + ff_vk_ret2str(ret));
> + return AVERROR_EXTERNAL;
> + } else {
> + vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
> + vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
> + }
> +
> +fail:
> +
> + for (int i = 0; i < planes; i++) {
> + ff_vk_destroy_imageview(avctx, s->input_images[i].imageView);
> + ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
> + }
> +
> + return err;
> +}
> +
More information about the ffmpeg-devel mailing list