May 11, 2023, 18:04 by an...@khirnov.net: > Quoting Lynne (2023-04-24 17:56:38) > >> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001 >> From: Lynne <d...@lynne.ee> >> Date: Tue, 15 Mar 2022 23:00:32 +0100 >> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame >> operations >> >> --- >> libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++---------- >> libavutil/hwcontext_vulkan.h | 40 +++++++- >> 2 files changed, 167 insertions(+), 49 deletions(-) >> >> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c >> index 894b4b83f3..b0db59b2d8 100644 >> --- a/libavutil/hwcontext_vulkan.c >> +++ b/libavutil/hwcontext_vulkan.c >> @@ -27,6 +27,7 @@ >> #include <dlfcn.h> >> #endif >> >> +#include <pthread.h> >> #include <unistd.h> >> >> #include "config.h" >> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv { >> VkPhysicalDeviceVulkan13Features device_features_1_3; >> >> /* Queues */ >> - uint32_t qfs[5]; >> - int num_qfs; >> + pthread_mutex_t **qf_mutex; >> + int nb_tot_qfs; >> + uint32_t img_qfs[5]; >> + int nb_img_qfs; >> > > This patch would be so much more readable without random renamings. >
They're not random; the meaning of each variable is different from what it meant before. nb_img_qfs is the total number of enabled queue families; nb_tot_qfs is the total number of queue families listed by the driver. >> /* Debug callback */ >> VkDebugUtilsMessengerEXT debug_ctx; >> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv { >> } VulkanFramesPriv; >> >> typedef struct AVVkFrameInternal { >> + pthread_mutex_t update_mutex; >> > > As far as I can see, none of the mutices you're adding here are > ever destroyed. > Fixed. >> + >> #if CONFIG_CUDA >> /* Importing external memory into cuda is really expensive so we keep the >> * memory imported all the time */ >> @@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx) >> if (p->libvulkan) >> dlclose(p->libvulkan); >> >> + for (int i = 0; i < p->nb_tot_qfs; i++) >> + av_freep(&p->qf_mutex[i]); >> + av_freep(&p->qf_mutex); >> + >> RELEASE_PROPS(hwctx->enabled_inst_extensions, >> hwctx->nb_enabled_inst_extensions); >> RELEASE_PROPS(hwctx->enabled_dev_extensions, >> hwctx->nb_enabled_dev_extensions); >> } >> @@ -1436,13 +1445,26 @@ end: >> return err; >> } >> >> +static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index) >> > > It'd be nice to be consistent with types. > These are uint32 in vulkan, no? > Fixed. Though, they're more closely related to the number of queue families given in the hwcontext, which are 32-bit ints. 
>> +{ >> + VulkanDevicePriv *p = ctx->internal->priv; >> + pthread_mutex_lock(&p->qf_mutex[queue_family][index]); >> +} >> + >> +static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int >> index) >> +{ >> + VulkanDevicePriv *p = ctx->internal->priv; >> + pthread_mutex_unlock(&p->qf_mutex[queue_family][index]); >> +} >> + >> static int vulkan_device_init(AVHWDeviceContext *ctx) >> { >> int err; >> - uint32_t queue_num; >> + uint32_t qf_num; >> AVVulkanDeviceContext *hwctx = ctx->hwctx; >> VulkanDevicePriv *p = ctx->internal->priv; >> FFVulkanFunctions *vk = &p->vkfn; >> + VkQueueFamilyProperties *qf; >> int graph_index, comp_index, tx_index, enc_index, dec_index; >> >> /* Set device extension flags */ >> @@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) >> p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de); >> p->dev_is_intel = (p->props.properties.vendorID == 0x8086); >> >> - vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, >> NULL); >> - if (!queue_num) { >> + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, >> NULL); >> + if (!qf_num) { >> av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); >> return AVERROR_EXTERNAL; >> } >> >> + qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties)); >> + if (!qf) >> + return AVERROR(ENOMEM); >> + >> + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, >> qf); >> + >> + p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex)); >> > > av_calloc() > >> + if (!p->qf_mutex) >> + return AVERROR(ENOMEM); >> + p->nb_tot_qfs = qf_num; >> + >> + for (int i = 0; i < qf_num; i++) { >> + p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex)); >> > > av_calloc() > >> + if (!p->qf_mutex[i]) >> + return AVERROR(ENOMEM); >> + for (int j = 0; j < qf[i].queueCount; j++) >> + pthread_mutex_init(&p->qf_mutex[i][j], NULL); >> > > Should be checked. > Fixed all three. 
>> + } >> + >> graph_index = hwctx->queue_family_index; >> comp_index = hwctx->queue_family_comp_index; >> tx_index = hwctx->queue_family_tx_index; >> @@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) >> return AVERROR(EINVAL); >> \ >> } else if (fidx < 0 || ctx_qf < 0) { >> \ >> break; >> \ >> - } else if (ctx_qf >= queue_num) { >> \ >> + } else if (ctx_qf >= qf_num) { >> \ >> av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i >> families)!\n", \ >> - type, ctx_qf, queue_num); >> \ >> + type, ctx_qf, qf_num); >> \ >> return AVERROR(EINVAL); >> \ >> } >> \ >> \ >> @@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) >> tx_index = (ctx_qf == tx_index) ? -1 : tx_index; >> \ >> enc_index = (ctx_qf == enc_index) ? -1 : enc_index; >> \ >> dec_index = (ctx_qf == dec_index) ? -1 : dec_index; >> \ >> - p->qfs[p->num_qfs++] = ctx_qf; >> \ >> + p->img_qfs[p->nb_img_qfs++] = ctx_qf; >> \ >> } while (0) >> >> CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, >> hwctx->nb_graphics_queues); >> @@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) >> >> #undef CHECK_QUEUE >> >> + if (!hwctx->lock_queue) >> + hwctx->lock_queue = lock_queue; >> + if (!hwctx->unlock_queue) >> + hwctx->unlock_queue = unlock_queue; >> + >> /* Get device capabilities */ >> vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); >> >> @@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f) >> { >> AVVkFrameInternal *internal = f->internal; >> >> - if (!internal) >> - return; >> - >> #if CONFIG_CUDA >> if (internal->cuda_fc_ref) { >> AVHWFramesContext *cuda_fc = (AVHWFramesContext >> *)internal->cuda_fc_ref->data; >> @@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, >> VulkanExecCtx *ectx, >> uint32_t src_qf, dst_qf; >> VkImageLayout new_layout; >> VkAccessFlags new_access; >> + AVVulkanFramesContext *vkfc = hwfc->hwctx; >> const int planes = 
av_pix_fmt_count_planes(hwfc->sw_format); >> VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; >> FFVulkanFunctions *vk = &p->vkfn; >> + AVFrame tmp = { .data[0] = (uint8_t *)frame }; >> > > ??? > This enables us to use the common dependency/dispatch code. The prepare_frame function is used for both frame initialization and frame import/export queue family transfer operations. In the former case, no AVFrame exists yet, so, as this is purely libavutil code, we create a temporary frame on stack. Otherwise, we'd need to allocate multiple frames somewhere, one for each possible command buffer dispatch. Comment added to commit message. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".