[FFmpeg-devel] [PATCH v3] lavfi: add nlmeans CUDA filter
nlmeans CUDA filter From f4c332c308865a33d42c8a2dfe251625506a30f0 Mon Sep 17 00:00:00 2001 From: Dylan Fernando Date: Sat, 6 Nov 2021 10:30:15 + Subject: [PATCH] lavfi: add nlmeans_cuda filter Signed-off-by: Dylan Fernando --- configure | 2 + doc/filters.texi | 4 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/version.h | 4 +- libavfilter/vf_nlmeans_cuda.c | 883 + libavfilter/vf_nlmeans_cuda.cu | 378 ++ 7 files changed, 1272 insertions(+), 2 deletions(-) create mode 100644 libavfilter/vf_nlmeans_cuda.c create mode 100644 libavfilter/vf_nlmeans_cuda.cu diff --git a/configure b/configure index c01aa480c7..ac756ef630 100755 --- a/configure +++ b/configure @@ -3103,6 +3103,8 @@ thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" transpose_npp_filter_deps="ffnvcodec libnpp" overlay_cuda_filter_deps="ffnvcodec" overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +nlmeans_cuda_filter_deps="ffnvcodec" +nlmeans_cuda_filter_deps_any="cuda_nvcc cuda_llvm" sharpen_npp_filter_deps="ffnvcodec libnpp" amf_deps_any="libdl LoadLibrary" diff --git a/doc/filters.texi b/doc/filters.texi index b537e421be..52e6208710 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15668,6 +15668,10 @@ Same as @option{r} but for chroma planes. The default value is @var{0} and means automatic. @end table +@section nlmeans_cuda + +Non-local Means denoise filter through CUDA, this filter accepts same options as @ref{nlmeans}. + @section nnedi Deinterlace video using neural network edge directed interpolation. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 552bd4e286..9e8f42c176 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -354,6 +354,8 @@ OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_negate.o OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o +OBJS-$(CONFIG_NLMEANS_CUDA_FILTER) += vf_nlmeans_cuda.o vf_nlmeans_cuda.ptx.o \ +cuda/load_helper.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 667b6fc246..8f812dcd87 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -340,6 +340,7 @@ extern const AVFilter ff_vf_msad; extern const AVFilter ff_vf_negate; extern const AVFilter ff_vf_nlmeans; extern const AVFilter ff_vf_nlmeans_opencl; +extern const AVFilter ff_vf_nlmeans_cuda; extern const AVFilter ff_vf_nnedi; extern const AVFilter ff_vf_noformat; extern const AVFilter ff_vf_noise; diff --git a/libavfilter/version.h b/libavfilter/version.h index 3bd3816698..cb831b4a1c 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -30,8 +30,8 @@ #include "libavutil/version.h" #define LIBAVFILTER_VERSION_MAJOR 8 -#define LIBAVFILTER_VERSION_MINOR 16 -#define LIBAVFILTER_VERSION_MICRO 101 +#define LIBAVFILTER_VERSION_MINOR 17 +#define LIBAVFILTER_VERSION_MICRO 100 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ diff --git a/libavfilter/vf_nlmeans_cuda.c b/libavfilter/vf_nlmeans_cuda.c new file mode 100644 index 00..1c838a8af1 --- /dev/null +++ b/libavfilter/vf_nlmeans_cuda.c @@ -0,0 +1,883 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" + +#include "avfilter.h" +#include "internal.h" + +#include "cuda/load_helper.h" + +static const enum AVPixelFormat supported_formats[] = { +AV_PIX_FMT_NV12, +AV_PIX_FMT_YUV420P, +AV_PIX_FMT_YUV444P +}; + + +#define CHECK
Re: [FFmpeg-devel] [PATCH v3] lavfi: add nlmeans CUDA filter
On Sat, Nov 6, 2021 at 10:34 AM Dylan Fernando wrote: > nlmeans CUDA filter > > > > removed query_formats > From 45dcc1bdc00657bb3613500e131b6fdeb64ac318 Mon Sep 17 00:00:00 2001 From: Dylan Fernando Date: Sat, 6 Nov 2021 17:33:48 + Subject: [PATCH] lavfi: add nlmeans_cuda filter Signed-off-by: Dylan Fernando --- configure | 2 + doc/filters.texi | 4 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/version.h | 4 +- libavfilter/vf_nlmeans_cuda.c | 871 + libavfilter/vf_nlmeans_cuda.cu | 378 ++ 7 files changed, 1260 insertions(+), 2 deletions(-) create mode 100644 libavfilter/vf_nlmeans_cuda.c create mode 100644 libavfilter/vf_nlmeans_cuda.cu diff --git a/configure b/configure index c01aa480c7..ac756ef630 100755 --- a/configure +++ b/configure @@ -3103,6 +3103,8 @@ thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" transpose_npp_filter_deps="ffnvcodec libnpp" overlay_cuda_filter_deps="ffnvcodec" overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +nlmeans_cuda_filter_deps="ffnvcodec" +nlmeans_cuda_filter_deps_any="cuda_nvcc cuda_llvm" sharpen_npp_filter_deps="ffnvcodec libnpp" amf_deps_any="libdl LoadLibrary" diff --git a/doc/filters.texi b/doc/filters.texi index b537e421be..52e6208710 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15668,6 +15668,10 @@ Same as @option{r} but for chroma planes. The default value is @var{0} and means automatic. @end table +@section nlmeans_cuda + +Non-local Means denoise filter through CUDA, this filter accepts same options as @ref{nlmeans}. + @section nnedi Deinterlace video using neural network edge directed interpolation. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 552bd4e286..9e8f42c176 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -354,6 +354,8 @@ OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_negate.o OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o +OBJS-$(CONFIG_NLMEANS_CUDA_FILTER) += vf_nlmeans_cuda.o vf_nlmeans_cuda.ptx.o \ +cuda/load_helper.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 667b6fc246..8f812dcd87 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -340,6 +340,7 @@ extern const AVFilter ff_vf_msad; extern const AVFilter ff_vf_negate; extern const AVFilter ff_vf_nlmeans; extern const AVFilter ff_vf_nlmeans_opencl; +extern const AVFilter ff_vf_nlmeans_cuda; extern const AVFilter ff_vf_nnedi; extern const AVFilter ff_vf_noformat; extern const AVFilter ff_vf_noise; diff --git a/libavfilter/version.h b/libavfilter/version.h index 3bd3816698..cb831b4a1c 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -30,8 +30,8 @@ #include "libavutil/version.h" #define LIBAVFILTER_VERSION_MAJOR 8 -#define LIBAVFILTER_VERSION_MINOR 16 -#define LIBAVFILTER_VERSION_MICRO 101 +#define LIBAVFILTER_VERSION_MINOR 17 +#define LIBAVFILTER_VERSION_MICRO 100 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ diff --git a/libavfilter/vf_nlmeans_cuda.c b/libavfilter/vf_nlmeans_cuda.c new file mode 100644 index 00..cece797e15 --- /dev/null +++ b/libavfilter/vf_nlmeans_cuda.c @@ -0,0 +1,871 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" + +#include "avfilter.h" +#include "internal.h" + +#include "cuda/load_helper.h" + +static const enum AVPixelFormat supported_formats[]
Re: [FFmpeg-devel] [PATCH v3] lavfi: add nlmeans CUDA filter
On Sun, 7 Nov 2021 at 4:38 am, Dylan Fernando wrote: > > > On Sat, Nov 6, 2021 at 10:34 AM Dylan Fernando > wrote: > >> nlmeans CUDA filter >> >> >> >> removed query_formats >> > Anybody have any feedback for this? ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v2] lavfi: add nlmeans CUDA filter
On Sun, Sep 5, 2021 at 8:37 AM Timo Rothenpieler wrote: > On 05.09.2021 20:02, Dylan Fernando wrote: > > > > Impossible to convert between the formats supported by the filter 'graph > 0 > > input from stream 0:0' and the filter 'auto_scale_0' > > > > Thread 1 "ffmpeg" received signal SIGSEGV, Segmentation fault. > > 0x556726eb in uninit (ctx=0x5817e800) at > > libavfilter/vf_nlmeans_cuda.c:704 > > 704 CudaFunctions *cu = s->hwctx->internal->cuda_dl; > > Seems like some error state can cause uninit to be called before the > hwctx is properly set up. > So it needs a guard against that case. > > > (gdb) backtrace > > #0 0x556726eb in uninit (ctx=0x5817e800) at > > libavfilter/vf_nlmeans_cuda.c:704 > > #1 0x55742e65 in avfilter_free (filter=0x5817e800) at > > libavfilter/avfilter.c:769 > > #2 0x55744cac in avfilter_graph_free (graph=0x5716ded0) at > > libavfilter/avfiltergraph.c:126 > > #3 0x55707ae0 in cleanup_filtergraph (fg=0x5716dec0) at > > fftools/ffmpeg_filter.c:952 > > #4 configure_filtergraph (fg=fg@entry=0x5716dec0) at > > fftools/ffmpeg_filter.c:1130 > > #5 0x5571b060 in ifilter_send_frame (frame=0x57a72d00, > > ifilter=0x5716db40) at fftools/ffmpeg.c:2242 > > #6 send_frame_to_filters (ist=ist@entry=0x5716c5c0, > > decoded_frame=decoded_frame@entry=0x57a72d00) at > fftools/ffmpeg.c:2323 > > #7 0x5571c204 in decode_video (decode_failed=, > > eof=, duration_pts=, got_output= > out>, pkt=, ist=) > > at fftools/ffmpeg.c:2520 > > #8 process_input_packet (ist=ist@entry=0x5716c5c0, > pkt=0x5716c7c0, > > no_eof=no_eof@entry=0) at fftools/ffmpeg.c:2682 > > #9 0x5571daee in process_input (file_index=) at > > fftools/ffmpeg.c:4636 > > #10 transcode_step () at fftools/ffmpeg.c:4776 > > #11 transcode () at fftools/ffmpeg.c:4830 > > #12 0x556f84a7 in main (argc=, argv= out>) > > at fftools/ffmpeg.c:5035 > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > 
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > > > attached updated patch > From 32239fc2225d4ff5cd8d7d3d20764d34b26e3a92 Mon Sep 17 00:00:00 2001 From: Dylan Fernando Date: Sun, 28 Nov 2021 17:42:55 + Subject: [PATCH] lavfi: add nlmeans_cuda filter Signed-off-by: Dylan Fernando --- configure | 2 + doc/filters.texi | 4 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/version.h | 2 +- libavfilter/vf_nlmeans_cuda.c | 871 + libavfilter/vf_nlmeans_cuda.cu | 378 ++ 7 files changed, 1259 insertions(+), 1 deletion(-) create mode 100644 libavfilter/vf_nlmeans_cuda.c create mode 100644 libavfilter/vf_nlmeans_cuda.cu diff --git a/configure b/configure index 4af36bf80a..1ec0453f44 100755 --- a/configure +++ b/configure @@ -3115,6 +3115,8 @@ thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" transpose_npp_filter_deps="ffnvcodec libnpp" overlay_cuda_filter_deps="ffnvcodec" overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +nlmeans_cuda_filter_deps="ffnvcodec" +nlmeans_cuda_filter_deps_any="cuda_nvcc cuda_llvm" sharpen_npp_filter_deps="ffnvcodec libnpp" amf_deps_any="libdl LoadLibrary" diff --git a/doc/filters.texi b/doc/filters.texi index 3731a14521..8f6c0ce22c 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15698,6 +15698,10 @@ Same as @option{r} but for chroma planes. The default value is @var{0} and means automatic. @end table +@section nlmeans_cuda + +Non-local Means denoise filter through CUDA, this filter accepts same options as @ref{nlmeans}. + @section nnedi Deinterlace video using neural network edge directed interpolation. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 0e27aeeff6..24d41567c5 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -357,6 +357,8 @@ OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_negate.o OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o +OBJS-$(CONFIG_NLMEANS_CUDA_FILTER) += vf_nlmeans_cuda.o vf_nlmeans_cuda.ptx.o \ +cuda/load_helper.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_
[FFmpeg-devel] [PATCH] lavfi: add gblur_opencl filter
--- configure | 1 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/opencl/gblur.cl | 62 +++ libavfilter/opencl_source.h | 1 + libavfilter/vf_gblur_opencl.c | 370 ++ 6 files changed, 437 insertions(+) create mode 100644 libavfilter/opencl/gblur.cl create mode 100644 libavfilter/vf_gblur_opencl.c diff --git a/configure b/configure index bbeaf2fadc..8c1d3cdf92 100755 --- a/configure +++ b/configure @@ -3451,6 +3451,7 @@ freezedetect_filter_select="scene_sad" frei0r_filter_deps="frei0r libdl" frei0r_src_filter_deps="frei0r libdl" fspp_filter_deps="gpl" +gblur_opencl_filter_deps="opencl" geq_filter_deps="gpl" histeq_filter_deps="gpl" hqdn3d_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index fef6ec5c55..230315ef39 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -243,6 +243,8 @@ OBJS-$(CONFIG_FREEZEDETECT_FILTER) += vf_freezedetect.o OBJS-$(CONFIG_FREI0R_FILTER) += vf_frei0r.o OBJS-$(CONFIG_FSPP_FILTER) += vf_fspp.o OBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o +OBJS-$(CONFIG_GBLUR_OPENCL_FILTER) += vf_gblur_opencl.o opencl.o \ +opencl/gblur.o OBJS-$(CONFIG_GEQ_FILTER)+= vf_geq.o OBJS-$(CONFIG_GRADFUN_FILTER)+= vf_gradfun.o OBJS-$(CONFIG_GRAPHMONITOR_FILTER) += f_graphmonitor.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index c51ae0f3c7..cb0fc051cc 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -229,6 +229,7 @@ extern AVFilter ff_vf_freezedetect; extern AVFilter ff_vf_frei0r; extern AVFilter ff_vf_fspp; extern AVFilter ff_vf_gblur; +extern AVFilter ff_vf_gblur_opencl; extern AVFilter ff_vf_geq; extern AVFilter ff_vf_gradfun; extern AVFilter ff_vf_graphmonitor; diff --git a/libavfilter/opencl/gblur.cl b/libavfilter/opencl/gblur.cl new file mode 100644 index 00..4fece30d4a --- /dev/null +++ b/libavfilter/opencl/gblur.cl @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Dylan Fernando + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +__kernel void gblur_conv_horz(__write_only image2d_t dst, + __read_only image2d_t src, + int coef_matrix_dim, + __constant float *coef_matrix) +{ +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + +const int half_matrix_dim = (coef_matrix_dim / 2); +int2 loc = (int2)(get_global_id(0), get_global_id(1)); +float4 convPix = (float4)(0.0f, 0.0f, 0.0f, 0.0f); + +for (int conv_j = -half_matrix_dim; conv_j <= half_matrix_dim; conv_j++) { +float4 px = read_imagef(src, sampler, loc + (int2)(conv_j, 0)); +convPix += px * coef_matrix[(conv_j + half_matrix_dim)]; +} + +write_imagef(dst, loc, convPix); +} + +__kernel void gblur_conv_vert(__write_only image2d_t dst, + __read_only image2d_t src, + int coef_matrix_dim, + __constant float *coef_matrix) +{ +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + +const int half_matrix_dim = (coef_matrix_dim / 2); +int2 loc = (int2)(get_global_id(0), get_global_id(1)); +float4 convPix = (float4)(0.0f, 0.0f, 0.0f, 0.0f); + +for (int conv_j = -half_matrix_dim; conv_j <= half_matrix_dim; conv_j++) { +float4 px = read_imagef(src, sampler, loc + (int2)(0, conv_j)); +convPix += px * 
coef_matrix[(conv_j + half_matrix_dim)]; +} + +write_imagef(dst, loc, convPix); +} diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h index 4118138c30..be7e826c4c 100644 --- a/libavfilter/opencl_source.h +++ b/libavfilter/opencl_source.h @@ -20,6 +20,7 @@ #define AVFIL
[FFmpeg-devel] [PATCHv2] lavfi: add gblur_opencl filter
--- configure | 1 + doc/filters.texi | 28 libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/opencl/gblur.cl | 62 +++ libavfilter/opencl_source.h | 1 + libavfilter/vf_gblur_opencl.c | 368 ++ 7 files changed, 463 insertions(+) create mode 100644 libavfilter/opencl/gblur.cl create mode 100644 libavfilter/vf_gblur_opencl.c diff --git a/configure b/configure index bbeaf2fadc..8c1d3cdf92 100755 --- a/configure +++ b/configure @@ -3451,6 +3451,7 @@ freezedetect_filter_select="scene_sad" frei0r_filter_deps="frei0r libdl" frei0r_src_filter_deps="frei0r libdl" fspp_filter_deps="gpl" +gblur_opencl_filter_deps="opencl" geq_filter_deps="gpl" histeq_filter_deps="gpl" hqdn3d_filter_deps="gpl" diff --git a/doc/filters.texi b/doc/filters.texi index 4bf96b6d90..80485fed83 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -19010,6 +19010,34 @@ Apply erosion filter with threshold0 set to 30, threshold1 set 40, threshold2 se @end example @end itemize +@section gblur_opencl + +Apply Gaussian blur filter. + +The filter accepts the following options: + +@table @option +@item sigma +Set horizontal sigma, standard deviation of Gaussian blur. Default is @code{0.5}. + +@item planes +Set which planes to filter. By default all planes are filtered. + +@item sigmaV +Set vertical sigma, if negative it will be same as @code{sigma}. +Default is @code{-1}. +@end table + +@subsection Example + +@itemize +@item +Apply Gaussian blur filter with horizontal and vertical sigma of 0.5. +@example +-i INPUT -vf "hwupload, gblur_opencl=0.5, hwdownload" OUTPUT +@end example +@end itemize + @section overlay_opencl Overlay one video on top of another. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index fef6ec5c55..230315ef39 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -243,6 +243,8 @@ OBJS-$(CONFIG_FREEZEDETECT_FILTER) += vf_freezedetect.o OBJS-$(CONFIG_FREI0R_FILTER) += vf_frei0r.o OBJS-$(CONFIG_FSPP_FILTER) += vf_fspp.o OBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o +OBJS-$(CONFIG_GBLUR_OPENCL_FILTER) += vf_gblur_opencl.o opencl.o \ +opencl/gblur.o OBJS-$(CONFIG_GEQ_FILTER)+= vf_geq.o OBJS-$(CONFIG_GRADFUN_FILTER)+= vf_gradfun.o OBJS-$(CONFIG_GRAPHMONITOR_FILTER) += f_graphmonitor.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index c51ae0f3c7..cb0fc051cc 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -229,6 +229,7 @@ extern AVFilter ff_vf_freezedetect; extern AVFilter ff_vf_frei0r; extern AVFilter ff_vf_fspp; extern AVFilter ff_vf_gblur; +extern AVFilter ff_vf_gblur_opencl; extern AVFilter ff_vf_geq; extern AVFilter ff_vf_gradfun; extern AVFilter ff_vf_graphmonitor; diff --git a/libavfilter/opencl/gblur.cl b/libavfilter/opencl/gblur.cl new file mode 100644 index 00..4fece30d4a --- /dev/null +++ b/libavfilter/opencl/gblur.cl @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Dylan Fernando + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +__kernel void gblur_conv_horz(__write_only image2d_t dst, + __read_only image2d_t src, + int coef_matrix_dim, + __constant float *coef_matrix) +{ +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_NEAREST); + +const int half_matrix_dim = (coef_matrix_dim / 2); +int2 loc = (int2)(get_global_id(0), get_global_id(1)); +float4 convPix = (float4)(0.0f, 0.0f, 0.0f, 0.0f); + +for (int conv_j = -half_matrix_dim; conv_j <= half_matrix_dim; conv_j++) { +float4 px = read_imagef(src, sampler, loc + (int2)(conv_j, 0)); +convPix += px * coef_matrix[(conv_j + half_matrix_dim)]; +} + +write_imagef(dst, loc, convPix); +} + +__kernel void gblur_co
Re: [FFmpeg-devel] [PATCH] lavfi: add gblur_opencl filter
On Thu, 25 Apr 2019 at 10:42 pm, Dylan Fernando wrote: > --- > configure | 1 + > libavfilter/Makefile | 2 + > libavfilter/allfilters.c | 1 + > libavfilter/opencl/gblur.cl | 62 +++ > libavfilter/opencl_source.h | 1 + > libavfilter/vf_gblur_opencl.c | 370 > ++ > 6 files changed, 437 insertions(+) > create mode 100644 libavfilter/opencl/gblur.cl > create mode 100644 libavfilter/vf_gblur_opencl.c > > diff --git a/configure b/configure > index bbeaf2fadc..8c1d3cdf92 100755 > --- a/configure > +++ b/configure > @@ -3451,6 +3451,7 @@ freezedetect_filter_select="scene_sad" > frei0r_filter_deps="frei0r libdl" > frei0r_src_filter_deps="frei0r libdl" > fspp_filter_deps="gpl" > +gblur_opencl_filter_deps="opencl" > geq_filter_deps="gpl" > histeq_filter_deps="gpl" > hqdn3d_filter_deps="gpl" > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index fef6ec5c55..230315ef39 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -243,6 +243,8 @@ OBJS-$(CONFIG_FREEZEDETECT_FILTER) += > vf_freezedetect.o > OBJS-$(CONFIG_FREI0R_FILTER) += vf_frei0r.o > OBJS-$(CONFIG_FSPP_FILTER) += vf_fspp.o > OBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o > +OBJS-$(CONFIG_GBLUR_OPENCL_FILTER) += vf_gblur_opencl.o > opencl.o \ > +opencl/gblur.o > OBJS-$(CONFIG_GEQ_FILTER)+= vf_geq.o > OBJS-$(CONFIG_GRADFUN_FILTER)+= vf_gradfun.o > OBJS-$(CONFIG_GRAPHMONITOR_FILTER) += f_graphmonitor.o > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index c51ae0f3c7..cb0fc051cc 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -229,6 +229,7 @@ extern AVFilter ff_vf_freezedetect; > extern AVFilter ff_vf_frei0r; > extern AVFilter ff_vf_fspp; > extern AVFilter ff_vf_gblur; > +extern AVFilter ff_vf_gblur_opencl; > extern AVFilter ff_vf_geq; > extern AVFilter ff_vf_gradfun; > extern AVFilter ff_vf_graphmonitor; > diff --git a/libavfilter/opencl/gblur.cl b/libavfilter/opencl/gblur.cl > new file mode 100644 > index 00..4fece30d4a > --- /dev/null > +++ 
b/libavfilter/opencl/gblur.cl > @@ -0,0 +1,62 @@ > +/* > + * Copyright (c) 2018 Dylan Fernando > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > + > +__kernel void gblur_conv_horz(__write_only image2d_t dst, > + __read_only image2d_t src, > + int coef_matrix_dim, > + __constant float *coef_matrix) > +{ > +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | > + CLK_ADDRESS_CLAMP_TO_EDGE | > + CLK_FILTER_NEAREST); > + > +const int half_matrix_dim = (coef_matrix_dim / 2); > +int2 loc = (int2)(get_global_id(0), get_global_id(1)); > +float4 convPix = (float4)(0.0f, 0.0f, 0.0f, 0.0f); > + > +for (int conv_j = -half_matrix_dim; conv_j <= half_matrix_dim; > conv_j++) { > +float4 px = read_imagef(src, sampler, loc + (int2)(conv_j, 0)); > +convPix += px * coef_matrix[(conv_j + half_matrix_dim)]; > +} > + > +write_imagef(dst, loc, convPix); > +} > + > +__kernel void gblur_conv_vert(__write_only image2d_t dst, > + __read_only image2d_t src, > + int coef_matrix_dim, > + __constant float *coef_matrix) > +{ > +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | > + CLK_ADDRESS_CLAMP_TO_EDGE | > + CLK_FILTER_NEAREST); > + > +const int half_matrix_dim = (coef_ma
Re: [FFmpeg-devel] [PATCH] lavfi: add gblur_opencl filter
On Tue, 30 Apr 2019 at 11:45 pm, Paul B Mahol wrote: > On 4/30/19, Dylan Fernando wrote: > > > > Anyone have any feedback? > > If I'm not mistaken there is already one available. > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". How do I run the available filter? > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] lavfi: add gblur_opencl filter
On Thu, May 2, 2019 at 11:27 PM Paul B Mahol wrote: > On 5/2/19, Dylan Fernando wrote: > > On Tue, 30 Apr 2019 at 11:45 pm, Paul B Mahol wrote: > > > >> On 4/30/19, Dylan Fernando wrote: > >> > > >> > Anyone have any feedback? > >> > >> If I'm not mistaken there is already one available. > >> ___ > >> ffmpeg-devel mailing list > >> ffmpeg-devel@ffmpeg.org > >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >> > >> To unsubscribe, visit link above, or email > >> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > > > > > > How do I run the available filter? > > Do you ask how to use gblur video filter? > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". Sorry, I mean how do I run the opencl gblur filter, if there is one available ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCHv2] lavfi: add gblur_opencl filter
Anyone have any comments/feedback? On Sat, 27 Apr 2019 at 1:05 am, Dylan Fernando wrote: > --- > configure | 1 + > doc/filters.texi | 28 > libavfilter/Makefile | 2 + > libavfilter/allfilters.c | 1 + > libavfilter/opencl/gblur.cl | 62 +++ > libavfilter/opencl_source.h | 1 + > libavfilter/vf_gblur_opencl.c | 368 > ++ > 7 files changed, 463 insertions(+) > create mode 100644 libavfilter/opencl/gblur.cl > create mode 100644 libavfilter/vf_gblur_opencl.c > > diff --git a/configure b/configure > index bbeaf2fadc..8c1d3cdf92 100755 > --- a/configure > +++ b/configure > @@ -3451,6 +3451,7 @@ freezedetect_filter_select="scene_sad" > frei0r_filter_deps="frei0r libdl" > frei0r_src_filter_deps="frei0r libdl" > fspp_filter_deps="gpl" > +gblur_opencl_filter_deps="opencl" > geq_filter_deps="gpl" > histeq_filter_deps="gpl" > hqdn3d_filter_deps="gpl" > diff --git a/doc/filters.texi b/doc/filters.texi > index 4bf96b6d90..80485fed83 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -19010,6 +19010,34 @@ Apply erosion filter with threshold0 set to 30, > threshold1 set 40, threshold2 se > @end example > @end itemize > > +@section gblur_opencl > + > +Apply Gaussian blur filter. > + > +The filter accepts the following options: > + > +@table @option > +@item sigma > +Set horizontal sigma, standard deviation of Gaussian blur. Default is > @code{0.5}. > + > +@item planes > +Set which planes to filter. By default all planes are filtered. > + > +@item sigmaV > +Set vertical sigma, if negative it will be same as @code{sigma}. > +Default is @code{-1}. > +@end table > + > +@subsection Example > + > +@itemize > +@item > +Apply Gaussian blur filter with horizontal and vertical sigma of 0.5. > +@example > +-i INPUT -vf "hwupload, gblur_opencl=0.5, hwdownload" OUTPUT > +@end example > +@end itemize > + > @section overlay_opencl > > Overlay one video on top of another. 
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index fef6ec5c55..230315ef39 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -243,6 +243,8 @@ OBJS-$(CONFIG_FREEZEDETECT_FILTER) += > vf_freezedetect.o > OBJS-$(CONFIG_FREI0R_FILTER) += vf_frei0r.o > OBJS-$(CONFIG_FSPP_FILTER) += vf_fspp.o > OBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o > +OBJS-$(CONFIG_GBLUR_OPENCL_FILTER) += vf_gblur_opencl.o > opencl.o \ > +opencl/gblur.o > OBJS-$(CONFIG_GEQ_FILTER)+= vf_geq.o > OBJS-$(CONFIG_GRADFUN_FILTER)+= vf_gradfun.o > OBJS-$(CONFIG_GRAPHMONITOR_FILTER) += f_graphmonitor.o > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index c51ae0f3c7..cb0fc051cc 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -229,6 +229,7 @@ extern AVFilter ff_vf_freezedetect; > extern AVFilter ff_vf_frei0r; > extern AVFilter ff_vf_fspp; > extern AVFilter ff_vf_gblur; > +extern AVFilter ff_vf_gblur_opencl; > extern AVFilter ff_vf_geq; > extern AVFilter ff_vf_gradfun; > extern AVFilter ff_vf_graphmonitor; > diff --git a/libavfilter/opencl/gblur.cl b/libavfilter/opencl/gblur.cl > new file mode 100644 > index 00..4fece30d4a > --- /dev/null > +++ b/libavfilter/opencl/gblur.cl > @@ -0,0 +1,62 @@ > +/* > + * Copyright (c) 2018 Dylan Fernando > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > + > +__kernel void gblur_conv_horz(__write_only image2d_t dst, > + __read_only image2d_t src, > + int coef_matrix_dim, > +
Re: [FFmpeg-devel] [PATCHv2] lavfi: add gblur_opencl filter
On Sun, May 12, 2019 at 10:47 PM Paul B Mahol wrote: > On 5/8/19, Song, Ruiling wrote: > > > > > >> -Original Message- > >> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf > >> Of Dylan Fernando > >> Sent: Tuesday, May 7, 2019 8:27 AM > >> To: ffmpeg-devel@ffmpeg.org > >> Subject: Re: [FFmpeg-devel] [PATCHv2] lavfi: add gblur_opencl filter > >> > >> Anyone have any comments/feedback? > > I think unsharp_opencl with a negative amount should do similar thing as > > this one. > > Not really. > > > What's the difference? Better quality? or better speed? > > This one can blur image with larger radius. > > But why step parameter was removed? > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". This one doesn't use a Gaussian approximation, it uses a Gaussian kernel calculated with: matrix_horiz[i] = (1 / sqrt(2 * 3.14159*pow(s->sigma, 2)))*exp(-(pow(x, 2) / (2 * pow(s->sigma, 2)))) with kernel size 6 * sigma. Should there be a parameter for adjusting the kernel size? ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] Segmentation fault when running vulkan
When running vulkan on arch linux, I get a segmentation fault. Command: ffmpeg -v verbose -init_hw_device vulkan=gpu:0.0 -filter_hw_device gpu -i atm.mp4 -filter_complex hwupload,avgblur_vulkan,hwdownload atmvulk.mp4 -loglevel debug Output: ffmpeg version N-102515-g175f675f7b Copyright (c) 2000-2021 the FFmpeg developers built with gcc 10.2.0 (GCC) configuration: --enable-opencl --enable-vulkan --enable-libglslang libavutil 57. 0.100 / 57. 0.100 libavcodec 59. 1.100 / 59. 1.100 libavformat59. 0.101 / 59. 0.101 libavdevice59. 0.100 / 59. 0.100 libavfilter 8. 0.101 / 8. 0.101 libswscale 6. 0.100 / 6. 0.100 libswresample 4. 0.100 / 4. 0.100 Splitting the commandline. Reading option '-v' ... matched as option 'v' (set logging level) with argument 'verbose'. Reading option '-init_hw_device' ... matched as option 'init_hw_device' (initialise hardware device) with argument 'vulkan=gpu:0.0'. Reading option '-filter_hw_device' ... matched as option 'filter_hw_device' (set hardware device used when filtering) with argument 'gpu'. Reading option '-i' ... matched as input url with argument 'atm.mp4'. Reading option '-filter_complex' ... matched as option 'filter_complex' (create a complex filtergraph) with argument 'hwupload,avgblur_vulkan=10,hwdownload'. Reading option 'atmvulk.mp4' ... matched as output url. Reading option '-loglevel' ... matched as option 'loglevel' (set logging level) with argument 'debug'. Finished splitting the commandline. Parsing a group of options: global . Applying option v (set logging level) with argument verbose. 
[AVHWDeviceContext @ 0x563094799c80] GPU listing: [AVHWDeviceContext @ 0x563094799c80] 0: NVIDIA GeForce RTX 2070 SUPER (discrete) (0x1e84) [AVHWDeviceContext @ 0x563094799c80] Using queue family 0 (total queues: 16) for graphics [AVHWDeviceContext @ 0x563094799c80] QF 0 flags: (graphics) (compute) (transfers) (sparse) [AVHWDeviceContext @ 0x563094799c80] Using queue family 2 (total queues: 8) for compute [AVHWDeviceContext @ 0x563094799c80] QF 2 flags: (compute) (transfers) (sparse) [AVHWDeviceContext @ 0x563094799c80] Using queue family 1 (total queues: 2) for transfers [AVHWDeviceContext @ 0x563094799c80] QF 1 flags: (transfers) (sparse) [AVHWDeviceContext @ 0x563094799c80] Using device extension "VK_KHR_external_memory_fd" [AVHWDeviceContext @ 0x563094799c80] Using device extension "VK_KHR_external_semaphore_fd" [AVHWDeviceContext @ 0x563094799c80] Using device extension "VK_EXT_external_memory_host" [AVHWDeviceContext @ 0x563094799c80] Using device extension "VK_KHR_push_descriptor" [AVHWDeviceContext @ 0x563094799c80] Using device extension "VK_EXT_host_query_reset" [AVHWDeviceContext @ 0x563094799c80] Using device extension VK_KHR_external_memory_fd [AVHWDeviceContext @ 0x563094799c80] Using device extension VK_KHR_external_semaphore_fd [AVHWDeviceContext @ 0x563094799c80] Using device extension VK_EXT_external_memory_host [AVHWDeviceContext @ 0x563094799c80] Using device extension VK_KHR_push_descriptor [AVHWDeviceContext @ 0x563094799c80] Using device extension VK_EXT_host_query_reset [AVHWDeviceContext @ 0x563094799c80] Using device: NVIDIA GeForce RTX 2070 SUPER [AVHWDeviceContext @ 0x563094799c80] Alignments: [AVHWDeviceContext @ 0x563094799c80] optimalBufferCopyRowPitchAlignment: 1 [AVHWDeviceContext @ 0x563094799c80] minMemoryMapAlignment: 64 [AVHWDeviceContext @ 0x563094799c80] minImportedHostPointerAlignment: 4096 Successfully parsed a group of options. Parsing a group of options: input url atm.mp4. Successfully parsed a group of options. 
Opening an input file: atm.mp4. [NULL @ 0x563094a2a3c0] Opening 'atm.mp4' for reading [file @ 0x563094a2ac00] Setting default whitelist 'file,crypto,data' [mov,mp4,m4a,3gp,3g2,mj2 @ 0x563094a2a3c0] Format mov,mp4,m4a,3gp,3g2,mj2 probed with size=2048 and score=100 [mov,mp4,m4a,3gp,3g2,mj2 @ 0x563094a2a3c0] ISO: File Type Major Brand: mp42 [mov,mp4,m4a,3gp,3g2,mj2 @ 0x563094a2a3c0] Unknown dref type 0x206c7275 size 12 Last message repeated 1 times [mov,mp4,m4a,3gp,3g2,mj2 @ 0x563094a2a3c0] Before avformat_find_stream_info() pos: 23751465 bytes read:74604 seeks:1 nb_streams:2 [h264 @ 0x563094a33c00] nal_unit_type: 7(SPS), nal_ref_idc: 3 [h264 @ 0x563094a33c00] nal_unit_type: 8(PPS), nal_ref_idc: 3 [h264 @ 0x563094a33c00] nal_unit_type: 9(AUD), nal_ref_idc: 0 [h264 @ 0x563094a33c00] nal_unit_type: 6(SEI), nal_ref_idc: 0 Last message repeated 1 times [h264 @ 0x563094a33c00] nal_unit_type: 5(IDR), nal_ref_idc: 3 [h264 @ 0x563094a33c00] Format yuv420p chosen by get_format(). [h264 @ 0x563094a33c00] Reinit context to 1024x576, pix_fmt: yuv420p [h264 @ 0x563094a33c00] nal_unit_type: 9(AUD), nal_ref_idc: 0 [h264 @ 0x563094a33c00] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 3 [h264 @ 0x563094a33c00] nal_unit_type: 9(AUD), nal_ref_idc: 0 [h264 @ 0x563094a33c00] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc
Re: [FFmpeg-devel] Unable to compile with cuda
I got it to work, with --enable-cuda as well, using: PKG_CONFIG_PATH="/home/dylan/Files/nv-codec-headers" ./configure --enable-opencl --enable-vulkan --enable-libglslang --disable-stripping --enable-nonfree --enable-cuda --enable-cuda-nvcc --extra-cflags=-I/opt/local/cuda/include --nvccflags="-gencode arch=compute_52,code=sm_52 -O2" Thanks, Dylan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Unable to compile with cuda
On Sun, May 23, 2021 at 3:03 PM Dennis Mungai wrote: > On Sun, 23 May 2021 at 15:08, Dylan Fernando wrote: > > > I got it to work, with --enable-cuda as well, using: > > > > PKG_CONFIG_PATH="/home/dylan/Files/nv-codec-headers" ./configure > > --enable-opencl --enable-vulkan --enable-libglslang --disable-stripping > > --enable-nonfree --enable-cuda --enable-cuda-nvcc > > --extra-cflags=-I/opt/local/cuda/include --nvccflags="-gencode > > arch=compute_52,code=sm_52 -O2" > > > > Thanks, > > Dylan > > > > > > Dylan, > > That has to be a very old build of FFmpeg. The option --enable-cuda has > been deprecated for months. Try building from a current release or git > master. > For the LLVM route, you can also pass nvccflags in this fashion: > --enable-cuda-llvm --nvccflags="--cuda-gpu-arch=sm_52 -O2". > These flags will differ based on your current GPU and the CUDA version in > use. See > > https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-steering-gpu-code-generation > FFmpeg's ./configure script picks a very old set of flags that are > deprecated in newer CUDA versions, hence the need to override these flags > with CUDA builds. See: > https://github.com/FFmpeg/FFmpeg/blob/master/configure#L4382 > The difference between the build with and without the proprietary CUDA SDK > will be the inclusion of the scale_npp filters. > With the proprietary SDK, both the scale_npp and scale_cuda filters will be > available. > With the cuda llvm route, only scale_cuda will be available. > For more on their usage, see https://superuser.com/a/1650962/473795 > And for more on compiling cuda with llvm, see the llvm wiki entry here: > https://llvm.org/docs/CompileCudaWithLLVM.html > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > Hi Dennis, Timo, I omitted --enable-cuda and it works correctly. 
I also switched to using --enable-cuda-llvm. Thanks, Dylan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] Unable to use sqrt() and exp() in CUDA
I can't seem to be able to use exp() and sqrt() in CUDA. I get: NVCC libavfilter/try_cuda.ptx clang-11: warning: Unknown CUDA version. cuda.h: CUDA_VERSION=11030. Assuming the latest supported version 10.1 [-Wunknown-cuda-version] libavfilter/try_cuda.cu:39:19: error: use of undeclared identifier 'exp' float g = exp(f); Regards, Dylan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Unable to use sqrt() and exp() in CUDA
Yeah, I was using enable-cuda-llvm. I haven't been including cuda.h in my cuda file. Could it be from the include in hwcontext_cuda.h possibly? I was using: PKG_CONFIG_PATH="/home/dylan/Files/nv-codec-headers" ./configure --enable-opencl --enable-vulkan --enable-libglslang --disable-stripping --enable-nonfree --enable-cuda-llvm --extra-cflags=-I/opt/local/cuda/include --nvccflags="--cuda-gpu-arch=sm_52 -O2" When I use nvcc, I don't seem to get the cuda.h error. Thanks, Dylan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Unable to use sqrt() and exp() in CUDA
I've only been including vector_helpers.cuh in the .cu file. I think cuda.h is being included because I include libavutil/hwcontext_cuda_internal.h in the c file. I was looking at vf_thumbnail_cuda.c for reference and I copied the line #include "libavutil/hwcontext_cuda_internal.h". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v1] lavfi: add nlmeans CUDA filter
From 2ddd2f5e0d9559bbbf46de3d7cfb7ffbbdbefceb Mon Sep 17 00:00:00 2001 From: Dylan Fernando Date: Tue, 27 Jul 2021 19:25:59 + Subject: [PATCH] lavfi: add nlmeans_cuda filter --- compat/cuda/cuda_runtime.h | 1 + configure | 2 + doc/filters.texi | 4 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/vf_nlmeans_cuda.c | 814 + libavfilter/vf_nlmeans_cuda.cu | 361 +++ 7 files changed, 1185 insertions(+) create mode 100644 libavfilter/vf_nlmeans_cuda.c create mode 100644 libavfilter/vf_nlmeans_cuda.cu diff --git a/compat/cuda/cuda_runtime.h b/compat/cuda/cuda_runtime.h index c5450b2542..c1e2143dde 100644 --- a/compat/cuda/cuda_runtime.h +++ b/compat/cuda/cuda_runtime.h @@ -184,5 +184,6 @@ static inline __device__ double fabs(double a) { return __builtin_fabs(a); } static inline __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); } static inline __device__ float __cosf(float a) { return __nvvm_cos_approx_f(a); } +static inline __device__ float exp(float a) { return __nvvm_ex2_approx_f(a); } #endif /* COMPAT_CUDA_CUDA_RUNTIME_H */ diff --git a/configure b/configure index 646d16e3c9..96a6fcde7d 100755 --- a/configure +++ b/configure @@ -3094,6 +3094,8 @@ thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" transpose_npp_filter_deps="ffnvcodec libnpp" overlay_cuda_filter_deps="ffnvcodec" overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +nlmeans_cuda_filter_deps="ffnvcodec" +nlmeans_cuda_filter_deps_any="cuda_nvcc cuda_llvm" amf_deps_any="libdl LoadLibrary" nvenc_deps="ffnvcodec" diff --git a/doc/filters.texi b/doc/filters.texi index 66c0f87e47..a0b68fc49f 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15228,6 +15228,10 @@ Same as @option{r} but for chroma planes. The default value is @var{0} and means automatic. @end table +@section nlmeans_cuda + +Non-local Means denoise filter through CUDA, this filter accepts same options as @ref{nlmeans}. + @section nnedi Deinterlace video using neural network edge directed interpolation. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 49c0c8342b..565923d85a 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -341,6 +341,8 @@ OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o +OBJS-$(CONFIG_NLMEANS_CUDA_FILTER) += vf_nlmeans_cuda.o vf_nlmeans_cuda.ptx.o \ +cuda/load_helper.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index ae74f9c891..5fcdfecfbc 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -327,6 +327,7 @@ extern const AVFilter ff_vf_msad; extern const AVFilter ff_vf_negate; extern const AVFilter ff_vf_nlmeans; extern const AVFilter ff_vf_nlmeans_opencl; +extern const AVFilter ff_vf_nlmeans_cuda; extern const AVFilter ff_vf_nnedi; extern const AVFilter ff_vf_noformat; extern const AVFilter ff_vf_noise; diff --git a/libavfilter/vf_nlmeans_cuda.c b/libavfilter/vf_nlmeans_cuda.c new file mode 100644 index 00..fd7e649556 --- /dev/null +++ b/libavfilter/vf_nlmeans_cuda.c @@ -0,0 +1,814 @@ + +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" + +#include "avfilter.h" +#include "internal.h" + +#include "cuda/load_helper.h" + +static const enum AVPixelFormat supported_formats[] = { +AV_PIX_FMT_YUV420P, +AV_PIX_FMT_NV12, +AV_PIX_FMT_YUV444P, +AV_PIX_FMT_P010, +AV_PIX_FMT_P016, +AV_PIX_FMT_YUV444P16, +AV_PIX_FMT_0RGB32, +AV_PIX_FMT_0BGR32, +}; + + +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) + +#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) +#define BLOCKX 32 +#define BLOCKY 16 + + + +typedef struct NLMeansCudaContext { +const AVClass 
*class; + +doublesigma; +int patch_size; +int patch_size_uv; +int research_size; +int research_size_uv; +int initialised; + +float h; + +AVBufferRef *hw_frames_ctx; +AVCUDADeviceContext *hwctx; + +CUmodulecu_module; + +CUfunction cu_func_horiz_uchar; +CUfunction cu_func_horiz_uchar2;
Re: [FFmpeg-devel] GSoC
On Thu, Mar 8, 2018 at 8:57 AM, Mark Thompson wrote: > On 07/03/18 03:56, Dylan Fernando wrote: > > Thanks, it works now > > > > Would trying to implement an OpenCL version of vf_fade be a good idea > for a > > qualification task, or would it be a better idea to try a different > filter? > > That sounds like a sensible choice to me, though if you haven't written a > filter before you might find it helpful to write something simpler first to > understand how it fits together (for example: vflip, which has trivial > processing parts but still needs the surrounding boilerplate). > > - Mark > > (PS: be aware that top-posting is generally frowned upon on this mailing > list.) > > > > On Wed, Mar 7, 2018 at 1:20 AM, Mark Thompson wrote: > > > >> On 06/03/18 12:37, Dylan Fernando wrote: > >>> Hi, > >>> > >>> I am Dylan Fernando. I am a Computer Science student from Australia. I > am > >>> new to FFmpeg and I wish to apply for GSoC this year. > >>> I would like to do the Video filtering with OpenCL project and I have a > >> few > >>> questions. Would trying to implement an opencl version of vf_fade be a > >> good > >>> idea for the qualification task, or would I be better off using a > >> different > >>> filter? > >>> > >>> Also, I’m having a bit of trouble with running unsharp_opencl. 
I tried > >>> running: > >>> ffmpeg -hide_banner -nostats -v verbose -init_hw_device opencl=ocl:0.1 > >>> -filter_hw_device ocl -i space.mpg -filter_complex unsharp_opencl > >> output.mp4 > >>> > >>> but I got the error: > >>> [AVHWDeviceContext @ 0x7fdac050c700] 0.1: Apple / Intel(R) Iris(TM) > >>> Graphics 6100 > >>> [mpeg @ 0x7fdac3132600] max_analyze_duration 500 reached at 5005000 > >>> microseconds st:0 > >>> Input #0, mpeg, from 'space.mpg': > >>> Duration: 00:00:21.99, start: 0.387500, bitrate: 6108 kb/s > >>> Stream #0:0[0x1e0]: Video: mpeg2video (Main), 1 reference frame, > >>> yuv420p(tv, bt470bg, bottom first, left), 720x480 [SAR 8:9 DAR 4:3], > 6000 > >>> kb/s, 29.97 fps, 29.97 tbr, 90k tbn, 59.94 tbc > >>> Stream mapping: > >>> Stream #0:0 (mpeg2video) -> unsharp_opencl > >>> unsharp_opencl -> Stream #0:0 (mpeg4) > >>> Press [q] to stop, [?] for help > >>> [graph 0 input from stream 0:0 @ 0x7fdac0418800] w:720 h:480 > >> pixfmt:yuv420p > >>> tb:1/9 fr:3/1001 sar:8/9 sws_param:flags=2 > >>> [auto_scaler_0 @ 0x7fdac05232c0] w:iw h:ih flags:'bilinear' interl:0 > >>> [Parsed_unsharp_opencl_0 @ 0x7fdac0715a80] auto-inserting filter > >>> 'auto_scaler_0' between the filter 'graph 0 input from stream 0:0' and > >> the > >>> filter 'Parsed_unsharp_opencl_0' > >>> Impossible to convert between the formats supported by the filter > 'graph > >> 0 > >>> input from stream 0:0' and the filter 'auto_scaler_0' > >>> Error reinitializing filters! > >>> Failed to inject frame into filter network: Function not implemented > >>> Error while processing the decoded data for stream #0:0 > >>> Conversion failed! > >>> > >>> How do I correctly run unsharp_opencl? Should I be running it on a > >>> different video file? > >> > >> It's intended to be used in filter graphs where much of the activity is > >> already happening on the GPU, so the input and output are in the > >> AV_PIX_FMT_OPENCL format which contains GPU-side OpenCL images. 
> >> > >> If you want to use it standalone then you need hwupload and hwdownload > >> filters to move the frames between the CPU and GPU. For your example, > it > >> should work with: > >> > >> ffmpeg -init_hw_device opencl=ocl:0.1 -filter_hw_device ocl -i space.mpg > >> -filter_complex hwupload,unsharp_opencl,hwdownload output.mp4 > >> > >> (There are constraints on what formats can be used and therefore > suitable > >> files (or required format conversions), but I believe a normal yuv420p > >> video like this should work in all cases.) > >> > >> - Mark > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > Thanks. How is AV_PIX_FMT_OPENCL formatted? When using read_imagef(), does xyzw correspond to RGBA respectively, or to YUV? Would I have to account for different formats? If so, how do I check the format of the input? Regards, Dylan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] GSoC
On Thu, Mar 15, 2018 at 12:08 PM, Dylan Fernando wrote: > > > On Sun, Mar 11, 2018 at 10:18 PM, Mark Thompson wrote: > >> On 11/03/18 04:36, Dylan Fernando wrote: >> > On Thu, Mar 8, 2018 at 8:57 AM, Mark Thompson wrote: >> > >> >> On 07/03/18 03:56, Dylan Fernando wrote: >> >>> Thanks, it works now >> >>> >> >>> Would trying to implement an OpenCL version of vf_fade be a good idea >> >> for a >> >>> qualification task, or would it be a better idea to try a different >> >> filter? >> >> >> >> That sounds like a sensible choice to me, though if you haven't >> written a >> >> filter before you might find it helpful to write something simpler >> first to >> >> understand how it fits together (for example: vflip, which has trivial >> >> processing parts but still needs the surrounding boilerplate). >> >> >> >> - Mark >> >> >> >> (PS: be aware that top-posting is generally frowned upon on this >> mailing >> >> list.) >> >> >> >> >> >>> On Wed, Mar 7, 2018 at 1:20 AM, Mark Thompson wrote: >> >>> >> >>>> On 06/03/18 12:37, Dylan Fernando wrote: >> >>>>> Hi, >> >>>>> >> >>>>> I am Dylan Fernando. I am a Computer Science student from >> Australia. I >> >> am >> >>>>> new to FFmpeg and I wish to apply for GSoC this year. >> >>>>> I would like to do the Video filtering with OpenCL project and I >> have a >> >>>> few >> >>>>> questions. Would trying to implement an opencl version of vf_fade >> be a >> >>>> good >> >>>>> idea for the qualification task, or would I be better off using a >> >>>> different >> >>>>> filter? >> >>>>> >> >>>>> Also, I’m having a bit of trouble with running unsharp_opencl. 
I >> tried >> >>>>> running: >> >>>>> ffmpeg -hide_banner -nostats -v verbose -init_hw_device >> opencl=ocl:0.1 >> >>>>> -filter_hw_device ocl -i space.mpg -filter_complex unsharp_opencl >> >>>> output.mp4 >> >>>>> >> >>>>> but I got the error: >> >>>>> [AVHWDeviceContext @ 0x7fdac050c700] 0.1: Apple / Intel(R) Iris(TM) >> >>>>> Graphics 6100 >> >>>>> [mpeg @ 0x7fdac3132600] max_analyze_duration 500 reached at >> 5005000 >> >>>>> microseconds st:0 >> >>>>> Input #0, mpeg, from 'space.mpg': >> >>>>> Duration: 00:00:21.99, start: 0.387500, bitrate: 6108 kb/s >> >>>>> Stream #0:0[0x1e0]: Video: mpeg2video (Main), 1 reference frame, >> >>>>> yuv420p(tv, bt470bg, bottom first, left), 720x480 [SAR 8:9 DAR 4:3], >> >> 6000 >> >>>>> kb/s, 29.97 fps, 29.97 tbr, 90k tbn, 59.94 tbc >> >>>>> Stream mapping: >> >>>>> Stream #0:0 (mpeg2video) -> unsharp_opencl >> >>>>> unsharp_opencl -> Stream #0:0 (mpeg4) >> >>>>> Press [q] to stop, [?] for help >> >>>>> [graph 0 input from stream 0:0 @ 0x7fdac0418800] w:720 h:480 >> >>>> pixfmt:yuv420p >> >>>>> tb:1/9 fr:3/1001 sar:8/9 sws_param:flags=2 >> >>>>> [auto_scaler_0 @ 0x7fdac05232c0] w:iw h:ih flags:'bilinear' interl:0 >> >>>>> [Parsed_unsharp_opencl_0 @ 0x7fdac0715a80] auto-inserting filter >> >>>>> 'auto_scaler_0' between the filter 'graph 0 input from stream 0:0' >> and >> >>>> the >> >>>>> filter 'Parsed_unsharp_opencl_0' >> >>>>> Impossible to convert between the formats supported by the filter >> >> 'graph >> >>>> 0 >> >>>>> input from stream 0:0' and the filter 'auto_scaler_0' >> >>>>> Error reinitializing filters! >> >>>>> Failed to inject frame into filter network: Function not implemented >> >>>>> Error while processing the decoded data for stream #0:0 >> >>>>> Conversion failed! >> >>>>> >> >>>>> How do I correctly run unsharp_opencl? Should I be running it on a >>
Re: [FFmpeg-devel] GSoC
On Sun, Mar 11, 2018 at 10:18 PM, Mark Thompson wrote: > On 11/03/18 04:36, Dylan Fernando wrote: > > On Thu, Mar 8, 2018 at 8:57 AM, Mark Thompson wrote: > > > >> On 07/03/18 03:56, Dylan Fernando wrote: > >>> Thanks, it works now > >>> > >>> Would trying to implement an OpenCL version of vf_fade be a good idea > >> for a > >>> qualification task, or would it be a better idea to try a different > >> filter? > >> > >> That sounds like a sensible choice to me, though if you haven't written > a > >> filter before you might find it helpful to write something simpler > first to > >> understand how it fits together (for example: vflip, which has trivial > >> processing parts but still needs the surrounding boilerplate). > >> > >> - Mark > >> > >> (PS: be aware that top-posting is generally frowned upon on this mailing > >> list.) > >> > >> > >>> On Wed, Mar 7, 2018 at 1:20 AM, Mark Thompson wrote: > >>> > >>>> On 06/03/18 12:37, Dylan Fernando wrote: > >>>>> Hi, > >>>>> > >>>>> I am Dylan Fernando. I am a Computer Science student from Australia. > I > >> am > >>>>> new to FFmpeg and I wish to apply for GSoC this year. > >>>>> I would like to do the Video filtering with OpenCL project and I > have a > >>>> few > >>>>> questions. Would trying to implement an opencl version of vf_fade be > a > >>>> good > >>>>> idea for the qualification task, or would I be better off using a > >>>> different > >>>>> filter? > >>>>> > >>>>> Also, I’m having a bit of trouble with running unsharp_opencl. 
I > tried > >>>>> running: > >>>>> ffmpeg -hide_banner -nostats -v verbose -init_hw_device > opencl=ocl:0.1 > >>>>> -filter_hw_device ocl -i space.mpg -filter_complex unsharp_opencl > >>>> output.mp4 > >>>>> > >>>>> but I got the error: > >>>>> [AVHWDeviceContext @ 0x7fdac050c700] 0.1: Apple / Intel(R) Iris(TM) > >>>>> Graphics 6100 > >>>>> [mpeg @ 0x7fdac3132600] max_analyze_duration 500 reached at > 5005000 > >>>>> microseconds st:0 > >>>>> Input #0, mpeg, from 'space.mpg': > >>>>> Duration: 00:00:21.99, start: 0.387500, bitrate: 6108 kb/s > >>>>> Stream #0:0[0x1e0]: Video: mpeg2video (Main), 1 reference frame, > >>>>> yuv420p(tv, bt470bg, bottom first, left), 720x480 [SAR 8:9 DAR 4:3], > >> 6000 > >>>>> kb/s, 29.97 fps, 29.97 tbr, 90k tbn, 59.94 tbc > >>>>> Stream mapping: > >>>>> Stream #0:0 (mpeg2video) -> unsharp_opencl > >>>>> unsharp_opencl -> Stream #0:0 (mpeg4) > >>>>> Press [q] to stop, [?] for help > >>>>> [graph 0 input from stream 0:0 @ 0x7fdac0418800] w:720 h:480 > >>>> pixfmt:yuv420p > >>>>> tb:1/9 fr:3/1001 sar:8/9 sws_param:flags=2 > >>>>> [auto_scaler_0 @ 0x7fdac05232c0] w:iw h:ih flags:'bilinear' interl:0 > >>>>> [Parsed_unsharp_opencl_0 @ 0x7fdac0715a80] auto-inserting filter > >>>>> 'auto_scaler_0' between the filter 'graph 0 input from stream 0:0' > and > >>>> the > >>>>> filter 'Parsed_unsharp_opencl_0' > >>>>> Impossible to convert between the formats supported by the filter > >> 'graph > >>>> 0 > >>>>> input from stream 0:0' and the filter 'auto_scaler_0' > >>>>> Error reinitializing filters! > >>>>> Failed to inject frame into filter network: Function not implemented > >>>>> Error while processing the decoded data for stream #0:0 > >>>>> Conversion failed! > >>>>> > >>>>> How do I correctly run unsharp_opencl? Should I be running it on a > >>>>> different video file? 
> >>>> > >>>> It's intended to be used in filter graphs where much of the activity > is > >>>> already happening on the GPU, so the input and output are in the > >>>> AV_PIX_FMT_OPENCL format which contains GPU-side OpenCL images. > >>>> > >>>> If you want to use it standalone then you need
[FFmpeg-devel] [PATCH] libavfilter/vf_avgblur_opencl.c: add openCL version of libavfilter/vf_avgblur.c filter
[master 319e56f87c] lavfi: Add OpenCL avgblur filter 6 files changed, 381 insertions(+) create mode 100644 libavfilter/opencl/avgblur.cl create mode 100644 libavfilter/vf_avgblur_opencl.c diff --git a/configure b/configure index fe81ba31b5..203737615c 100755 --- a/configure +++ b/configure @@ -3205,6 +3205,7 @@ aresample_filter_deps="swresample" ass_filter_deps="libass" atempo_filter_deps="avcodec" atempo_filter_select="rdft" +avgblur_opencl_filter_deps="opencl" azmq_filter_deps="libzmq" blackframe_filter_deps="gpl" boxblur_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 6a6083618d..6bf32ad260 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -138,6 +138,8 @@ OBJS-$(CONFIG_ALPHAMERGE_FILTER) += vf_alphamerge.o OBJS-$(CONFIG_ASS_FILTER)+= vf_subtitles.o OBJS-$(CONFIG_ATADENOISE_FILTER) += vf_atadenoise.o OBJS-$(CONFIG_AVGBLUR_FILTER)+= vf_avgblur.o +OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ +opencl/avgblur.o OBJS-$(CONFIG_BBOX_FILTER) += bbox.o vf_bbox.o OBJS-$(CONFIG_BENCH_FILTER) += f_bench.o OBJS-$(CONFIG_BITPLANENOISE_FILTER) += vf_bitplanenoise.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 9adb1090b7..cb04d1b113 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -148,6 +148,7 @@ static void register_all(void) REGISTER_FILTER(ASS,ass,vf); REGISTER_FILTER(ATADENOISE, atadenoise, vf); REGISTER_FILTER(AVGBLUR,avgblur,vf); +REGISTER_FILTER(AVGBLUR_OPENCL, avgblur_opencl, vf); REGISTER_FILTER(BBOX, bbox, vf); REGISTER_FILTER(BENCH, bench, vf); REGISTER_FILTER(BITPLANENOISE, bitplanenoise, vf); diff --git a/libavfilter/opencl/avgblur.cl b/libavfilter/opencl/avgblur.cl new file mode 100644 index 00..fff655529b --- /dev/null +++ b/libavfilter/opencl/avgblur.cl @@ -0,0 +1,60 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +__kernel void avgblur_horiz(__write_only image2d_t dst, +__read_only image2d_t src, +int rad) +{ +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_FILTER_NEAREST); +int2 loc = (int2)(get_global_id(0), get_global_id(1)); +int2 size = (int2)(get_global_size(0), get_global_size(1)); + +int count = 0; +float4 acc = (float4)(0,0,0,0); + +for (int xx = max(0,loc.x-rad); xx < min(loc.x+rad+1,size.x); xx++) +{ +count++; +acc += read_imagef(src, sampler, (int2)(xx, loc.y)); +} + +write_imagef(dst, loc, acc / count); +} + +__kernel void avgblur_vert(__write_only image2d_t dst, + __read_only image2d_t src, + int radv) +{ +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_FILTER_NEAREST); +int2 loc = (int2)(get_global_id(0), get_global_id(1)); +int2 size = (int2)(get_global_size(0), get_global_size(1)); + +int count = 0; +float4 acc = (float4)(0,0,0,0); + +for (int yy = max(0,loc.y-radv); yy < min(loc.y+radv+1,size.y); yy++) +{ +count++; +acc += read_imagef(src, sampler, (int2)(loc.x, yy)); +} + +write_imagef(dst, loc, acc / count); +} diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h index 23cdfc6ac9..02bc1723b0 100644 --- a/libavfilter/opencl_source.h +++ b/libavfilter/opencl_source.h @@ -19,6 
+19,7 @@ #ifndef AVFILTER_OPENCL_SOURCE_H #define AVFILTER_OPENCL_SOURCE_H +extern const char *ff_opencl_source_avgblur; extern const char *ff_opencl_source_overlay; extern const char *ff_opencl_source_unsharp; diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_ opencl.c new file mode 100644 index 00..6e5ae4f32e --- /dev/null +++ b/libavfilter/vf_avgblur_opencl.c @@ -0,0 +1,316 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU
Re: [FFmpeg-devel] [PATCH] lavfi: Add OpenCL avgblur filter
On Mon, Mar 19, 2018 at 6:16 AM, Mark Thompson wrote: > On 18/03/18 12:48, dylanf...@gmail.com wrote: > > From: drfer3 > > > > Behaves like the existing avgblur filter, except working on OpenCL > > hardware frames. Takes exactly the same options. > > --- > > configure | 1 + > > libavfilter/Makefile| 2 + > > libavfilter/allfilters.c| 1 + > > libavfilter/opencl/avgblur.cl | 60 > > libavfilter/opencl_source.h | 1 + > > libavfilter/vf_avgblur_opencl.c | 318 ++ > ++ > > 6 files changed, 383 insertions(+) > > create mode 100644 libavfilter/opencl/avgblur.cl > > create mode 100644 libavfilter/vf_avgblur_opencl.c > > > > diff --git a/configure b/configure > > index 0c5ed07a07..481d338caf 100755 > > --- a/configure > > +++ b/configure > > @@ -3202,6 +3202,7 @@ aresample_filter_deps="swresample" > > ass_filter_deps="libass" > > atempo_filter_deps="avcodec" > > atempo_filter_select="rdft" > > +avgblur_opencl_filter_deps="opencl" > > azmq_filter_deps="libzmq" > > blackframe_filter_deps="gpl" > > boxblur_filter_deps="gpl" > > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > > index fc16512e2c..1043b41d80 100644 > > --- a/libavfilter/Makefile > > +++ b/libavfilter/Makefile > > @@ -139,6 +139,8 @@ OBJS-$(CONFIG_ALPHAMERGE_FILTER) += > vf_alphamerge.o > > OBJS-$(CONFIG_ASS_FILTER)+= vf_subtitles.o > > OBJS-$(CONFIG_ATADENOISE_FILTER) += vf_atadenoise.o > > OBJS-$(CONFIG_AVGBLUR_FILTER)+= vf_avgblur.o > > +OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o > opencl.o \ > > +opencl/avgblur.o > > OBJS-$(CONFIG_BBOX_FILTER) += bbox.o vf_bbox.o > > OBJS-$(CONFIG_BENCH_FILTER) += f_bench.o > > OBJS-$(CONFIG_BITPLANENOISE_FILTER) += vf_bitplanenoise.o > > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > > index cc423af738..3f67e321bf 100644 > > --- a/libavfilter/allfilters.c > > +++ b/libavfilter/allfilters.c > > @@ -149,6 +149,7 @@ static void register_all(void) > > REGISTER_FILTER(ASS,ass,vf); > > REGISTER_FILTER(ATADENOISE, atadenoise, vf); > > 
REGISTER_FILTER(AVGBLUR,avgblur,vf); > > +REGISTER_FILTER(AVGBLUR_OPENCL, avgblur_opencl, vf); > > REGISTER_FILTER(BBOX, bbox, vf); > > REGISTER_FILTER(BENCH, bench, vf); > > REGISTER_FILTER(BITPLANENOISE, bitplanenoise, vf); > > diff --git a/libavfilter/opencl/avgblur.cl b/libavfilter/opencl/avgblur. > cl > > new file mode 100644 > > index 00..28e0c90d15 > > --- /dev/null > > +++ b/libavfilter/opencl/avgblur.cl > > @@ -0,0 +1,60 @@ > > +/* > > + * Copyright (c) 2018 Dylan Fernando > > + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. > > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with FFmpeg; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > > + */ > > + > > + > > +__kernel void avgblur_horiz(__write_only image2d_t dst, > > +__read_only image2d_t src, > > +int rad) > > +{ > > +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | > > + CLK_FILTER_NEAREST); > > +int2 loc = (int2)(get_global_id(0), get_global_id(1)); > > +int2 size = (int2)(get_global_size(0), get_global_size(1)); > > + > > +int count = 0; > > +float4 acc = (float4)(0,0,0,0); > > + > > +for (int xx = max(0,loc.x-rad); xx < min(loc.x+r
Re: [FFmpeg-devel] [PATCH] lavfi: Add OpenCL avgblur filter
On Tue, Mar 20, 2018 at 10:34 AM, Mark Thompson wrote: > On 19/03/18 02:30, dylanf...@gmail.com wrote: > > From: drfer3 > > > > Behaves like the existing avgblur filter, except working on OpenCL > > hardware frames. Takes exactly the same options. > > --- > > configure | 1 + > > libavfilter/Makefile| 2 + > > libavfilter/allfilters.c| 1 + > > libavfilter/opencl/avgblur.cl | 60 > > libavfilter/opencl_source.h | 1 + > > libavfilter/vf_avgblur_opencl.c | 328 ++ > ++ > > 6 files changed, 393 insertions(+) > > create mode 100644 libavfilter/opencl/avgblur.cl > > create mode 100644 libavfilter/vf_avgblur_opencl.c > > > > diff --git a/configure b/configure > > index 0c5ed07a07..481d338caf 100755 > > --- a/configure > > +++ b/configure > > @@ -3202,6 +3202,7 @@ aresample_filter_deps="swresample" > > ass_filter_deps="libass" > > atempo_filter_deps="avcodec" > > atempo_filter_select="rdft" > > +avgblur_opencl_filter_deps="opencl" > > azmq_filter_deps="libzmq" > > blackframe_filter_deps="gpl" > > boxblur_filter_deps="gpl" > > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > > index fc16512e2c..1043b41d80 100644 > > --- a/libavfilter/Makefile > > +++ b/libavfilter/Makefile > > @@ -139,6 +139,8 @@ OBJS-$(CONFIG_ALPHAMERGE_FILTER) += > vf_alphamerge.o > > OBJS-$(CONFIG_ASS_FILTER)+= vf_subtitles.o > > OBJS-$(CONFIG_ATADENOISE_FILTER) += vf_atadenoise.o > > OBJS-$(CONFIG_AVGBLUR_FILTER)+= vf_avgblur.o > > +OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o > opencl.o \ > > +opencl/avgblur.o > > OBJS-$(CONFIG_BBOX_FILTER) += bbox.o vf_bbox.o > > OBJS-$(CONFIG_BENCH_FILTER) += f_bench.o > > OBJS-$(CONFIG_BITPLANENOISE_FILTER) += vf_bitplanenoise.o > > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > > index cc423af738..3f67e321bf 100644 > > --- a/libavfilter/allfilters.c > > +++ b/libavfilter/allfilters.c > > @@ -149,6 +149,7 @@ static void register_all(void) > > REGISTER_FILTER(ASS,ass,vf); > > REGISTER_FILTER(ATADENOISE, atadenoise, vf); > > 
REGISTER_FILTER(AVGBLUR,avgblur,vf); > > +REGISTER_FILTER(AVGBLUR_OPENCL, avgblur_opencl, vf); > > REGISTER_FILTER(BBOX, bbox, vf); > > REGISTER_FILTER(BENCH, bench, vf); > > REGISTER_FILTER(BITPLANENOISE, bitplanenoise, vf); > > diff --git a/libavfilter/opencl/avgblur.cl b/libavfilter/opencl/avgblur. > cl > > new file mode 100644 > > index 00..6a8d70df93 > > --- /dev/null > > +++ b/libavfilter/opencl/avgblur.cl > > @@ -0,0 +1,60 @@ > > +/* > > + * Copyright (c) 2018 Dylan Fernando > > + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. > > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with FFmpeg; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > > + */ > > + > > + > > +__kernel void avgblur_horiz(__write_only image2d_t dst, > > +__read_only image2d_t src, > > +int rad) > > +{ > > +const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | > > + CLK_FILTER_NEAREST); > > +int2 loc = (int2)(get_global_id(0), get_global_id(1)); > > +int2 size = (int2)(get_global_size(0), get_global_size(1)); > > + > > +int count = 0; > > +float4 acc = (float4)(0,0,0,0); > > + > > +for (int xx = max(0, loc.x - rad); xx < min(loc.x + rad + 1, >
Re: [FFmpeg-devel] [PATCH] vf_avgblur_opencl: Don't run kernel on pixels outside the image
On Sun, Mar 25, 2018 at 1:06 AM, Mark Thompson wrote: > The output frame size is larger than the image containing a subsampled > plane - use the actual size of the image being written rather than the > dimensions of the intended output frame. > --- > libavfilter/vf_avgblur_opencl.c | 12 > 1 file changed, 8 insertions(+), 4 deletions(-) > > diff --git a/libavfilter/vf_avgblur_opencl.c > b/libavfilter/vf_avgblur_opencl.c > index 5ee66c0ba2..3a5b4a28ca 100644 > --- a/libavfilter/vf_avgblur_opencl.c > +++ b/libavfilter/vf_avgblur_opencl.c > @@ -170,8 +170,10 @@ static int avgblur_opencl_filter_frame(AVFilterLink > *inlink, AVFrame *input) > goto fail; > } > > -global_work[0] = output->width; > -global_work[1] = output->height; > +err = ff_opencl_filter_work_size_from_image(avctx, global_work, > +intermediate, p, 0); > +if (err < 0) > +goto fail; > > av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d " > "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", > @@ -206,8 +208,10 @@ static int avgblur_opencl_filter_frame(AVFilterLink > *inlink, AVFrame *input) > goto fail; > } > > -global_work[0] = output->width; > -global_work[1] = output->height; > +err = ff_opencl_filter_work_size_from_image(avctx, global_work, > +output, p, 0); > +if (err < 0) > +goto fail; > > av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d " > "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", > -- > 2.16.1 > Thanks. I tried the patch, it works correctly. Following is a patch attempting to fix the err issue. It returns -1 if any clSetKernelArg() fails. Is this good, or should I be using a different return value for this error? - Dylan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH] lavfi: Add OpenCL avgblur filter
On Fri, Mar 23, 2018 at 9:10 PM, Carl Eugen Hoyos wrote: > 2018-03-21 14:09 GMT+01:00, Dylan Fernando : > > > What information should I put in my GSoC application? How should I > > structure it? Should I give a rough timeline detailing exactly which > color > > conversion and scaling algorithms I’ll be implementing? If so, which > files > > should I look at to see the current colour conversion code? > > Two blogposts that are meant to help you: > https://medium.com/@owtf/google-summer-of-code-writing- > a-good-proposal-141b1376f076 > http://mirca.fun/gsoc-application/ > > But please remember that in this project, the qualification task > is more important than the form of the application. > You of course absolutely have to finish an application, without > it you cannot be chosen as student. > > Carl Eugen > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > Thanks, I have the project timeline part of my application below. Feel free to comment any suggestions: Proposal Video filtering with OpenCL Currently, FFmpeg has good support for offloading decoding and encoding from the CPU, but not such good support for doing anything else with that video without using the CPU. There is interop support for mapping between APIs so that generic methods like OpenCL which can be run on any GPU can be used together with specific video APIs for decoding and encoding, but currently there are only a few operations which can actually use this so many use-cases require expensive additional steps to download frames from the GPU and then upload them again later after performing some processing on the CPU. Therefore, we would like to add more OpenCL filter support to libavfilter so that more operations can be offloaded. This includes implementing a scaler, supporting a choice of scaling algorithms, a deinterlacer and color conversion. Timeline I have a report due for uni in June. This should only take a day or two. 
Other than that, I have no commitments. All tasks below include writing documentation. Week 1-2: Implement OpenCL deinterlacer based on yadif Week 3-6: Implement OpenCL YUV-to-YUV color conversion based on vf_colorspace Week 7-9: Implement OpenCL RGB colorspace conversion Week 10-14: Implement scaling algorithms with OpenCL including bilinear, bicubic, and averaging area ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] GSoC
Hi, I am Dylan Fernando. I am a Computer Science student from Australia. I am new to FFmpeg and I wish to apply for GSoC this year. I would like to do the Video filtering with OpenCL project and I have a few questions. Would trying to implement an opencl version of vf_fade be a good idea for the qualification task, or would I be better off using a different filter? Also, I’m having a bit of trouble with running unsharp_opencl. I tried running: ffmpeg -hide_banner -nostats -v verbose -init_hw_device opencl=ocl:0.1 -filter_hw_device ocl -i space.mpg -filter_complex unsharp_opencl output.mp4 but I got the error: [AVHWDeviceContext @ 0x7fdac050c700] 0.1: Apple / Intel(R) Iris(TM) Graphics 6100 [mpeg @ 0x7fdac3132600] max_analyze_duration 500 reached at 5005000 microseconds st:0 Input #0, mpeg, from 'space.mpg': Duration: 00:00:21.99, start: 0.387500, bitrate: 6108 kb/s Stream #0:0[0x1e0]: Video: mpeg2video (Main), 1 reference frame, yuv420p(tv, bt470bg, bottom first, left), 720x480 [SAR 8:9 DAR 4:3], 6000 kb/s, 29.97 fps, 29.97 tbr, 90k tbn, 59.94 tbc Stream mapping: Stream #0:0 (mpeg2video) -> unsharp_opencl unsharp_opencl -> Stream #0:0 (mpeg4) Press [q] to stop, [?] for help [graph 0 input from stream 0:0 @ 0x7fdac0418800] w:720 h:480 pixfmt:yuv420p tb:1/9 fr:3/1001 sar:8/9 sws_param:flags=2 [auto_scaler_0 @ 0x7fdac05232c0] w:iw h:ih flags:'bilinear' interl:0 [Parsed_unsharp_opencl_0 @ 0x7fdac0715a80] auto-inserting filter 'auto_scaler_0' between the filter 'graph 0 input from stream 0:0' and the filter 'Parsed_unsharp_opencl_0' Impossible to convert between the formats supported by the filter 'graph 0 input from stream 0:0' and the filter 'auto_scaler_0' Error reinitializing filters! Failed to inject frame into filter network: Function not implemented Error while processing the decoded data for stream #0:0 Conversion failed! How do I correctly run unsharp_opencl? Should I be running it on a different video file? 
Best Regards, Dylan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] GSoC
Thanks, it works now. Would trying to implement an OpenCL version of vf_fade be a good idea for a qualification task, or would it be a better idea to try a different filter? Regards, Dylan On Wed, Mar 7, 2018 at 1:20 AM, Mark Thompson wrote: > On 06/03/18 12:37, Dylan Fernando wrote: > > Hi, > > > > I am Dylan Fernando. I am a Computer Science student from Australia. I am > > new to FFmpeg and I wish to apply for GSoC this year. > > I would like to do the Video filtering with OpenCL project and I have a > few > > questions. Would trying to implement an opencl version of vf_fade be a > good > > idea for the qualification task, or would I be better off using a > different > > filter? > > > > Also, I’m having a bit of trouble with running unsharp_opencl. I tried > > running: > > ffmpeg -hide_banner -nostats -v verbose -init_hw_device opencl=ocl:0.1 > > -filter_hw_device ocl -i space.mpg -filter_complex unsharp_opencl > output.mp4 > > > > but I got the error: > > [AVHWDeviceContext @ 0x7fdac050c700] 0.1: Apple / Intel(R) Iris(TM) > > Graphics 6100 > > [mpeg @ 0x7fdac3132600] max_analyze_duration 500 reached at 5005000 > > microseconds st:0 > > Input #0, mpeg, from 'space.mpg': > > Duration: 00:00:21.99, start: 0.387500, bitrate: 6108 kb/s > > Stream #0:0[0x1e0]: Video: mpeg2video (Main), 1 reference frame, > > yuv420p(tv, bt470bg, bottom first, left), 720x480 [SAR 8:9 DAR 4:3], 6000 > > kb/s, 29.97 fps, 29.97 tbr, 90k tbn, 59.94 tbc > > Stream mapping: > > Stream #0:0 (mpeg2video) -> unsharp_opencl > > unsharp_opencl -> Stream #0:0 (mpeg4) > > Press [q] to stop, [?] 
for help > > [graph 0 input from stream 0:0 @ 0x7fdac0418800] w:720 h:480 > pixfmt:yuv420p > > tb:1/9 fr:3/1001 sar:8/9 sws_param:flags=2 > > [auto_scaler_0 @ 0x7fdac05232c0] w:iw h:ih flags:'bilinear' interl:0 > > [Parsed_unsharp_opencl_0 @ 0x7fdac0715a80] auto-inserting filter > > 'auto_scaler_0' between the filter 'graph 0 input from stream 0:0' and > the > > filter 'Parsed_unsharp_opencl_0' > > Impossible to convert between the formats supported by the filter 'graph > 0 > > input from stream 0:0' and the filter 'auto_scaler_0' > > Error reinitializing filters! > > Failed to inject frame into filter network: Function not implemented > > Error while processing the decoded data for stream #0:0 > > Conversion failed! > > > > How do I correctly run unsharp_opencl? Should I be running it on a > > different video file? > > It's intended to be used in filter graphs where much of the activity is > already happening on the GPU, so the input and output are in the > AV_PIX_FMT_OPENCL format which contains GPU-side OpenCL images. > > If you want to use it standalone then you need hwupload and hwdownload > filters to move the frames between the CPU and GPU. For your example, it > should work with: > > ffmpeg -init_hw_device opencl=ocl:0.1 -filter_hw_device ocl -i space.mpg > -filter_complex hwupload,unsharp_opencl,hwdownload output.mp4 > > (There are constraints on what formats can be used and therefore suitable > files (or required format conversions), but I believe a normal yuv420p > video like this should work in all cases.) > > - Mark > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH v1] lavfi: add nlmeans CUDA filter
Any update on this? Kind Regards, Dylan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v1] lavfi: add nlmeans CUDA filter
On Sat, Aug 14, 2021 at 9:11 AM Timo Rothenpieler wrote: > On 13.08.2021 10:42, Dylan Fernando wrote: > > Any update on this? > > > > Kind Regards, > > Dylan > > Also, are you sure that exp() function is correct? > > The CUDA-Function exp() is defined as "double exp(double x)" and > calculates the base e exponential. > > While __nvvm_ex2_approx_f reads to me like it does so for floats, and > for base 2. For which the CUDA equivalent would be "float exp2f(float)". > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > I wasn't sure about the exp() function. Is there a function like __nvvm_exp_approx_d? I can't seem to find a function for this. ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v1] lavfi: add nlmeans CUDA filter
On Sat, Aug 14, 2021 at 1:03 PM Timo Rothenpieler wrote: > On 14.08.2021 07:49, Dylan Fernando wrote: > > On Sat, Aug 14, 2021 at 9:11 AM Timo Rothenpieler > > > wrote: > > > >> On 13.08.2021 10:42, Dylan Fernando wrote: > >>> Any update on this? > >>> > >>> Kind Regards, > >>> Dylan > >> > >> Also, are you sure that exp() function is correct? > >> > >> The CUDA-Function exp() is defined as "double exp(double x)" and > >> calculates the base e exponential. > >> > >> While __nvvm_ex2_approx_f reads to me like it does so for floats, and > >> for base 2. For which the CUDA equivalent would be "float exp2f(float)". > >> > >> ___ > >> ffmpeg-devel mailing list > >> ffmpeg-devel@ffmpeg.org > >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >> > >> To unsubscribe, visit link above, or email > >> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > >> > > > > I wasn't sure about the exp() function. Is there a function like > > __nvvm_exp_approx_d? I can't seem to find a function for this. > > Looking into it some more, that's simply because there is no other fast > approx exp function than ex2. > If I use __expf() with nvcc, it spawns the following code: > > ld.param.f32%f1, [param]; > mul.f32 %f2, %f1, 0f3FB8AA3B; > ex2.approx.f32 %f3, %f2; > > So it multiplies the input value by some factor, and then runs it > through it. > Given by math, this value must be log2(euler_constant), or log2(exp(1)), > for lack of the constant being defined. > > So the implementation of __expf() would look like this: > > > static inline __device__ float __expf(float a) { return > __nvvm_ex2_approx_f(a * (float)__builtin_log2(__builtin_exp(1))); } > > With llvm, this now spawns the exact same code: > > ld.param.f32%f1, [param]; > mul.f32 %f2, %f1, 0f3FB8AA3B; > ex2.approx.f32 %f3, %f2; > > > I will push that function soon, so you can just use __expf() in your > code. Assuming you want exp to base e. 
> > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > > Attached updated patch > From 1b1103f48599f56a84f749c51085c22341a95fa3 Mon Sep 17 00:00:00 2001 From: Dylan Fernando Date: Sun, 15 Aug 2021 17:08:07 + Subject: [PATCH] lavfi: add nlmeans_cuda filter --- configure | 2 + doc/filters.texi | 4 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/version.h | 2 +- libavfilter/vf_nlmeans_cuda.c | 830 + libavfilter/vf_nlmeans_cuda.cu | 378 +++ 7 files changed, 1218 insertions(+), 1 deletion(-) create mode 100644 libavfilter/vf_nlmeans_cuda.c create mode 100644 libavfilter/vf_nlmeans_cuda.cu diff --git a/configure b/configure index 82639ce057..0d905cc3c2 100755 --- a/configure +++ b/configure @@ -3094,6 +3094,8 @@ thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" transpose_npp_filter_deps="ffnvcodec libnpp" overlay_cuda_filter_deps="ffnvcodec" overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +nlmeans_cuda_filter_deps="ffnvcodec" +nlmeans_cuda_filter_deps_any="cuda_nvcc cuda_llvm" amf_deps_any="libdl LoadLibrary" nvenc_deps="ffnvcodec" diff --git a/doc/filters.texi b/doc/filters.texi index bdeb3fedfd..585aff9880 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15234,6 +15234,10 @@ Same as @option{r} but for chroma planes. The default value is @var{0} and means automatic. @end table +@section nlmeans_cuda + +Non-local Means denoise filter through CUDA, this filter accepts same options as @ref{nlmeans}. + @section nnedi Deinterlace video using neural network edge directed interpolation. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 49c0c8342b..565923d85a 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -341,6 +341,8 @@ OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o +OBJS-$(CONFIG_NLMEANS_CUDA_FILTER) += vf_nlmeans_cuda.o vf_nlmeans_cuda.ptx.o \ +
[FFmpeg-devel] [PATCH v2] lavfi: add nlmeans CUDA filter
I want to add support for the other formats, but I'm not sure how to find video files to test it out. I tried looking through https://samples.ffmpeg.org/, but I'm not sure which files on there are the formats I'm looking for (AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16). From 9183993c6b31560ed21fe9dd4c06f7e01735d903 Mon Sep 17 00:00:00 2001 From: Dylan Fernando Date: Thu, 2 Sep 2021 17:31:49 + Subject: [PATCH] lavfi: add nlmeans_cuda filter Signed-off-by: Dylan Fernando --- configure | 2 + doc/filters.texi | 4 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/version.h | 2 +- libavfilter/vf_nlmeans_cuda.c | 850 + libavfilter/vf_nlmeans_cuda.cu | 378 +++ 7 files changed, 1238 insertions(+), 1 deletion(-) create mode 100644 libavfilter/vf_nlmeans_cuda.c create mode 100644 libavfilter/vf_nlmeans_cuda.cu diff --git a/configure b/configure index 9249254b70..55ed0200c7 100755 --- a/configure +++ b/configure @@ -3094,6 +3094,8 @@ thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" transpose_npp_filter_deps="ffnvcodec libnpp" overlay_cuda_filter_deps="ffnvcodec" overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +nlmeans_cuda_filter_deps="ffnvcodec" +nlmeans_cuda_filter_deps_any="cuda_nvcc cuda_llvm" amf_deps_any="libdl LoadLibrary" nvenc_deps="ffnvcodec" diff --git a/doc/filters.texi b/doc/filters.texi index 9ad6031d23..b5eb9ecd33 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15380,6 +15380,10 @@ Same as @option{r} but for chroma planes. The default value is @var{0} and means automatic. @end table +@section nlmeans_cuda + +Non-local Means denoise filter through CUDA, this filter accepts same options as @ref{nlmeans}. + @section nnedi Deinterlace video using neural network edge directed interpolation. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index af957a5ac0..7a61d7591e 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -347,6 +347,8 @@ OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o +OBJS-$(CONFIG_NLMEANS_CUDA_FILTER) += vf_nlmeans_cuda.o vf_nlmeans_cuda.ptx.o \ +cuda/load_helper.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 0c6b2347c8..d65c13011c 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -333,6 +333,7 @@ extern const AVFilter ff_vf_msad; extern const AVFilter ff_vf_negate; extern const AVFilter ff_vf_nlmeans; extern const AVFilter ff_vf_nlmeans_opencl; +extern const AVFilter ff_vf_nlmeans_cuda; extern const AVFilter ff_vf_nnedi; extern const AVFilter ff_vf_noformat; extern const AVFilter ff_vf_noise; diff --git a/libavfilter/version.h b/libavfilter/version.h index ff12ff9f8f..306bb62ff4 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -30,7 +30,7 @@ #include "libavutil/version.h" #define LIBAVFILTER_VERSION_MAJOR 8 -#define LIBAVFILTER_VERSION_MINOR 7 +#define LIBAVFILTER_VERSION_MINOR 8 #define LIBAVFILTER_VERSION_MICRO 100 diff --git a/libavfilter/vf_nlmeans_cuda.c b/libavfilter/vf_nlmeans_cuda.c new file mode 100644 index 00..3dc74e310d --- /dev/null +++ b/libavfilter/vf_nlmeans_cuda.c @@ -0,0 +1,850 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" + +#include "avfilter.h" +#include "internal.h" + +#include "cuda/l
Re: [FFmpeg-devel] [PATCH v2] lavfi: add nlmeans CUDA filter
On Thu, Sep 2, 2021 at 4:25 PM Timo Rothenpieler wrote: > On 02.09.2021 15:32, Timo Rothenpieler wrote: > > On 02.09.2021 19:50, Dylan Fernando wrote: > >> I want to add support for the other formats, but I'm not sure how to > find > >> video files to test it out. I tried looking through > >> https://samples.ffmpeg.org/, but I'm not sure which files on there are > >> the > >> formats im looking for (AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, > >> AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16). > > > > Just slap a format_cuda filter in front and convert to the desired > format. > > For RGB formats, which it doesn't support right now, just use > > format,hwupload_cuda. > > sorry, scale_cuda or format+hwupload > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > Thanks, I have yuv420p and yuv444p working now. For P010, I tried using: ffmpeg -loglevel debug -v verbose -hwaccel cuda -hwaccel_output_format cuda -i noise.mp4 -vf format=p010,hwupload,nlmeans_cuda=20,hwdownload nlmeans.mp4 and I get: [Parsed_format_0 @ 0x558bf0ff6bc0] auto-inserting filter 'auto_scale_0' between the filter 'graph 0 input from stream 0:0' and the filter 'Parsed_format_0' Impossible to convert between the formats supported by the filter 'graph 0 input from stream 0:0' and the filter 'auto_scale_0' Segmentation fault (core dumped) ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH v2] lavfi: add nlmeans CUDA filter
On Sat, Sep 4, 2021 at 10:43 AM Timo Rothenpieler wrote: > On 04.09.2021 22:03, Dylan Fernando wrote: > > On Thu, Sep 2, 2021 at 4:25 PM Timo Rothenpieler > > wrote: > > > >> On 02.09.2021 15:32, Timo Rothenpieler wrote: > >>> On 02.09.2021 19:50, Dylan Fernando wrote: > >>>> I want to add support for the other formats, but I'm not sure how to > >> find > >>>> video files to test it out. I tried looking through > >>>> https://samples.ffmpeg.org/, but I'm not sure which files on there > are > >>>> the > >>>> formats im looking for (AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, > >>>> AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16). > >>> > >>> Just slap a format_cuda filter in front and convert to the desired > >> format. > >>> For RGB formats, which it doesn't support right now, just use > >>> format,hwupload_cuda. > >> > >> sorry, scale_cuda or format+hwupload > >> ___ > >> ffmpeg-devel mailing list > >> ffmpeg-devel@ffmpeg.org > >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > >> > >> To unsubscribe, visit link above, or email > >> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > >> > > > > Thanks, I have yuv420p and yuv444p working now. > > > > For P010, I tried using: > > ffmpeg -loglevel debug -v verbose -hwaccel cuda -hwaccel_output_format > cuda > > -i noise.mp4 -vf format=p010,hwupload,nlmeans_cuda=20,hwdownload > nlmeans.mp4 > > > > and I get: > > [Parsed_format_0 @ 0x558bf0ff6bc0] auto-inserting filter 'auto_scale_0' > > between the filter 'graph 0 input from stream 0:0' and the filter > > 'Parsed_format_0' > > Impossible to convert between the formats supported by the filter 'graph > 0 > > input from stream 0:0' and the filter 'auto_scale_0' > > > > Segmentation fault (core dumped) > > you're trying to hwupload something that already is uploaded by the > decoder. > Either use scale_cuda for the conversion, or don't have the decoder > output CUDA frames. > > The segfault is a bit unexpected though. Can you get a backtrace? 
> ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > Impossible to convert between the formats supported by the filter 'graph 0 input from stream 0:0' and the filter 'auto_scale_0' Thread 1 "ffmpeg" received signal SIGSEGV, Segmentation fault. 0x556726eb in uninit (ctx=0x5817e800) at libavfilter/vf_nlmeans_cuda.c:704 704 CudaFunctions *cu = s->hwctx->internal->cuda_dl; (gdb) backtrace #0 0x556726eb in uninit (ctx=0x5817e800) at libavfilter/vf_nlmeans_cuda.c:704 #1 0x55742e65 in avfilter_free (filter=0x5817e800) at libavfilter/avfilter.c:769 #2 0x55744cac in avfilter_graph_free (graph=0x5716ded0) at libavfilter/avfiltergraph.c:126 #3 0x55707ae0 in cleanup_filtergraph (fg=0x5716dec0) at fftools/ffmpeg_filter.c:952 #4 configure_filtergraph (fg=fg@entry=0x5716dec0) at fftools/ffmpeg_filter.c:1130 #5 0x5571b060 in ifilter_send_frame (frame=0x57a72d00, ifilter=0x5716db40) at fftools/ffmpeg.c:2242 #6 send_frame_to_filters (ist=ist@entry=0x5716c5c0, decoded_frame=decoded_frame@entry=0x57a72d00) at fftools/ffmpeg.c:2323 #7 0x5571c204 in decode_video (decode_failed=, eof=, duration_pts=, got_output=, pkt=, ist=) at fftools/ffmpeg.c:2520 #8 process_input_packet (ist=ist@entry=0x5716c5c0, pkt=0x5716c7c0, no_eof=no_eof@entry=0) at fftools/ffmpeg.c:2682 #9 0x5571daee in process_input (file_index=) at fftools/ffmpeg.c:4636 #10 transcode_step () at fftools/ffmpeg.c:4776 #11 transcode () at fftools/ffmpeg.c:4830 #12 0x556f84a7 in main (argc=, argv=) at fftools/ffmpeg.c:5035 > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH v3] lavfi: add nlmeans CUDA filter
From f8b8a250a2c4b092747d632adc7dafccfc474140 Mon Sep 17 00:00:00 2001 From: Dylan Fernando Date: Wed, 8 Sep 2021 18:19:40 + Subject: [PATCH] lavfi: add nlmeans_cuda filter Signed-off-by: Dylan Fernando --- configure | 2 + doc/filters.texi | 4 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/version.h | 4 +- libavfilter/vf_nlmeans_cuda.c | 883 + libavfilter/vf_nlmeans_cuda.cu | 378 ++ 7 files changed, 1272 insertions(+), 2 deletions(-) create mode 100644 libavfilter/vf_nlmeans_cuda.c create mode 100644 libavfilter/vf_nlmeans_cuda.cu diff --git a/configure b/configure index af410a9d11..7fa67e415e 100755 --- a/configure +++ b/configure @@ -3094,6 +3094,8 @@ thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm" transpose_npp_filter_deps="ffnvcodec libnpp" overlay_cuda_filter_deps="ffnvcodec" overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm" +nlmeans_cuda_filter_deps="ffnvcodec" +nlmeans_cuda_filter_deps_any="cuda_nvcc cuda_llvm" amf_deps_any="libdl LoadLibrary" nvenc_deps="ffnvcodec" diff --git a/doc/filters.texi b/doc/filters.texi index 9ad6031d23..b5eb9ecd33 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15380,6 +15380,10 @@ Same as @option{r} but for chroma planes. The default value is @var{0} and means automatic. @end table +@section nlmeans_cuda + +Non-local Means denoise filter through CUDA, this filter accepts same options as @ref{nlmeans}. + @section nnedi Deinterlace video using neural network edge directed interpolation. 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index af957a5ac0..7a61d7591e 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -347,6 +347,8 @@ OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o +OBJS-$(CONFIG_NLMEANS_CUDA_FILTER) += vf_nlmeans_cuda.o vf_nlmeans_cuda.ptx.o \ +cuda/load_helper.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 0c6b2347c8..d65c13011c 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -333,6 +333,7 @@ extern const AVFilter ff_vf_msad; extern const AVFilter ff_vf_negate; extern const AVFilter ff_vf_nlmeans; extern const AVFilter ff_vf_nlmeans_opencl; +extern const AVFilter ff_vf_nlmeans_cuda; extern const AVFilter ff_vf_nnedi; extern const AVFilter ff_vf_noformat; extern const AVFilter ff_vf_noise; diff --git a/libavfilter/version.h b/libavfilter/version.h index 2110048b77..306bb62ff4 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -30,8 +30,8 @@ #include "libavutil/version.h" #define LIBAVFILTER_VERSION_MAJOR 8 -#define LIBAVFILTER_VERSION_MINOR 7 -#define LIBAVFILTER_VERSION_MICRO 101 +#define LIBAVFILTER_VERSION_MINOR 8 +#define LIBAVFILTER_VERSION_MICRO 100 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ diff --git a/libavfilter/vf_nlmeans_cuda.c b/libavfilter/vf_nlmeans_cuda.c new file mode 100644 index 00..3ecc7c8945 --- /dev/null +++ b/libavfilter/vf_nlmeans_cuda.c @@ -0,0 +1,883 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" + +#include "avfilter.h" +#include "internal.h" + +#include "cuda/load_helper.h" + +static const enum AVPixelFormat supported_formats[] = { +AV_PIX_FMT_NV12, +AV_PIX_FMT_YUV420P, +AV_PIX_FMT_YUV444P +}; + + +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->in
Re: [FFmpeg-devel] [PATCH v3] lavfi: add nlmeans CUDA filter
Any feedback for this? ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".