On 27 March 2018 at 05:48, Gabriel Machado <gabriel_mach...@live.com> wrote:
> From: Gabriel Machado <gabriel_mach...@live.com> > > Some scaling filters implemented as OpenCL kernels. Can be used as: > > scale_opencl=<width>:<height>:flags=<filter> > where <filter> can be `neighbor', `bilinear', `bicubic' or `fast_bicubic' > > This is an initial draft, there's still a long way to go in terms of > completeness, configurability and performance. > > --- > configure | 1 + > libavfilter/Makefile | 1 + > libavfilter/allfilters.c | 1 + > libavfilter/opencl/scale.cl | 165 ++++++++++++++++++++++++ > libavfilter/opencl_source.h | 1 + > libavfilter/vf_scale_opencl.c | 289 ++++++++++++++++++++++++++++++ > ++++++++++++ > 6 files changed, 458 insertions(+) > create mode 100644 libavfilter/opencl/scale.cl > create mode 100644 libavfilter/vf_scale_opencl.c > > diff --git a/configure b/configure > index 5ccf3ce..4007ee8 100755 > --- a/configure > +++ b/configure > @@ -2821,6 +2821,7 @@ v4l2_m2m_deps_any="linux_videodev2_h" > > hwupload_cuda_filter_deps="ffnvcodec" > scale_npp_filter_deps="ffnvcodec libnpp" > +scale_opencl_filter_deps="opencl" > scale_cuda_filter_deps="cuda_sdk" > thumbnail_cuda_filter_deps="cuda_sdk" > > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index a90ca30..6303cbd 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -302,6 +302,7 @@ OBJS-$(CONFIG_SAB_FILTER) += > vf_sab.o > OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale.o > OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o > vf_scale_cuda.ptx.o > OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o > +OBJS-$(CONFIG_SCALE_OPENCL_FILTER) += vf_scale_opencl.o > opencl.o opencl/scale.o > OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o > OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale.o > vaapi_vpp.o > OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index 1cf1340..3185b17 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -309,6 +309,7 
@@ static void register_all(void) > REGISTER_FILTER(SCALE, scale, vf); > REGISTER_FILTER(SCALE_CUDA, scale_cuda, vf); > REGISTER_FILTER(SCALE_NPP, scale_npp, vf); > + REGISTER_FILTER(SCALE_OPENCL, scale_opencl, vf); > REGISTER_FILTER(SCALE_QSV, scale_qsv, vf); > REGISTER_FILTER(SCALE_VAAPI, scale_vaapi, vf); > REGISTER_FILTER(SCALE2REF, scale2ref, vf); > diff --git a/libavfilter/opencl/scale.cl b/libavfilter/opencl/scale.cl > new file mode 100644 > index 0000000..b0e6cb2 > --- /dev/null > +++ b/libavfilter/opencl/scale.cl > @@ -0,0 +1,165 @@ > +/* > + * Copyright (c) 2018 Gabriel Machado > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > +__kernel void neighbor(__write_only image2d_t dst, > + __read_only image2d_t src) > +{ > + const sampler_t sampler = (CLK_NORMALIZED_COORDS_TRUE | > + CLK_ADDRESS_CLAMP_TO_EDGE | > + CLK_FILTER_NEAREST); > + > + int2 coord = {get_global_id(0), get_global_id(1)}; > + int2 size = {get_global_size(0), get_global_size(1)}; > + > + float2 pos = (convert_float2(coord) + 0.5) / convert_float2(size); > + > + float4 c = read_imagef(src, sampler, pos); > + write_imagef(dst, coord, c); > +} > + > +__kernel void bilinear(__write_only image2d_t dst, > + __read_only image2d_t src) > +{ > + const sampler_t sampler = (CLK_NORMALIZED_COORDS_TRUE | > + CLK_ADDRESS_CLAMP_TO_EDGE | > + CLK_FILTER_LINEAR); > + > + int2 coord = {get_global_id(0), get_global_id(1)}; > + int2 size = {get_global_size(0), get_global_size(1)}; > + > + float2 pos = (convert_float2(coord) + 0.5) / convert_float2(size); > + > + float4 c = read_imagef(src, sampler, pos); > + write_imagef(dst, coord, c); > +} > + > +// https://developer.nvidia.com/gpugems/GPUGems/gpugems_ch24.html > +float MitchellNetravali(float x, float B, float C) > +{ > + float t = fabs(x); > + float tt = t*t; > + float ttt = tt*t; > + > + if (t < 1) { > + return ((12 - 9 * B - 6 * C) * ttt + > + (-18 + 12 * B + 6 * C) * tt + (6 - 2 * B)) / 6; > + } else if ((t >= 1) && (t < 2)) { > + return ((-B - 6 * C) * ttt + > + (6 * B + 30 * C) * tt + (-12 * B - 48 * C) * > + t + (8 * B + 24 * C)) / 6; > + } else { > + return 0; > + } > +} > License unclear, I don't think you can use it. Moreover it comes from a book. 
+ > +float4 cubic(float4 c0, float4 c1, float4 c2, float4 c3, float t) > +{ > + float B = 0, C = 0.6; // libswscale default > + float a = MitchellNetravali(t + 1, B, C); > + float b = MitchellNetravali(t, B, C); > + float c = MitchellNetravali(1 - t, B, C); > + float d = MitchellNetravali(2 - t, B, C); > + return a*c0 + b*c1 + c*c2 + d*c3; > +} > + > +__kernel void bicubic(__write_only image2d_t dst, > + __read_only image2d_t src) > +{ > + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | > + CLK_ADDRESS_CLAMP_TO_EDGE | > + CLK_FILTER_NEAREST); > + > + int2 dst_coord = {get_global_id(0), get_global_id(1)}; > + > + float2 dst_size = {get_global_size(0), get_global_size(1)}; > + float2 src_size = convert_float2(get_image_dim(src)); > + > + float2 uv = convert_float2(dst_coord) / dst_size; > + > + float2 src_pos = uv * convert_float2(src_size) - 0.5; > + > + float2 src_coordf; > + float2 t = fract(src_pos, &src_coordf); > + int2 src_coord = convert_int2(src_coordf); > + > +#define TEX(x,y) read_imagef(src, sampler, src_coord + (int2){x,y}) > + float4 col = cubic(cubic(TEX(-1,-1), TEX(0,-1), TEX(1,-1), TEX(2,-1), > t.x), > + cubic(TEX(-1, 0), TEX(0, 0), TEX(1, 0), TEX(2, 0), > t.x), > + cubic(TEX(-1, 1), TEX(0, 1), TEX(1, 1), TEX(2, 1), > t.x), > + cubic(TEX(-1, 2), TEX(0, 2), TEX(1, 2), TEX(2, 2), > t.x), > + t.y); > +#undef TEX > + > + write_imagef(dst, dst_coord, col); > +} > + > +// https://www.shadertoy.com/view/4df3Dn > +// 4x4 bicubic filter using 4 bilinear texture lookups > +// cubic B-spline basis functions > +float w0(float a) { return (1.0/6.0)*(a*(a*(-a + 3.0) - 3.0) + 1.0); } > +float w1(float a) { return (1.0/6.0)*(a*a*(3.0*a - 6.0) + 4.0); } > +float w2(float a) { return (1.0/6.0)*(a*(a*(-3.0*a + 3.0) + 3.0) + 1.0); } > +float w3(float a) { return (1.0/6.0)*(a*a*a); } > No license, can't use it. Shadertoy has no explicit license. Moreover the whole filter is incorrectly designed. 
Take a look at how mpv does it: it has no explicit per-algorithm scaling functions. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel