Re: [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_dr_16x16_16 avx2 implementation
>I know unaligned loads are not as slow as they used to be, >but could m1 be produced by m2 and palignr? I am not sure, can you clarify your question? >From the comment I assume you don't use the extra two bytes >that you get from the load, as you mark them as "*" >generic undefined values No, those two extra bytes are actually used, that's the above/left corner pixel. If you look in the vp9dsp_template.c file, there is a macro-defined diag_downright_ function; that pixel is top[-1] in its body. Sorry for this ambiguous marking, but it's used in other ipred_dr functions so I decided to follow it. >"cnt" doesn't seem to be used. Yes indeed, I overlooked that, thanks. On Fri, Jun 9, 2017 at 6:03 PM, Ivan Kalvachev wrote: > On 6/8/17, Ilia Valiakhmetov wrote: > > vp9_diag_downright_16x16_12bpp_c: 149.0 > > vp9_diag_downright_16x16_12bpp_sse2: 67.8 > > vp9_diag_downright_16x16_12bpp_ssse3: 45.6 > > vp9_diag_downright_16x16_12bpp_avx: 36.6 > > vp9_diag_downright_16x16_12bpp_avx2: 25.5 > > > > ~30% faster than avx > > > > Signed-off-by: Ilia Valiakhmetov > > --- > > libavcodec/x86/vp9dsp_init_16bpp.c| 2 ++ > > libavcodec/x86/vp9intrapred_16bpp.asm | 56 > > +++ > > 2 files changed, 58 insertions(+) > > > > diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c > > b/libavcodec/x86/vp9dsp_init_16bpp.c > > index d1b8fcd..8d1aa13 100644 > > --- a/libavcodec/x86/vp9dsp_init_16bpp.c > > +++ b/libavcodec/x86/vp9dsp_init_16bpp.c > > @@ -52,6 +52,7 @@ decl_ipred_fns(dc, 16, mmxext, sse2); > > decl_ipred_fns(dc_top, 16, mmxext, sse2); > > decl_ipred_fns(dc_left, 16, mmxext, sse2); > > decl_ipred_fn(dl, 16, 16, avx2); > > +decl_ipred_fn(dr, 16, 16, avx2); > > decl_ipred_fn(dl, 32, 16, avx2); > > > > #define decl_ipred_dir_funcs(type) \ > > @@ -136,6 +137,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext > > *dsp) > > init_fpel_func(1, 1, 64, avg, _16, avx2); > > init_fpel_func(0, 1, 128, avg, _16, avx2); > > init_ipred_func(dl, DIAG_DOWN_LEFT, 16, 16, avx2); > > +init_ipred_func(dr, DIAG_DOWN_RIGHT, 
16, 16, avx2); > > init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2); > > } > > > > diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm > > b/libavcodec/x86/vp9intrapred_16bpp.asm > > index 92333bc..67b98b1 100644 > > --- a/libavcodec/x86/vp9intrapred_16bpp.asm > > +++ b/libavcodec/x86/vp9intrapred_16bpp.asm > > @@ -1170,6 +1170,62 @@ DR_FUNCS 2 > > INIT_XMM avx > > DR_FUNCS 2 > > > > +%if HAVE_AVX2_EXTERNAL > > +INIT_YMM avx2 > > +cglobal vp9_ipred_dr_16x16_16, 4, 6, 7, dst, stride, l, a > > +mova m0, [lq] ; > klmnopqrstuvwxyz > > +movu m1, [aq-2] ; > *abcdefghijklmno > > +mova m2, [aq] ; > abcdefghijklmnop > > From the comment I assume you don't use the extra two bytes > that you get from the load, as you mark them as "*" > generic undefined values > > > +vperm2i128 m4, m2, m2, q2001 ; > ijklmnop > > +vpalignr m5, m4, m2, 2 ; > bcdefghijklmnop. > > +vperm2i128 m3, m0, m1, q0201 ; > stuvwxyz*abcdefg > > +LOWPASS 1, 2, 5 ; > ABCDEFGHIJKLMNO. > > +vpalignr m4, m3, m0, 2 ; > lmnopqrstuvwxyz* > > +vpalignr m5, m3, m0, 4 ; > mnopqrstuvwxyz*a > > +LOWPASS 0, 4, 5 ; > LMNOPQRSTUVWXYZ# > > +vperm2i128 m5, m0, m1, q0201 ; > TUVWXYZ#ABCDEFGH > > +DEFINE_ARGS dst, stride, stride3, stride5, dst3, cnt > > "cnt" doesn't seem to be used. > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_dr_16x16_16 avx2 implementation
Yes, you are right, I'll send a patch with this fixed, thanks. On Sat, Jun 10, 2017 at 5:35 AM, Ivan Kalvachev wrote: > On 6/9/17, Ilia Valiakhmetov wrote: > > Signed-off-by: Ilia Valiakhmetov > > --- > > libavcodec/x86/vp9dsp_init_16bpp.c| 2 ++ > > libavcodec/x86/vp9intrapred_16bpp.asm | 56 > > +++ > > 2 files changed, 58 insertions(+) > > > > diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c > > b/libavcodec/x86/vp9dsp_init_16bpp.c > > index d1b8fcd..8d1aa13 100644 > > --- a/libavcodec/x86/vp9dsp_init_16bpp.c > > +++ b/libavcodec/x86/vp9dsp_init_16bpp.c > > @@ -52,6 +52,7 @@ decl_ipred_fns(dc, 16, mmxext, sse2); > > decl_ipred_fns(dc_top, 16, mmxext, sse2); > > decl_ipred_fns(dc_left, 16, mmxext, sse2); > > decl_ipred_fn(dl, 16, 16, avx2); > > +decl_ipred_fn(dr, 16, 16, avx2); > > decl_ipred_fn(dl, 32, 16, avx2); > > > > #define decl_ipred_dir_funcs(type) \ > > @@ -136,6 +137,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext > > *dsp) > > init_fpel_func(1, 1, 64, avg, _16, avx2); > > init_fpel_func(0, 1, 128, avg, _16, avx2); > > init_ipred_func(dl, DIAG_DOWN_LEFT, 16, 16, avx2); > > +init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2); > > init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2); > > } > > > > diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm > > b/libavcodec/x86/vp9intrapred_16bpp.asm > > index 92333bc..7230de2 100644 > > --- a/libavcodec/x86/vp9intrapred_16bpp.asm > > +++ b/libavcodec/x86/vp9intrapred_16bpp.asm > > @@ -1170,6 +1170,62 @@ DR_FUNCS 2 > > INIT_XMM avx > > DR_FUNCS 2 > > > > +%if HAVE_AVX2_EXTERNAL > > +INIT_YMM avx2 > > +cglobal vp9_ipred_dr_16x16_16, 4, 4, 6, dst, stride, l, a > [...] > > +DEFINE_ARGS dst, stride, stride3, stride5, dst3 > > You removed one variable, so now the number of > re-define-args gprs should be 5. > However the cglobal above has 4 reserved registers. > > It used to be 4, 6, 6 > Now it is 4, 4, 6 > I think it should be 4, 5, 6 > > Am I missing something? 
> ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/2] avcodec/pthread_slice: add main function support for avpriv_slicethread_create()
Fixed in v2. On Fri, Sep 8, 2017 at 3:40 AM, Michael Niedermayer wrote: > On Fri, Sep 08, 2017 at 12:55:29AM +0700, Ilia Valiakhmetov wrote: > > --- > > libavcodec/internal.h | 4 > > libavcodec/pthread_slice.c | 33 ++--- > > libavcodec/thread.h| 1 + > > libavutil/slicethread.h| 18 ++ > > 4 files changed, 37 insertions(+), 19 deletions(-) > > applying only patch 1 fails to build > > libavcodec/vp9.c: In function ‘thread_execute3’: > libavcodec/vp9.c:94:5: error: unknown type name ‘SliceThreadContext’ > SliceThreadContext *c = avctx->internal->thread_ctx; > ^ > libavcodec/vp9.c:95:6: error: request for member ‘func2’ in something not > a structure or union > c->func2 = func; > ^ > libavcodec/vp9.c:96:6: error: request for member ‘m_func’ in something not > a structure or union > c->m_func = m_func; > ^ > libavcodec/vp9.c:97:5: error: implicit declaration of function > ‘ff_thread_execute’ [-Werror=implicit-function-declaration] > return ff_thread_execute(avctx, NULL, arg, ret, job_count, 0); > ^ > libavcodec/vp9.c: At top level: > libavcodec/vp9.c:1790:22: error: ‘FF_CODEC_CAP_SLICE_THREAD_HAS_MF’ > undeclared here (not in a function) > .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF, > ^ > > > applying both patches fails to build on mingw64 > > In file included from src/libavutil/slicethread.c:20:0: > src/libavutil/slicethread.h:38:5: error: unknown type name > ‘pthread_cond_t’ > pthread_cond_t *progress_cond; > ^ > src/libavutil/slicethread.h:39:5: error: unknown type name > ‘pthread_mutex_t’ > pthread_mutex_t *progress_mutex; > ^ > make: *** [libavutil/slicethread.o] Error 1 > make: *** Waiting for unfinished jobs > > > [...] 
> -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > Those who would give up essential Liberty, to purchase a little > temporary Safety, deserve neither Liberty nor Safety -- Benjamin Franklin > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH] vp9: fix compilation with threading disabled.
This also fixes the warning. --- libavcodec/vp9.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index a71045e..f940d60 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -88,7 +88,7 @@ static void vp9_await_tile_progress(VP9Context *s, int field, int n) { pthread_mutex_unlock(&s->progress_mutex); } #else -static void vp9_free_entries(VP9Context *s) {} +static void vp9_free_entries(AVCodecContext *avctx) {} static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; } static void vp9_report_tile_progress(VP9Context *s, int field, int n) {} static void vp9_await_tile_progress(VP9Context *s, int field, int n) {} @@ -1599,7 +1599,7 @@ FF_ENABLE_DEPRECATION_WARNINGS s->td[i].uveob[1] = s->td[i].uveob_base[1]; } -if (avctx->active_thread_type == FF_THREAD_SLICE) { +if (HAVE_THREADS && avctx->active_thread_type == FF_THREAD_SLICE) { int tile_row, tile_col; assert(!pass); -- 2.8.3 On Sat, Sep 9, 2017 at 11:13 PM, Ronald S. Bultje wrote: > --- > libavcodec/vp9.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c > index a71045e..499f357 100644 > --- a/libavcodec/vp9.c > +++ b/libavcodec/vp9.c > @@ -1599,7 +1599,7 @@ FF_ENABLE_DEPRECATION_WARNINGS > s->td[i].uveob[1] = s->td[i].uveob_base[1]; > } > > -if (avctx->active_thread_type == FF_THREAD_SLICE) { > +if (HAVE_THREADS && avctx->active_thread_type == FF_THREAD_SLICE) > { > int tile_row, tile_col; > > assert(!pass); > -- > 2.8.1 > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH] vp9: fix compilation with threading disabled.
Hm, I don't understand, what exactly is the problem? It applies fine for me. On Sun, Sep 10, 2017 at 1:27 AM, Michael Niedermayer wrote: > On Sat, Sep 09, 2017 at 11:24:12PM +0700, gh0st wrote: > > This also fixes the warning. > > > > --- > > libavcodec/vp9.c | 4 ++-- > > 1 file changed, 2 insertions(+), 2 deletions(-) > > > > diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c > > index a71045e..f940d60 100644 > > --- a/libavcodec/vp9.c > > +++ b/libavcodec/vp9.c > > @@ -88,7 +88,7 @@ static void vp9_await_tile_progress(VP9Context *s, int > > field, int n) { > > This is corrupted by newlines > > [...] > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > Observe your enemies, for they first find out your faults. -- Antisthenes > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 0/2] Tile threading support for vp9
Fixed in PATCHv3. On Mon, Aug 28, 2017 at 7:40 AM, Michael Niedermayer wrote: > On Mon, Aug 28, 2017 at 02:22:15AM +0700, Ilia Valiakhmetov wrote: > > These patches introduce tile threading support for vp9. > > > > Tile threading is ~45% faster at 2 threads vs 1. > > Frame threading is ~55% faster at 2 threads vs 1. > > ffvp9 tile threading is ~25% faster than libvpx-vp9 at 2 threads > > > > execute3() function is similar to execute2(), except it has > > an extra argument - main function for avpriv_slicethread_create(), it is > used for the loopfilter. > > > > Ilia Valiakhmetov (2): > > avcodec: add execute3() api to utilize the main function of > > avpriv_slicethread_create(). > > avcodec/vp9: Add tile threading support > > this seems to break build with mips-linux-gnu-gcc-4.4 and > --disable-pthreads > > In file included from src/libavcodec/vp9data.h:28, > from src/libavcodec/vp9.c:33: > src/libavcodec/vp9dec.h:218: error: redefinition of typedef ‘VP9TileData’ > src/libavcodec/vp9dec.h:89: note: previous declaration of ‘VP9TileData’ > was here > src/libavcodec/vp9.c: In function ‘vp9_free_entries’: > src/libavcodec/vp9.c:115: error: implicit declaration of function > ‘pthread_mutex_destroy’ > src/libavcodec/vp9.c:116: error: implicit declaration of function > ‘pthread_cond_destroy’ > src/libavcodec/vp9.c: In function ‘vp9_alloc_entries’: > src/libavcodec/vp9.c:138: error: implicit declaration of function > ‘pthread_mutex_init’ > src/libavcodec/vp9.c:139: error: implicit declaration of function > ‘pthread_cond_init’ > src/libavcodec/vp9.c: In function ‘vp9_report_tile_progress’: > src/libavcodec/vp9.c:146: error: implicit declaration of function > ‘pthread_cond_signal’ > src/libavcodec/vp9.c: In function ‘vp9_await_tile_progress’: > src/libavcodec/vp9.c:153: error: implicit declaration of function > ‘pthread_mutex_lock’ > src/libavcodec/vp9.c:155: error: implicit declaration of function > ‘pthread_cond_wait’ > src/libavcodec/vp9.c:156: error: implicit declaration 
of function > ‘pthread_mutex_unlock’ > src/libavcodec/vp9.c: In function ‘vp9_decode_frame’: > src/libavcodec/vp9.c:1424: warning: ‘pkt_pts’ is deprecated (declared at > src/libavutil/frame.h:302) > make: *** [libavcodec/vp9.o] Error 1 > make: *** Waiting for unfinished jobs > > [...] > > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > Those who would give up essential Liberty, to purchase a little > temporary Safety, deserve neither Liberty nor Safety -- Benjamin Franklin > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCHv3 2/2] avcodec/vp9: Add tile threading support
Should be fixed now in PATCHv4, thanks! On Thu, Aug 31, 2017 at 3:59 AM, Michael Niedermayer wrote: > On Mon, Aug 28, 2017 at 06:36:13PM +0700, Ilia Valiakhmetov wrote: > > Signed-off-by: Ilia Valiakhmetov > > --- > > libavcodec/vp9.c | 608 ++ > - > > libavcodec/vp9_mc_template.c | 202 +++--- > > libavcodec/vp9block.c| 522 ++ > --- > > libavcodec/vp9dec.h | 108 +--- > > libavcodec/vp9mvs.c | 97 +++ > > libavcodec/vp9prob.c | 64 ++--- > > libavcodec/vp9recon.c| 153 +-- > > 7 files changed, 1006 insertions(+), 748 deletions(-) > > with mips-linux-gnu-gcc-4.4 (Debian 4.4.5-8) 4.4.5 > > In file included from src/libavcodec/vp9data.h:28, > from src/libavcodec/vp9.c:33: > src/libavcodec/vp9dec.h:218: error: redefinition of typedef ‘VP9TileData’ > src/libavcodec/vp9dec.h:89: note: previous declaration of ‘VP9TileData’ > was here > src/libavcodec/vp9.c: In function ‘vp9_decode_frame’: > src/libavcodec/vp9.c:1379: warning: ‘pkt_pts’ is deprecated (declared at > src/libavutil/frame.h:302) > make: *** [libavcodec/vp9.o] Error 1 > make: *** Waiting for unfinished jobs > In file included from src/libavcodec/vp9data.h:28, > from src/libavcodec/vp9data.c:24: > src/libavcodec/vp9dec.h:218: error: redefinition of typedef ‘VP9TileData’ > src/libavcodec/vp9dec.h:89: note: previous declaration of ‘VP9TileData’ > was here > make: *** [libavcodec/vp9data.o] Error 1 > In file included from src/libavcodec/vp9data.h:28, > from src/libavcodec/vp9block.c:32: > src/libavcodec/vp9dec.h:218: error: redefinition of typedef ‘VP9TileData’ > src/libavcodec/vp9dec.h:89: note: previous declaration of ‘VP9TileData’ > was here > make: *** [libavcodec/vp9block.o] Error 1 > src/libavcodec/utils.c: In function ‘avcodec_open2’: > src/libavcodec/utils.c:859: warning: ‘coded_frame’ is deprecated (declared > at src/libavcodec/avcodec.h:3170) > src/libavcodec/utils.c:860: warning: ‘coded_frame’ is deprecated (declared > at src/libavcodec/avcodec.h:3170) > src/libavcodec/utils.c:1135: warning: ‘coded_frame’ is 
deprecated > (declared at src/libavcodec/avcodec.h:3170) > src/libavcodec/utils.c: In function ‘avcodec_close’: > src/libavcodec/utils.c:1232: warning: ‘coded_frame’ is deprecated > (declared at src/libavcodec/avcodec.h:3170) > src/libavcodec/utils.c: In function ‘av_log_missing_feature’: > src/libavcodec/utils.c:1936: warning: ‘av_log_ask_for_sample’ is > deprecated (declared at src/libavcodec/avcodec.h:6288) > > > > [...] > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > When the tyrant has disposed of foreign enemies by conquest or treaty, and > there is nothing more to fear from them, then he is always stirring up > some war or other, in order that the people may require a leader. -- Plato > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel