Re: [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_dr_16x16_16 avx2 implementation

2017-06-09 Thread gh0st
>I know unaligned loads are not as slow as they used to be,
>but could m1 be produced by m2 and palignr?

I am not sure, can you clarify your question?

>From the comment I assume you don't use the extra two bytes
>that you get from the load, as you mark them as "*"
>generic undefined values

No, those two extra bytes are actually used: they hold the above/left corner
pixel. If you look in the vp9dsp_template.c file, the diag_downright_ macro
refers to it as top[-1] in its body. Sorry for the ambiguous "*" marking, but
it is used in the other ipred_dr functions, so I decided to follow it.

>"cnt" doesn't seem to be used.

Yes indeed, I overlooked that, thanks.

On Fri, Jun 9, 2017 at 6:03 PM, Ivan Kalvachev  wrote:

> On 6/8/17, Ilia Valiakhmetov  wrote:
> > vp9_diag_downright_16x16_12bpp_c: 149.0
> > vp9_diag_downright_16x16_12bpp_sse2: 67.8
> > vp9_diag_downright_16x16_12bpp_ssse3: 45.6
> > vp9_diag_downright_16x16_12bpp_avx: 36.6
> > vp9_diag_downright_16x16_12bpp_avx2: 25.5
> >
> > ~30% faster than avx
> >
> > Signed-off-by: Ilia Valiakhmetov 
> > ---
> >  libavcodec/x86/vp9dsp_init_16bpp.c|  2 ++
> >  libavcodec/x86/vp9intrapred_16bpp.asm | 56
> > +++
> >  2 files changed, 58 insertions(+)
> >
> > diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c
> > b/libavcodec/x86/vp9dsp_init_16bpp.c
> > index d1b8fcd..8d1aa13 100644
> > --- a/libavcodec/x86/vp9dsp_init_16bpp.c
> > +++ b/libavcodec/x86/vp9dsp_init_16bpp.c
> > @@ -52,6 +52,7 @@ decl_ipred_fns(dc,  16, mmxext, sse2);
> >  decl_ipred_fns(dc_top,  16, mmxext, sse2);
> >  decl_ipred_fns(dc_left, 16, mmxext, sse2);
> >  decl_ipred_fn(dl,   16, 16, avx2);
> > +decl_ipred_fn(dr,   16, 16, avx2);
> >  decl_ipred_fn(dl,   32, 16, avx2);
> >
> >  #define decl_ipred_dir_funcs(type) \
> > @@ -136,6 +137,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext
> > *dsp)
> >  init_fpel_func(1, 1,  64, avg, _16, avx2);
> >  init_fpel_func(0, 1, 128, avg, _16, avx2);
> >  init_ipred_func(dl, DIAG_DOWN_LEFT, 16, 16, avx2);
> > +init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
> >  init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2);
> >  }
> >
> > diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm
> > b/libavcodec/x86/vp9intrapred_16bpp.asm
> > index 92333bc..67b98b1 100644
> > --- a/libavcodec/x86/vp9intrapred_16bpp.asm
> > +++ b/libavcodec/x86/vp9intrapred_16bpp.asm
> > @@ -1170,6 +1170,62 @@ DR_FUNCS 2
> >  INIT_XMM avx
> >  DR_FUNCS 2
> >
> > +%if HAVE_AVX2_EXTERNAL
> > +INIT_YMM avx2
> > +cglobal vp9_ipred_dr_16x16_16, 4, 6, 7, dst, stride, l, a
> > +mova m0, [lq]   ; klmnopqrstuvwxyz
> > +movu m1, [aq-2] ; *abcdefghijklmno
> > +mova m2, [aq]   ; abcdefghijklmnop
>


> From the comment I assume you don't use the extra two bytes
> that you get from the load, as you mark them as "*"
> generic undefined values
>
> > +vperm2i128  m4, m2, m2, q2001  ; ijklmnop
> > +vpalignr    m5, m4, m2, 2  ; bcdefghijklmnop.
> > +vperm2i128  m3, m0, m1, q0201  ; stuvwxyz*abcdefg
> > +LOWPASS  1,  2,  5 ; ABCDEFGHIJKLMNO.
> > +vpalignr    m4, m3, m0, 2  ; lmnopqrstuvwxyz*
> > +vpalignr    m5, m3, m0, 4  ; mnopqrstuvwxyz*a
> > +LOWPASS  0,  4,  5 ; LMNOPQRSTUVWXYZ#
> > +vperm2i128  m5, m0, m1, q0201  ; TUVWXYZ#ABCDEFGH
> > +DEFINE_ARGS dst, stride, stride3, stride5, dst3, cnt
>
> "cnt" doesn't seem to be used.
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/vp9: ipred_dr_16x16_16 avx2 implementation

2017-06-10 Thread gh0st
Yes, you are right, I'll send a patch with this fixed, thanks.

On Sat, Jun 10, 2017 at 5:35 AM, Ivan Kalvachev 
wrote:

> On 6/9/17, Ilia Valiakhmetov  wrote:
> > Signed-off-by: Ilia Valiakhmetov 
> > ---
> >  libavcodec/x86/vp9dsp_init_16bpp.c|  2 ++
> >  libavcodec/x86/vp9intrapred_16bpp.asm | 56
> > +++
> >  2 files changed, 58 insertions(+)
> >
> > diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c
> > b/libavcodec/x86/vp9dsp_init_16bpp.c
> > index d1b8fcd..8d1aa13 100644
> > --- a/libavcodec/x86/vp9dsp_init_16bpp.c
> > +++ b/libavcodec/x86/vp9dsp_init_16bpp.c
> > @@ -52,6 +52,7 @@ decl_ipred_fns(dc,  16, mmxext, sse2);
> >  decl_ipred_fns(dc_top,  16, mmxext, sse2);
> >  decl_ipred_fns(dc_left, 16, mmxext, sse2);
> >  decl_ipred_fn(dl,   16, 16, avx2);
> > +decl_ipred_fn(dr,   16, 16, avx2);
> >  decl_ipred_fn(dl,   32, 16, avx2);
> >
> >  #define decl_ipred_dir_funcs(type) \
> > @@ -136,6 +137,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext
> > *dsp)
> >  init_fpel_func(1, 1,  64, avg, _16, avx2);
> >  init_fpel_func(0, 1, 128, avg, _16, avx2);
> >  init_ipred_func(dl, DIAG_DOWN_LEFT, 16, 16, avx2);
> > +init_ipred_func(dr, DIAG_DOWN_RIGHT, 16, 16, avx2);
> >  init_ipred_func(dl, DIAG_DOWN_LEFT, 32, 16, avx2);
> >  }
> >
> > diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm
> > b/libavcodec/x86/vp9intrapred_16bpp.asm
> > index 92333bc..7230de2 100644
> > --- a/libavcodec/x86/vp9intrapred_16bpp.asm
> > +++ b/libavcodec/x86/vp9intrapred_16bpp.asm
> > @@ -1170,6 +1170,62 @@ DR_FUNCS 2
> >  INIT_XMM avx
> >  DR_FUNCS 2
> >
> > +%if HAVE_AVX2_EXTERNAL
> > +INIT_YMM avx2
> > +cglobal vp9_ipred_dr_16x16_16, 4, 4, 6, dst, stride, l, a
> [...]
> > +DEFINE_ARGS dst, stride, stride3, stride5, dst3
>
> You removed one variable, so now the number of GPRs named by the
> re-defined DEFINE_ARGS should be 5.
> However, the cglobal above has only 4 reserved registers.
>
> It used to be  4, 6, 6
> Now it is 4, 4, 6
> I think it should be 4, 5, 6
>
> Am I missing something?
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/2] avcodec/pthread_slice: add main function support for avpriv_slicethread_create()

2017-09-07 Thread gh0st
Fixed in v2.

On Fri, Sep 8, 2017 at 3:40 AM, Michael Niedermayer 
wrote:

> On Fri, Sep 08, 2017 at 12:55:29AM +0700, Ilia Valiakhmetov wrote:
> > ---
> >  libavcodec/internal.h  |  4 
> >  libavcodec/pthread_slice.c | 33 ++---
> >  libavcodec/thread.h|  1 +
> >  libavutil/slicethread.h| 18 ++
> >  4 files changed, 37 insertions(+), 19 deletions(-)
>
> applying only patch 1 fails to build
>
> libavcodec/vp9.c: In function ‘thread_execute3’:
> libavcodec/vp9.c:94:5: error: unknown type name ‘SliceThreadContext’
>  SliceThreadContext *c = avctx->internal->thread_ctx;
>  ^
> libavcodec/vp9.c:95:6: error: request for member ‘func2’ in something not
> a structure or union
>  c->func2 = func;
>   ^
> libavcodec/vp9.c:96:6: error: request for member ‘m_func’ in something not
> a structure or union
>  c->m_func = m_func;
>   ^
> libavcodec/vp9.c:97:5: error: implicit declaration of function
> ‘ff_thread_execute’ [-Werror=implicit-function-declaration]
>  return ff_thread_execute(avctx, NULL, arg, ret, job_count, 0);
>  ^
> libavcodec/vp9.c: At top level:
> libavcodec/vp9.c:1790:22: error: ‘FF_CODEC_CAP_SLICE_THREAD_HAS_MF’
> undeclared here (not in a function)
>  .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF,
>   ^
>
>
> applying both patches fails to build on mingw64
>
> In file included from src/libavutil/slicethread.c:20:0:
> src/libavutil/slicethread.h:38:5: error: unknown type name
> ‘pthread_cond_t’
>  pthread_cond_t *progress_cond;
>  ^
> src/libavutil/slicethread.h:39:5: error: unknown type name
> ‘pthread_mutex_t’
>  pthread_mutex_t *progress_mutex;
>  ^
> make: *** [libavutil/slicethread.o] Error 1
> make: *** Waiting for unfinished jobs
>
>
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Those who would give up essential Liberty, to purchase a little
> temporary Safety, deserve neither Liberty nor Safety -- Benjamin Franklin
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] vp9: fix compilation with threading disabled.

2017-09-09 Thread gh0st
This also fixes the warning.

---
 libavcodec/vp9.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index a71045e..f940d60 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -88,7 +88,7 @@ static void vp9_await_tile_progress(VP9Context *s, int
field, int n) {
 pthread_mutex_unlock(&s->progress_mutex);
 }
 #else
-static void vp9_free_entries(VP9Context *s) {}
+static void vp9_free_entries(AVCodecContext *avctx) {}
 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {}
 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {}
@@ -1599,7 +1599,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 s->td[i].uveob[1] = s->td[i].uveob_base[1];
 }

-if (avctx->active_thread_type == FF_THREAD_SLICE) {
+if (HAVE_THREADS && avctx->active_thread_type == FF_THREAD_SLICE) {
 int tile_row, tile_col;

 assert(!pass);
-- 
2.8.3


On Sat, Sep 9, 2017 at 11:13 PM, Ronald S. Bultje 
wrote:

> ---
>  libavcodec/vp9.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
> index a71045e..499f357 100644
> --- a/libavcodec/vp9.c
> +++ b/libavcodec/vp9.c
> @@ -1599,7 +1599,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
>  s->td[i].uveob[1] = s->td[i].uveob_base[1];
>  }
>
> -if (avctx->active_thread_type == FF_THREAD_SLICE) {
> +if (HAVE_THREADS && avctx->active_thread_type == FF_THREAD_SLICE)
> {
>  int tile_row, tile_col;
>
>  assert(!pass);
> --
> 2.8.1
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] vp9: fix compilation with threading disabled.

2017-09-09 Thread gh0st
Hm, I don't understand, what exactly is the problem? It applies fine for me.

On Sun, Sep 10, 2017 at 1:27 AM, Michael Niedermayer  wrote:

> On Sat, Sep 09, 2017 at 11:24:12PM +0700, gh0st wrote:
> > This also fixes the warning.
> >
> > ---
> >  libavcodec/vp9.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
> > index a71045e..f940d60 100644
> > --- a/libavcodec/vp9.c
> > +++ b/libavcodec/vp9.c
> > @@ -88,7 +88,7 @@ static void vp9_await_tile_progress(VP9Context *s, int
> > field, int n) {
>
> This is corrupted by newlines
>
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Observe your enemies, for they first find out your faults. -- Antisthenes
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 0/2] Tile threading support for vp9

2017-08-28 Thread gh0st
Fixed in PATCHv3

On Mon, Aug 28, 2017 at 7:40 AM, Michael Niedermayer  wrote:

> On Mon, Aug 28, 2017 at 02:22:15AM +0700, Ilia Valiakhmetov wrote:
> > These patches introduce tile threading support for vp9.
> >
> > Tile threading is ~45% faster at 2 threads vs 1.
> > Frame threading is ~55% faster at 2 threads vs 1.
> > ffvp9 tile threading is ~25% faster than libvpx-vp9 at 2 threads
> >
> > The execute3() function is similar to execute2(), except it has
> > an extra argument - the main function for avpriv_slicethread_create(),
> > which is used for the loopfilter.
> >
> > Ilia Valiakhmetov (2):
> >   avcodec: add execute3() api to utilize the main function of
> > avpriv_slicethread_create().
> >   avcodec/vp9: Add tile threading support
>
> this seems to break build with mips-linux-gnu-gcc-4.4 and
> --disable-pthreads
>
> In file included from src/libavcodec/vp9data.h:28,
>  from src/libavcodec/vp9.c:33:
> src/libavcodec/vp9dec.h:218: error: redefinition of typedef ‘VP9TileData’
> src/libavcodec/vp9dec.h:89: note: previous declaration of ‘VP9TileData’
> was here
> src/libavcodec/vp9.c: In function ‘vp9_free_entries’:
> src/libavcodec/vp9.c:115: error: implicit declaration of function
> ‘pthread_mutex_destroy’
> src/libavcodec/vp9.c:116: error: implicit declaration of function
> ‘pthread_cond_destroy’
> src/libavcodec/vp9.c: In function ‘vp9_alloc_entries’:
> src/libavcodec/vp9.c:138: error: implicit declaration of function
> ‘pthread_mutex_init’
> src/libavcodec/vp9.c:139: error: implicit declaration of function
> ‘pthread_cond_init’
> src/libavcodec/vp9.c: In function ‘vp9_report_tile_progress’:
> src/libavcodec/vp9.c:146: error: implicit declaration of function
> ‘pthread_cond_signal’
> src/libavcodec/vp9.c: In function ‘vp9_await_tile_progress’:
> src/libavcodec/vp9.c:153: error: implicit declaration of function
> ‘pthread_mutex_lock’
> src/libavcodec/vp9.c:155: error: implicit declaration of function
> ‘pthread_cond_wait’
> src/libavcodec/vp9.c:156: error: implicit declaration of function
> ‘pthread_mutex_unlock’
> src/libavcodec/vp9.c: In function ‘vp9_decode_frame’:
> src/libavcodec/vp9.c:1424: warning: ‘pkt_pts’ is deprecated (declared at
> src/libavutil/frame.h:302)
> make: *** [libavcodec/vp9.o] Error 1
> make: *** Waiting for unfinished jobs
>
> [...]
>
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Those who would give up essential Liberty, to purchase a little
> temporary Safety, deserve neither Liberty nor Safety -- Benjamin Franklin
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCHv3 2/2] avcodec/vp9: Add tile threading support

2017-08-30 Thread gh0st
Should be fixed now in PATCHv4, thanks!

On Thu, Aug 31, 2017 at 3:59 AM, Michael Niedermayer  wrote:

> On Mon, Aug 28, 2017 at 06:36:13PM +0700, Ilia Valiakhmetov wrote:
> > Signed-off-by: Ilia Valiakhmetov 
> > ---
> >  libavcodec/vp9.c | 608 ++
> -
> >  libavcodec/vp9_mc_template.c | 202 +++---
> >  libavcodec/vp9block.c| 522 ++
> ---
> >  libavcodec/vp9dec.h  | 108 +---
> >  libavcodec/vp9mvs.c  |  97 +++
> >  libavcodec/vp9prob.c |  64 ++---
> >  libavcodec/vp9recon.c| 153 +--
> >  7 files changed, 1006 insertions(+), 748 deletions(-)
>
> with mips-linux-gnu-gcc-4.4 (Debian 4.4.5-8) 4.4.5
>
> In file included from src/libavcodec/vp9data.h:28,
>  from src/libavcodec/vp9.c:33:
> src/libavcodec/vp9dec.h:218: error: redefinition of typedef ‘VP9TileData’
> src/libavcodec/vp9dec.h:89: note: previous declaration of ‘VP9TileData’
> was here
> src/libavcodec/vp9.c: In function ‘vp9_decode_frame’:
> src/libavcodec/vp9.c:1379: warning: ‘pkt_pts’ is deprecated (declared at
> src/libavutil/frame.h:302)
> make: *** [libavcodec/vp9.o] Error 1
> make: *** Waiting for unfinished jobs
> In file included from src/libavcodec/vp9data.h:28,
>  from src/libavcodec/vp9data.c:24:
> src/libavcodec/vp9dec.h:218: error: redefinition of typedef ‘VP9TileData’
> src/libavcodec/vp9dec.h:89: note: previous declaration of ‘VP9TileData’
> was here
> make: *** [libavcodec/vp9data.o] Error 1
> In file included from src/libavcodec/vp9data.h:28,
>  from src/libavcodec/vp9block.c:32:
> src/libavcodec/vp9dec.h:218: error: redefinition of typedef ‘VP9TileData’
> src/libavcodec/vp9dec.h:89: note: previous declaration of ‘VP9TileData’
> was here
> make: *** [libavcodec/vp9block.o] Error 1
> src/libavcodec/utils.c: In function ‘avcodec_open2’:
> src/libavcodec/utils.c:859: warning: ‘coded_frame’ is deprecated (declared
> at src/libavcodec/avcodec.h:3170)
> src/libavcodec/utils.c:860: warning: ‘coded_frame’ is deprecated (declared
> at src/libavcodec/avcodec.h:3170)
> src/libavcodec/utils.c:1135: warning: ‘coded_frame’ is deprecated
> (declared at src/libavcodec/avcodec.h:3170)
> src/libavcodec/utils.c: In function ‘avcodec_close’:
> src/libavcodec/utils.c:1232: warning: ‘coded_frame’ is deprecated
> (declared at src/libavcodec/avcodec.h:3170)
> src/libavcodec/utils.c: In function ‘av_log_missing_feature’:
> src/libavcodec/utils.c:1936: warning: ‘av_log_ask_for_sample’ is
> deprecated (declared at src/libavcodec/avcodec.h:6288)
>
>
>
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> When the tyrant has disposed of foreign enemies by conquest or treaty, and
> there is nothing more to fear from them, then he is always stirring up
> some war or other, in order that the people may require a leader. -- Plato
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel