date:20170218

[FFmpeg-cvslog] avcodec/cuvid: add drop_second_field as input option

2017-02-18 Thread Miroslav Slugeň

ffmpeg | branch: master | Miroslav Slugeň  | Sun Feb 12 
21:22:46 2017 +0100| [2a2f6b28873e0a665c6a6075c4db5692497b52bb] | committer: 
Timo Rothenpieler

avcodec/cuvid: add drop_second_field as input option

Signed-off-by: Timo Rothenpieler 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2a2f6b28873e0a665c6a6075c4db5692497b52bb
---

 libavcodec/cuvid.c   | 10 +++---
 libavcodec/version.h |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c
index 844e782..c0b4a37 100644
--- a/libavcodec/cuvid.c
+++ b/libavcodec/cuvid.c
@@ -42,6 +42,7 @@ typedef struct CuvidContext
 
 char *cu_gpu;
 int nb_surfaces;
+int drop_second_field;
 
 AVBufferRef *hwdevice;
 AVBufferRef *hwframe;
@@ -267,7 +268,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void 
*opaque, CUVIDEOFORMAT* form
 cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
 cuinfo.DeinterlaceMode = ctx->deint_mode_current;
 
-if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave)
+if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave && 
!ctx->drop_second_field)
 avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
 
 ctx->internal_error = 
CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
@@ -317,8 +318,10 @@ static int CUDAAPI cuvid_handle_picture_display(void 
*opaque, CUVIDPARSERDISPINF
 } else {
 parsed_frame.is_deinterlacing = 1;
 av_fifo_generic_write(ctx->frame_queue, &parsed_frame, 
sizeof(CuvidParsedFrame), NULL);
-parsed_frame.second_field = 1;
-av_fifo_generic_write(ctx->frame_queue, &parsed_frame, 
sizeof(CuvidParsedFrame), NULL);
+if (!ctx->drop_second_field) {
+parsed_frame.second_field = 1;
+av_fifo_generic_write(ctx->frame_queue, &parsed_frame, 
sizeof(CuvidParsedFrame), NULL);
+}
 }
 
 return 1;
@@ -949,6 +952,7 @@ static const AVOption options[] = {
 { "adaptive", "Adaptive deinterlacing",  0, 
AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, 
"deint" },
 { "gpu",  "GPU to be used for decoding", OFFSET(cu_gpu), 
AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
 { "surfaces", "Maximum surfaces to be used for decoding", 
OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
+{ "drop_second_field", "Drop second field when deinterlacing", 
OFFSET(drop_second_field), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
 { NULL }
 };
 
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 49089db..6d1a1fd 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 
 #define LIBAVCODEC_VERSION_MAJOR  57
 #define LIBAVCODEC_VERSION_MINOR  80
-#define LIBAVCODEC_VERSION_MICRO 100
+#define LIBAVCODEC_VERSION_MICRO 101
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] avcodec/cuvid: don't overwrite deinterlace at progressive input

2017-02-18 Thread Miroslav Slugeň

ffmpeg | branch: master | Miroslav Slugeň  | Sun Feb 12 
18:47:07 2017 +0100| [4cb8872eb79a43b7acaa35bc92ffd1ab1a64eb75] | committer: 
Timo Rothenpieler

avcodec/cuvid: don't overwrite deinterlace at progressive input

Previously, progressive input disabled deinterlacing in cuvid for all
future frames, even interlaced ones.

Signed-off-by: Timo Rothenpieler 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4cb8872eb79a43b7acaa35bc92ffd1ab1a64eb75
---

 libavcodec/cuvid.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c
index f5a49ce..844e782 100644
--- a/libavcodec/cuvid.c
+++ b/libavcodec/cuvid.c
@@ -51,6 +51,7 @@ typedef struct CuvidContext
 AVFifoBuffer *frame_queue;
 
 int deint_mode;
+int deint_mode_current;
 int64_t prev_pts;
 
 int internal_error;
@@ -164,7 +165,11 @@ static int CUDAAPI cuvid_handle_video_sequence(void 
*opaque, CUVIDEOFORMAT* form
 (AVRational){ format->display_aspect_ratio.x, 
format->display_aspect_ratio.y },
 (AVRational){ avctx->width, avctx->height }));
 
-if (!format->progressive_sequence && ctx->deint_mode == 
cudaVideoDeinterlaceMode_Weave)
+ctx->deint_mode_current = format->progressive_sequence
+  ? cudaVideoDeinterlaceMode_Weave
+  : ctx->deint_mode;
+
+if (!format->progressive_sequence && ctx->deint_mode_current == 
cudaVideoDeinterlaceMode_Weave)
 avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
 else
 avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
@@ -260,14 +265,9 @@ static int CUDAAPI cuvid_handle_video_sequence(void 
*opaque, CUVIDEOFORMAT* form
 cuinfo.ulNumOutputSurfaces = 1;
 cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
 cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
+cuinfo.DeinterlaceMode = ctx->deint_mode_current;
 
-if (format->progressive_sequence) {
-ctx->deint_mode = cuinfo.DeinterlaceMode = 
cudaVideoDeinterlaceMode_Weave;
-} else {
-cuinfo.DeinterlaceMode = ctx->deint_mode;
-}
-
-if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
+if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave)
 avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
 
 ctx->internal_error = 
CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
@@ -312,7 +312,7 @@ static int CUDAAPI cuvid_handle_picture_display(void 
*opaque, CUVIDPARSERDISPINF
 parsed_frame.dispinfo = *dispinfo;
 ctx->internal_error = 0;
 
-if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) {
+if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
 av_fifo_generic_write(ctx->frame_queue, &parsed_frame, 
sizeof(CuvidParsedFrame), NULL);
 } else {
 parsed_frame.is_deinterlacing = 1;
@@ -583,7 +583,7 @@ static int cuvid_decode_frame(AVCodecContext *avctx, void 
*data, int *got_frame,
 
 av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");
 
-if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) {
+if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave) {
 av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the 
old API\n");
 return AVERROR(EINVAL);
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] tests/fate/hevc: remove vsync drop from where it is not needed anymore

2017-02-18 Thread Michael Niedermayer

ffmpeg | branch: master | Michael Niedermayer  | Sat 
Feb 18 16:29:14 2017 +0100| [56803218db9a52929366a0e67d827a50089f4982] | 
committer: Michael Niedermayer

tests/fate/hevc: remove vsync drop from where it is not needed anymore

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=56803218db9a52929366a0e67d827a50089f4982
---

 tests/fate/hevc.mak | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak
index bd09ab3..bb68328 100644
--- a/tests/fate/hevc.mak
+++ b/tests/fate/hevc.mak
@@ -192,27 +192,27 @@ endef
 
 define FATE_HEVC_TEST_10BIT
 FATE_HEVC += fate-hevc-conformance-$(1)
-fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p10le
+fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p10le
 endef
 
 define FATE_HEVC_TEST_422_10BIT
 FATE_HEVC += fate-hevc-conformance-$(1)
-fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv422p10le
+fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv422p10le
 endef
 
 define FATE_HEVC_TEST_422_10BIN
 FATE_HEVC += fate-hevc-conformance-$(1)
-fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bin -pix_fmt yuv422p10le
+fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bin -pix_fmt yuv422p10le
 endef
 
 define FATE_HEVC_TEST_444_8BIT
 FATE_HEVC += fate-hevc-conformance-$(1)
-fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bit
+fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bit
 endef
 
 define FATE_HEVC_TEST_444_12BIT
 FATE_HEVC += fate-hevc-conformance-$(1)
-fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv444p12le
+fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i 
$(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv444p12le
 endef
 
 $(foreach N,$(HEVC_SAMPLES),$(eval $(call FATE_HEVC_TEST,$(N))))

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] avcodec/h264: add named parameters to x86 function

2017-02-18 Thread James Darnley

ffmpeg | branch: master | James Darnley  | Fri Feb 10 20:13:50 
2017 +0100| [e18bc2114f3deb8ef1ab9ddaef282c8d9678669d] | committer: James 
Darnley

avcodec/h264: add named parameters to x86 function

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e18bc2114f3deb8ef1ab9ddaef282c8d9678669d
---

 libavcodec/x86/h264_deblock.asm | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 435c8be..509a0db 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -282,18 +282,18 @@ cextern pb_3
 ;int8_t *tc0)
 ;-
 %macro DEBLOCK_LUMA 0
-cglobal deblock_v_luma_8, 5,5,10
+cglobal deblock_v_luma_8, 5,5,10, pix_, stride_, alpha_, beta_, base3_
 movd    m8, [r4] ; tc0
-lea r4, [r1*3]
-dec r2d; alpha-1
+lea r4, [stride_q*3]
+dec alpha_d; alpha-1
 neg r4
-dec r3d; beta-1
-add r4, r0 ; pix-3*stride
+dec beta_d; beta-1
+add base3_q, pix_q ; pix-3*stride
 
-mova    m0, [r4+r1]   ; p1
-mova    m1, [r4+2*r1] ; p0
-mova    m2, [r0]  ; q0
-mova    m3, [r0+r1]   ; q1
+mova    m0, [base3_q + stride_q]   ; p1
+mova    m1, [base3_q + 2*stride_q] ; p0
+mova    m2, [pix_q]  ; q0
+mova    m3, [pix_q + stride_q]   ; q1
 LOAD_MASK r2d, r3d
 
 punpcklbw m8, m8
@@ -303,24 +303,24 @@ cglobal deblock_v_luma_8, 5,5,10
 pandn   m9, m7
 pand    m8, m9
 
-movdqa  m3, [r4] ; p2
+movdqa  m3, [base3_q] ; p2
 DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
 pand    m6, m9
 psubb   m7, m8, m6
 pand    m6, m8
-LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4
+LUMA_Q1 m0, m3, [base3_q], [base3_q + stride_q], m6, m4
 
-movdqa  m4, [r0+2*r1] ; q2
+movdqa  m4, [pix_q + 2*stride_q] ; q2
 DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1
 pand    m6, m9
 pand    m8, m6
 psubb   m7, m6
-mova    m3, [r0+r1]
-LUMA_Q1 m3, m4, [r0+2*r1], [r0+r1], m8, m6
+mova    m3, [pix_q + stride_q]
+LUMA_Q1 m3, m4, [pix_q + 2*stride_q], [pix_q + stride_q], m8, m6
 
 DEBLOCK_P0_Q0
-mova    [r4+2*r1], m1
-mova    [r0], m2
+mova    [base3_q + 2*stride_q], m1
+mova    [pix_q], m2
 RET
 
 ;-

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] avcodec/h264: sse2, avx h luma mbaff deblock/loop filter

2017-02-18 Thread James Darnley

ffmpeg | branch: master | James Darnley  | Fri Feb 10 23:17:57 
2017 +0100| [533688786799b22d0711eedcfe8f84deea014f30] | committer: James 
Darnley

avcodec/h264: sse2, avx h luma mbaff deblock/loop filter

x86-64 only

Yorkfield:
- sse2: ~2.17x (434 vs. 200 cycles)

Nehalem:
- sse2: ~2.94x (409 vs. 139 cycles)

Skylake:
- sse2: ~3.10x (370 vs. 119 cycles)
- avx:  ~3.29x (370 vs. 112 cycles)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=533688786799b22d0711eedcfe8f84deea014f30
---

 libavcodec/x86/h264_deblock.asm | 89 +
 libavcodec/x86/h264dsp_init.c   | 10 +
 libavutil/x86/x86util.asm   | 15 +++
 3 files changed, 114 insertions(+)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 509a0db..93caa67 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -377,10 +377,99 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
 RET
 %endmacro
 
+%macro DEBLOCK_H_LUMA_MBAFF 0
+
+cglobal deblock_h_luma_mbaff_8, 5, 9, 10, 8*16, pix_, stride_, alpha_, beta_, 
tc0_, base3_, stride3_
+movsxd stride_q,   stride_d
+dec    alpha_d
+dec    beta_d
+mov    base3_q,    pix_q
+lea    stride3_q, [3*stride_q]
+add    base3_q,    stride3_q
+
+movq m0, [pix_q - 4]
+movq m1, [pix_q + stride_q - 4]
+movq m2, [pix_q + 2*stride_q - 4]
+movq m3, [base3_q - 4]
+movq m4, [base3_q + stride_q - 4]
+movq m5, [base3_q + 2*stride_q - 4]
+movq m6, [base3_q + stride3_q - 4]
+movq m7, [base3_q + 4*stride_q - 4]
+
+TRANSPOSE_8X8B 0,1,2,3,4,5,6,7
+
+%assign i 0
+%rep 8
+movq [rsp + 16*i], m %+ i
+%assign i i+1
+%endrep
+
+; p2 = m1 [rsp + 16]
+; p1 = m2 [rsp + 32]
+; p0 = m3 [rsp + 48]
+; q0 = m4 [rsp + 64]
+; q1 = m5 [rsp + 80]
+; q2 = m6 [rsp + 96]
+
+SWAP 0, 2
+SWAP 1, 3
+SWAP 2, 4
+SWAP 3, 5
+
+LOAD_MASK alpha_d, beta_d
+movd m8, [tc0_q]
+punpcklbw m8, m8
+pcmpeqb m9, m9
+pcmpeqb m9, m8
+pandn   m9, m7
+pand    m8, m9
+
+movdqa  m3, [rsp + 16] ; p2
+DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
+pand    m6, m9
+psubb   m7, m8, m6
+pand    m6, m8
+LUMA_Q1 m0, m3, [rsp + 16], [rsp + 32], m6, m4
+
+movdqa  m4, [rsp + 96] ; q2
+DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1
+pand    m6, m9
+pand    m8, m6
+psubb   m7, m6
+mova    m3, [rsp + 80]
+LUMA_Q1 m3, m4, [rsp + 96], [rsp + 80], m8, m6
+
+DEBLOCK_P0_Q0
+SWAP 1, 3
+SWAP 2, 4
+movq m0, [rsp]
+movq m1, [rsp + 16]
+movq m2, [rsp + 32]
+movq m5, [rsp + 80]
+movq m6, [rsp + 96]
+movq m7, [rsp + 112]
+
+TRANSPOSE_8X8B 0,1,2,3,4,5,6,7
+movq [pix_q - 4], m0
+movq [pix_q + stride_q - 4], m1
+movq [pix_q + 2*stride_q - 4], m2
+movq [base3_q - 4], m3
+movq [base3_q + stride_q - 4], m4
+movq [base3_q + 2*stride_q - 4], m5
+movq [base3_q + stride3_q - 4], m6
+movq [base3_q + 4*stride_q - 4], m7
+
+RET
+
+%endmacro
+
 INIT_XMM sse2
+DEBLOCK_H_LUMA_MBAFF
 DEBLOCK_LUMA
+
 %if HAVE_AVX_EXTERNAL
 INIT_XMM avx
+DEBLOCK_H_LUMA_MBAFF
 DEBLOCK_LUMA
 %endif
 
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 7b3d17f..10f1940 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -137,6 +137,9 @@ LF_IFUNC(h, chroma422_intra, depth, avx)\
 LF_FUNC(v,  chroma,  depth, avx)\
 LF_IFUNC(v, chroma_intra,depth, avx)
 
+LF_FUNC(h, luma_mbaff, 8, sse2)
+LF_FUNC(h, luma_mbaff, 8, avx)
+
 LF_FUNCS(uint8_t,   8)
 LF_FUNCS(uint16_t, 10)
 
@@ -297,6 +300,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const 
int bit_depth,
 c->h264_h_loop_filter_luma   = ff_deblock_h_luma_8_sse2;
 c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
+
+#if ARCH_X86_64
+c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2;
+#endif
 }
 if (EXTERNAL_SSSE3(cpu_flags)) {
 c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
@@ -307,6 +314,9 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const 
int bit_depth,
 c->h264_h_loop_filter_luma   = ff_deblock_h_luma_8_avx;
 c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
 c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
+#if ARCH_X86_64
+c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx;
+#endif
 }
 } else if (bit_depth == 10) {
 if (EXTERNAL_MMXEXT(cpu_flags)) {
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 1408f0a..c50ddc6 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -265,6 +265,21 @@
 SWAP   %12, %15
 %endmacro
 
+%macro TRANSPOSE_8X8B 8

[FFmpeg-cvslog] avcodec/x86: deduplicate PASS8ROWS macro

2017-02-18 Thread James Darnley

ffmpeg | branch: master | James Darnley  | Thu Feb  9 22:06:17 
2017 +0100| [9d815b7424b56ffcf6f4dea4fd09c53661e4133a] | committer: James 
Darnley

avcodec/x86: deduplicate PASS8ROWS macro

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9d815b7424b56ffcf6f4dea4fd09c53661e4133a
---

 libavcodec/x86/h264_deblock.asm   | 5 -
 libavcodec/x86/h264_deblock_10bit.asm | 5 -
 libavcodec/x86/hevc_deblock.asm   | 5 -
 libavutil/x86/x86util.asm | 5 +
 4 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index fe0ab20..435c8be 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -37,11 +37,6 @@ cextern pb_0
 cextern pb_1
 cextern pb_3
 
-; expands to [base],...,[base+7*stride]
-%define PASS8ROWS(base, base3, stride, stride3) \
-[base], [base+stride], [base+stride*2], [base3], \
-[base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4]
-
 %define PASS8ROWS(base, base3, stride, stride3, offset) \
 PASS8ROWS(base+offset, base3+offset, stride, stride3)
 
diff --git a/libavcodec/x86/h264_deblock_10bit.asm 
b/libavcodec/x86/h264_deblock_10bit.asm
index c295364..1af3257 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -843,11 +843,6 @@ DEBLOCK_LUMA_INTRA
 mova [r0+2*r1], m2
 %endmacro
 
-; expands to [base],...,[base+7*stride]
-%define PASS8ROWS(base, base3, stride, stride3) \
-[base], [base+stride], [base+stride*2], [base3], \
-[base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4]
-
 ; in: 8 rows of 4 words in %4..%11
 ; out: 4 rows of 8 words in m0..m3
 %macro TRANSPOSE4x8W_LOAD 8
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index 48a5975..85ee480 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -39,11 +39,6 @@ cextern pw_m1
 SECTION .text
 INIT_XMM sse2
 
-; expands to [base],...,[base+7*stride]
-%define PASS8ROWS(base, base3, stride, stride3) \
-[base], [base+stride], [base+stride*2], [base3], \
-[base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4]
-
 ; in: 8 rows of 4 bytes in %4..%11
 ; out: 4 rows of 8 words in m0..m3
 %macro TRANSPOSE4x8B_LOAD 8
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 44ed750..c063436 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -29,6 +29,11 @@
 
 %include "libavutil/x86/x86inc.asm"
 
+; expands to [base],...,[base+7*stride]
+%define PASS8ROWS(base, base3, stride, stride3) \
+[base],   [base  + stride],   [base  + 2*stride], [base3], \
+[base3 + stride], [base3 + 2*stride], [base3 + stride3],  [base3 + 
stride*4]
+
 %macro SBUTTERFLY 4
 %ifidn %1, dqqq
 vperm2i128  m%4, m%2, m%3, q0301

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] x86util: import MOVHL macro

2017-02-18 Thread James Darnley

ffmpeg | branch: master | James Darnley  | Sat Feb 11 13:25:09 
2017 +0100| [7627df15d411a69f236b4650e88b1ab911f38efc] | committer: James 
Darnley

x86util: import MOVHL macro

Originally committed to x264 in 1637239a by Henrik Gramner who has
agreed to re-license it as LGPL.  Original commit message follows.

x86: Avoid some bypass delays and false dependencies

A bypass delay of 1-3 clock cycles may occur on some CPUs when transitioning
between int and float domains, so try to avoid that if possible.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7627df15d411a69f236b4650e88b1ab911f38efc
---

 libavutil/x86/x86util.asm | 12 
 1 file changed, 12 insertions(+)

diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index c063436..1408f0a 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -876,3 +876,15 @@
 psrlq   %1, 8*(%2)
 %endif
 %endmacro
+
+%macro MOVHL 2 ; dst, src
+%ifidn %1, %2
+punpckhqdq %1, %2
+%elif cpuflag(avx)
+punpckhqdq %1, %2, %2
+%elif cpuflag(sse4)
+pshufd %1, %2, q3232 ; pshufd is slow on some older CPUs, so only use 
it on more modern ones
+%else
+movhlps%1, %2; may cause an int/float domain transition and 
has a dependency on dst
+%endif
+%endmacro

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] avformat/utils: Also fill dts==RELATIVE_TS_BASE packets in update_initial_durations()

2017-02-18 Thread Michael Niedermayer

ffmpeg | branch: master | Michael Niedermayer  | Sun 
Feb 19 00:31:09 2017 +0100| [3206ea4ba31ebf446a3c4f1220adb895b3272c15] | 
committer: Michael Niedermayer

avformat/utils: Also fill dts==RELATIVE_TS_BASE packets in 
update_initial_durations()

This dts value can end up in the list in the absence of durations and is in that
case semantically identical to AV_NOPTS_VALUE. We can alternatively prevent
storing RELATIVE_TS_BASE if there is no duration.

Fixes Ticket3640

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3206ea4ba31ebf446a3c4f1220adb895b3272c15
---

 libavformat/utils.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavformat/utils.c b/libavformat/utils.c
index 0711310..37d7024 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1164,8 +1164,11 @@ static void update_initial_durations(AVFormatContext *s, 
AVStream *st,
 for (; pktl; pktl = get_next_pkt(s, st, pktl)) {
 if (pktl->pkt.stream_index != stream_index)
 continue;
-if (pktl->pkt.pts == pktl->pkt.dts  &&
-(pktl->pkt.dts == AV_NOPTS_VALUE || pktl->pkt.dts == 
st->first_dts) &&
+if ((pktl->pkt.pts == pktl->pkt.dts ||
+ pktl->pkt.pts == AV_NOPTS_VALUE) &&
+(pktl->pkt.dts == AV_NOPTS_VALUE ||
+ pktl->pkt.dts == st->first_dts ||
+ pktl->pkt.dts == RELATIVE_TS_BASE) &&
 !pktl->pkt.duration) {
 pktl->pkt.dts = cur_dts;
 if (!st->internal->avctx->has_b_frames)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] avcodec/mpeg12dec: Provide debug level log on skipped P/B frames

2017-02-18 Thread Michael Niedermayer

ffmpeg | branch: master | Michael Niedermayer  | Sat 
Feb 18 19:03:39 2017 +0100| [3f28caf72083231131ee35ebc81be2c4dfe95e11] | 
committer: Michael Niedermayer

avcodec/mpeg12dec: Provide debug level log on skipped P/B frames

Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3f28caf72083231131ee35ebc81be2c4dfe95e11
---

 libavcodec/mpeg12dec.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
index 6397907..f3cf6eb 100644
--- a/libavcodec/mpeg12dec.c
+++ b/libavcodec/mpeg12dec.c
@@ -2660,6 +2660,8 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame 
*picture,
 if (s2->pict_type == AV_PICTURE_TYPE_B) {
 if (!s2->closed_gop) {
 skip_frame = 1;
+av_log(s2->avctx, AV_LOG_DEBUG,
+   "Skipping B slice due to open GOP\n");
 break;
 }
 }
@@ -2671,6 +2673,8 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame 
*picture,
  * we have an invalid header. */
 if (s2->pict_type == AV_PICTURE_TYPE_P && !s->sync) {
 skip_frame = 1;
+av_log(s2->avctx, AV_LOG_DEBUG,
+   "Skipping P slice due to !sync\n");
 break;
 }
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] avcodec/cuvid: add drop_second_field as input option

[FFmpeg-cvslog] avcodec/cuvid: don't overwrite deinterlace at progressive input

[FFmpeg-cvslog] tests/fate/hevc: remove vsync drop from where it is not needed anymore

[FFmpeg-cvslog] avcodec/h264: add named parameters to x86 function

[FFmpeg-cvslog] avcodec/h264: sse2, avx h luma mbaff deblock/loop filter

[FFmpeg-cvslog] avcodec/x86: deduplicate PASS8ROWS macro

[FFmpeg-cvslog] x86util: import MOVHL macro

[FFmpeg-cvslog] avformat/utils: Also fill dts==RELATIVE_TS_BASE packets in update_initial_durations()

[FFmpeg-cvslog] avcodec/mpeg12dec: Provide debug level log on skipped P/B frames

9 matches

Site Navigation

Mail list logo

Footer information