[FFmpeg-cvslog] avcodec/cuvid: add drop_second_field as input option
ffmpeg | branch: master | Miroslav Slugeň | Sun Feb 12 21:22:46 2017 +0100| [2a2f6b28873e0a665c6a6075c4db5692497b52bb] | committer: Timo Rothenpieler avcodec/cuvid: add drop_second_field as input option Signed-off-by: Timo Rothenpieler > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2a2f6b28873e0a665c6a6075c4db5692497b52bb --- libavcodec/cuvid.c | 10 +++--- libavcodec/version.h | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c index 844e782..c0b4a37 100644 --- a/libavcodec/cuvid.c +++ b/libavcodec/cuvid.c @@ -42,6 +42,7 @@ typedef struct CuvidContext char *cu_gpu; int nb_surfaces; +int drop_second_field; AVBufferRef *hwdevice; AVBufferRef *hwframe; @@ -267,7 +268,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8; cuinfo.DeinterlaceMode = ctx->deint_mode_current; -if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave) +if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field) avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1}); ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo)); @@ -317,8 +318,10 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF } else { parsed_frame.is_deinterlacing = 1; av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); -parsed_frame.second_field = 1; -av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); +if (!ctx->drop_second_field) { +parsed_frame.second_field = 1; +av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); +} } return 1; @@ -949,6 +952,7 @@ static const AVOption options[] = { { "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" }, { "gpu", "GPU to be used for decoding", 
OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD }, { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD }, +{ "drop_second_field", "Drop second field when deinterlacing", OFFSET(drop_second_field), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD }, { NULL } }; diff --git a/libavcodec/version.h b/libavcodec/version.h index 49089db..6d1a1fd 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,7 +29,7 @@ #define LIBAVCODEC_VERSION_MAJOR 57 #define LIBAVCODEC_VERSION_MINOR 80 -#define LIBAVCODEC_VERSION_MICRO 100 +#define LIBAVCODEC_VERSION_MICRO 101 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/cuvid: don't overwrite deinterlace at progressive input
ffmpeg | branch: master | Miroslav Slugeň | Sun Feb 12 18:47:07 2017 +0100| [4cb8872eb79a43b7acaa35bc92ffd1ab1a64eb75] | committer: Timo Rothenpieler avcodec/cuvid: don't overwrite deinterlace at progressive input If there is progressive input it will disable deinterlacing in cuvid for all future frames even those interlaced. Signed-off-by: Timo Rothenpieler > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4cb8872eb79a43b7acaa35bc92ffd1ab1a64eb75 --- libavcodec/cuvid.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c index f5a49ce..844e782 100644 --- a/libavcodec/cuvid.c +++ b/libavcodec/cuvid.c @@ -51,6 +51,7 @@ typedef struct CuvidContext AVFifoBuffer *frame_queue; int deint_mode; +int deint_mode_current; int64_t prev_pts; int internal_error; @@ -164,7 +165,11 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y }, (AVRational){ avctx->width, avctx->height })); -if (!format->progressive_sequence && ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) +ctx->deint_mode_current = format->progressive_sequence + ? 
cudaVideoDeinterlaceMode_Weave + : ctx->deint_mode; + +if (!format->progressive_sequence && ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT; else avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT; @@ -260,14 +265,9 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form cuinfo.ulNumOutputSurfaces = 1; cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8; +cuinfo.DeinterlaceMode = ctx->deint_mode_current; -if (format->progressive_sequence) { -ctx->deint_mode = cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; -} else { -cuinfo.DeinterlaceMode = ctx->deint_mode; -} - -if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) +if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave) avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1}); ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo)); @@ -312,7 +312,7 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF parsed_frame.dispinfo = *dispinfo; ctx->internal_error = 0; -if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) { +if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) { av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); } else { parsed_frame.is_deinterlacing = 1; @@ -583,7 +583,7 @@ static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n"); -if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) { +if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave) { av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n"); return AVERROR(EINVAL); } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] tests/fate/hevc: remove vsync drop from where it is not needed anymore
ffmpeg | branch: master | Michael Niedermayer | Sat Feb 18 16:29:14 2017 +0100| [56803218db9a52929366a0e67d827a50089f4982] | committer: Michael Niedermayer tests/fate/hevc: remove vsync drop from where it is not needed anymore Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=56803218db9a52929366a0e67d827a50089f4982 --- tests/fate/hevc.mak | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/fate/hevc.mak b/tests/fate/hevc.mak index bd09ab3..bb68328 100644 --- a/tests/fate/hevc.mak +++ b/tests/fate/hevc.mak @@ -192,27 +192,27 @@ endef define FATE_HEVC_TEST_10BIT FATE_HEVC += fate-hevc-conformance-$(1) -fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p10le +fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv420p10le endef define FATE_HEVC_TEST_422_10BIT FATE_HEVC += fate-hevc-conformance-$(1) -fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv422p10le +fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv422p10le endef define FATE_HEVC_TEST_422_10BIN FATE_HEVC += fate-hevc-conformance-$(1) -fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bin -pix_fmt yuv422p10le +fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bin -pix_fmt yuv422p10le endef define FATE_HEVC_TEST_444_8BIT FATE_HEVC += fate-hevc-conformance-$(1) -fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit +fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit endef define FATE_HEVC_TEST_444_12BIT FATE_HEVC += 
fate-hevc-conformance-$(1) -fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -vsync drop -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv444p12le +fate-hevc-conformance-$(1): CMD = framecrc -flags unaligned -i $(TARGET_SAMPLES)/hevc-conformance/$(1).bit -pix_fmt yuv444p12le endef $(foreach N,$(HEVC_SAMPLES),$(eval $(call FATE_HEVC_TEST,$(N ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/h264: add named parameters to x86 function
ffmpeg | branch: master | James Darnley | Fri Feb 10 20:13:50 2017 +0100| [e18bc2114f3deb8ef1ab9ddaef282c8d9678669d] | committer: James Darnley avcodec/h264: add named parameters to x86 function > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e18bc2114f3deb8ef1ab9ddaef282c8d9678669d --- libavcodec/x86/h264_deblock.asm | 32 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm index 435c8be..509a0db 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -282,18 +282,18 @@ cextern pb_3 ;int8_t *tc0) ;- %macro DEBLOCK_LUMA 0 -cglobal deblock_v_luma_8, 5,5,10 +cglobal deblock_v_luma_8, 5,5,10, pix_, stride_, alpha_, beta_, base3_ movdm8, [r4] ; tc0 -lea r4, [r1*3] -dec r2d; alpha-1 +lea r4, [stride_q*3] +dec alpha_d; alpha-1 neg r4 -dec r3d; beta-1 -add r4, r0 ; pix-3*stride +dec beta_d; beta-1 +add base3_q, pix_q ; pix-3*stride -movam0, [r4+r1] ; p1 -movam1, [r4+2*r1] ; p0 -movam2, [r0] ; q0 -movam3, [r0+r1] ; q1 +movam0, [base3_q + stride_q] ; p1 +movam1, [base3_q + 2*stride_q] ; p0 +movam2, [pix_q] ; q0 +movam3, [pix_q + stride_q] ; q1 LOAD_MASK r2d, r3d punpcklbw m8, m8 @@ -303,24 +303,24 @@ cglobal deblock_v_luma_8, 5,5,10 pandn m9, m7 pandm8, m9 -movdqa m3, [r4] ; p2 +movdqa m3, [base3_q] ; p2 DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 pandm6, m9 psubb m7, m8, m6 pandm6, m8 -LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4 +LUMA_Q1 m0, m3, [base3_q], [base3_q + stride_q], m6, m4 -movdqa m4, [r0+2*r1] ; q2 +movdqa m4, [pix_q + 2*stride_q] ; q2 DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1 pandm6, m9 pandm8, m6 psubb m7, m6 -movam3, [r0+r1] -LUMA_Q1 m3, m4, [r0+2*r1], [r0+r1], m8, m6 +movam3, [pix_q + stride_q] +LUMA_Q1 m3, m4, [pix_q + 2*stride_q], [pix_q + stride_q], m8, m6 DEBLOCK_P0_Q0 -mova[r4+2*r1], m1 -mova[r0], m2 +mova[base3_q + 2*stride_q], m1 +mova[pix_q], m2 RET ;- ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org 
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/h264: sse2, avx h luma mbaff deblock/loop filter
ffmpeg | branch: master | James Darnley | Fri Feb 10 23:17:57 2017 +0100| [533688786799b22d0711eedcfe8f84deea014f30] | committer: James Darnley avcodec/h264: sse2, avx h luma mbaff deblock/loop filter x86-64 only Yorkfield: - sse2: ~2.17x (434 vs. 200 cycles) Nehalem: - sse2: ~2.94x (409 vs. 139 cycles) Skylake: - sse2: ~3.10x (370 vs. 119 cycles) - avx: ~3.29x (370 vs. 112 cycles) > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=533688786799b22d0711eedcfe8f84deea014f30 --- libavcodec/x86/h264_deblock.asm | 89 + libavcodec/x86/h264dsp_init.c | 10 + libavutil/x86/x86util.asm | 15 +++ 3 files changed, 114 insertions(+) diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm index 509a0db..93caa67 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -377,10 +377,99 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64 RET %endmacro +%macro DEBLOCK_H_LUMA_MBAFF 0 + +cglobal deblock_h_luma_mbaff_8, 5, 9, 10, 8*16, pix_, stride_, alpha_, beta_, tc0_, base3_, stride3_ +movsxd stride_q, stride_d +decalpha_d +decbeta_d +movbase3_q,pix_q +leastride3_q, [3*stride_q] +addbase3_q,stride3_q + +movq m0, [pix_q - 4] +movq m1, [pix_q + stride_q - 4] +movq m2, [pix_q + 2*stride_q - 4] +movq m3, [base3_q - 4] +movq m4, [base3_q + stride_q - 4] +movq m5, [base3_q + 2*stride_q - 4] +movq m6, [base3_q + stride3_q - 4] +movq m7, [base3_q + 4*stride_q - 4] + +TRANSPOSE_8X8B 0,1,2,3,4,5,6,7 + +%assign i 0 +%rep 8 +movq [rsp + 16*i], m %+ i +%assign i i+1 +%endrep + +; p2 = m1 [rsp + 16] +; p1 = m2 [rsp + 32] +; p0 = m3 [rsp + 48] +; q0 = m4 [rsp + 64] +; q1 = m5 [rsp + 80] +; q2 = m6 [rsp + 96] + +SWAP 0, 2 +SWAP 1, 3 +SWAP 2, 4 +SWAP 3, 5 + +LOAD_MASK alpha_d, beta_d +movd m8, [tc0_q] +punpcklbw m8, m8 +pcmpeqb m9, m9 +pcmpeqb m9, m8 +pandn m9, m7 +pandm8, m9 + +movdqa m3, [rsp + 16] ; p2 +DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1 +pandm6, m9 +psubb m7, m8, m6 +pandm6, m8 +LUMA_Q1 m0, m3, [rsp + 16], [rsp + 32], m6, m4 + 
+movdqa m4, [rsp + 96] ; q2 +DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1 +pandm6, m9 +pandm8, m6 +psubb m7, m6 +movam3, [rsp + 80] +LUMA_Q1 m3, m4, [rsp + 96], [rsp + 80], m8, m6 + +DEBLOCK_P0_Q0 +SWAP 1, 3 +SWAP 2, 4 +movq m0, [rsp] +movq m1, [rsp + 16] +movq m2, [rsp + 32] +movq m5, [rsp + 80] +movq m6, [rsp + 96] +movq m7, [rsp + 112] + +TRANSPOSE_8X8B 0,1,2,3,4,5,6,7 +movq [pix_q - 4], m0 +movq [pix_q + stride_q - 4], m1 +movq [pix_q + 2*stride_q - 4], m2 +movq [base3_q - 4], m3 +movq [base3_q + stride_q - 4], m4 +movq [base3_q + 2*stride_q - 4], m5 +movq [base3_q + stride3_q - 4], m6 +movq [base3_q + 4*stride_q - 4], m7 + +RET + +%endmacro + INIT_XMM sse2 +DEBLOCK_H_LUMA_MBAFF DEBLOCK_LUMA + %if HAVE_AVX_EXTERNAL INIT_XMM avx +DEBLOCK_H_LUMA_MBAFF DEBLOCK_LUMA %endif diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c index 7b3d17f..10f1940 100644 --- a/libavcodec/x86/h264dsp_init.c +++ b/libavcodec/x86/h264dsp_init.c @@ -137,6 +137,9 @@ LF_IFUNC(h, chroma422_intra, depth, avx)\ LF_FUNC(v, chroma, depth, avx)\ LF_IFUNC(v, chroma_intra,depth, avx) +LF_FUNC(h, luma_mbaff, 8, sse2) +LF_FUNC(h, luma_mbaff, 8, avx) + LF_FUNCS(uint8_t, 8) LF_FUNCS(uint16_t, 10) @@ -297,6 +300,10 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; + +#if ARCH_X86_64 +c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2; +#endif } if (EXTERNAL_SSSE3(cpu_flags)) { c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3; @@ -307,6 +314,9 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx; +#if ARCH_X86_64 
+c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_avx; +#endif } } else if (bit_depth == 10) { if (EXTERNAL_MMXEXT(cpu_flags)) { diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index 1408f0a..c50ddc6 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -265,6 +265,21 @@ SWAP %12, %15 %endmacro +%macro TRANSPOSE_8X8B 8
[FFmpeg-cvslog] avcodec/x86: deduplicate PASS8ROWS macro
ffmpeg | branch: master | James Darnley | Thu Feb 9 22:06:17 2017 +0100| [9d815b7424b56ffcf6f4dea4fd09c53661e4133a] | committer: James Darnley avcodec/x86: deduplicate PASS8ROWS macro > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9d815b7424b56ffcf6f4dea4fd09c53661e4133a --- libavcodec/x86/h264_deblock.asm | 5 - libavcodec/x86/h264_deblock_10bit.asm | 5 - libavcodec/x86/hevc_deblock.asm | 5 - libavutil/x86/x86util.asm | 5 + 4 files changed, 5 insertions(+), 15 deletions(-) diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm index fe0ab20..435c8be 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -37,11 +37,6 @@ cextern pb_0 cextern pb_1 cextern pb_3 -; expands to [base],...,[base+7*stride] -%define PASS8ROWS(base, base3, stride, stride3) \ -[base], [base+stride], [base+stride*2], [base3], \ -[base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4] - %define PASS8ROWS(base, base3, stride, stride3, offset) \ PASS8ROWS(base+offset, base3+offset, stride, stride3) diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index c295364..1af3257 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -843,11 +843,6 @@ DEBLOCK_LUMA_INTRA mova [r0+2*r1], m2 %endmacro -; expands to [base],...,[base+7*stride] -%define PASS8ROWS(base, base3, stride, stride3) \ -[base], [base+stride], [base+stride*2], [base3], \ -[base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4] - ; in: 8 rows of 4 words in %4..%11 ; out: 4 rows of 8 words in m0..m3 %macro TRANSPOSE4x8W_LOAD 8 diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm index 48a5975..85ee480 100644 --- a/libavcodec/x86/hevc_deblock.asm +++ b/libavcodec/x86/hevc_deblock.asm @@ -39,11 +39,6 @@ cextern pw_m1 SECTION .text INIT_XMM sse2 -; expands to [base],...,[base+7*stride] -%define PASS8ROWS(base, base3, stride, stride3) \ -[base], 
[base+stride], [base+stride*2], [base3], \ -[base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4] - ; in: 8 rows of 4 bytes in %4..%11 ; out: 4 rows of 8 words in m0..m3 %macro TRANSPOSE4x8B_LOAD 8 diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index 44ed750..c063436 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -29,6 +29,11 @@ %include "libavutil/x86/x86inc.asm" +; expands to [base],...,[base+7*stride] +%define PASS8ROWS(base, base3, stride, stride3) \ +[base], [base + stride], [base + 2*stride], [base3], \ +[base3 + stride], [base3 + 2*stride], [base3 + stride3], [base3 + stride*4] + %macro SBUTTERFLY 4 %ifidn %1, dqqq vperm2i128 m%4, m%2, m%3, q0301 ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] x86util: import MOVHL macro
ffmpeg | branch: master | James Darnley | Sat Feb 11 13:25:09 2017 +0100| [7627df15d411a69f236b4650e88b1ab911f38efc] | committer: James Darnley x86util: import MOVHL macro Originally committed to x264 in 1637239a by Henrik Gramner who has agreed to re-license it as LGPL. Original commit message follows. x86: Avoid some bypass delays and false dependencies A bypass delay of 1-3 clock cycles may occur on some CPUs when transitioning between int and float domains, so try to avoid that if possible. > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7627df15d411a69f236b4650e88b1ab911f38efc --- libavutil/x86/x86util.asm | 12 1 file changed, 12 insertions(+) diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index c063436..1408f0a 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -876,3 +876,15 @@ psrlq %1, 8*(%2) %endif %endmacro + +%macro MOVHL 2 ; dst, src +%ifidn %1, %2 +punpckhqdq %1, %2 +%elif cpuflag(avx) +punpckhqdq %1, %2, %2 +%elif cpuflag(sse4) +pshufd %1, %2, q3232 ; pshufd is slow on some older CPUs, so only use it on more modern ones +%else +movhlps%1, %2; may cause an int/float domain transition and has a dependency on dst +%endif +%endmacro ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avformat/utils: Also fill dts==RELATIVE_TS_BASE packets in update_initial_durations()
ffmpeg | branch: master | Michael Niedermayer | Sun Feb 19 00:31:09 2017 +0100| [3206ea4ba31ebf446a3c4f1220adb895b3272c15] | committer: Michael Niedermayer avformat/utils: Also fill dts==RELATIVE_TS_BASE packets in update_initial_durations() This dts value can end up in the list in the absence of durations and is in that case semantically identical to AV_NOPTS_VALUE. We can alternatively prevent storing RELATIVE_TS_BASE if there is no duration. Fixes Ticket3640 Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3206ea4ba31ebf446a3c4f1220adb895b3272c15 --- libavformat/utils.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libavformat/utils.c b/libavformat/utils.c index 0711310..37d7024 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -1164,8 +1164,11 @@ static void update_initial_durations(AVFormatContext *s, AVStream *st, for (; pktl; pktl = get_next_pkt(s, st, pktl)) { if (pktl->pkt.stream_index != stream_index) continue; -if (pktl->pkt.pts == pktl->pkt.dts && -(pktl->pkt.dts == AV_NOPTS_VALUE || pktl->pkt.dts == st->first_dts) && +if ((pktl->pkt.pts == pktl->pkt.dts || + pktl->pkt.pts == AV_NOPTS_VALUE) && +(pktl->pkt.dts == AV_NOPTS_VALUE || + pktl->pkt.dts == st->first_dts || + pktl->pkt.dts == RELATIVE_TS_BASE) && !pktl->pkt.duration) { pktl->pkt.dts = cur_dts; if (!st->internal->avctx->has_b_frames) ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog
[FFmpeg-cvslog] avcodec/mpeg12dec: Provide debug level log on skipped P/B frames
ffmpeg | branch: master | Michael Niedermayer | Sat Feb 18 19:03:39 2017 +0100| [3f28caf72083231131ee35ebc81be2c4dfe95e11] | committer: Michael Niedermayer avcodec/mpeg12dec: Provide debug level log on skipped P/B frames Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3f28caf72083231131ee35ebc81be2c4dfe95e11 --- libavcodec/mpeg12dec.c | 4 1 file changed, 4 insertions(+) diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c index 6397907..f3cf6eb 100644 --- a/libavcodec/mpeg12dec.c +++ b/libavcodec/mpeg12dec.c @@ -2660,6 +2660,8 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture, if (s2->pict_type == AV_PICTURE_TYPE_B) { if (!s2->closed_gop) { skip_frame = 1; +av_log(s2->avctx, AV_LOG_DEBUG, + "Skipping B slice due to open GOP\n"); break; } } @@ -2671,6 +2673,8 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture, * we have an invalid header. */ if (s2->pict_type == AV_PICTURE_TYPE_P && !s->sync) { skip_frame = 1; +av_log(s2->avctx, AV_LOG_DEBUG, + "Skipping P slice due to !sync\n"); break; } } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog