[FFmpeg-cvslog] lavc/aarch64: fix hevc sao band filter
ffmpeg | branch: master | J. Dekker | Tue Apr 26 09:29:54 2022 +0200| [d957ee34a6ec998ea00d6d07ac687c5d7a9792a2] | committer: J. Dekker lavc/aarch64: fix hevc sao band filter The SAO band filter can be called with non-multiples of 8, we round up to the nearest multiple of 8 to account for this. Signed-off-by: J. Dekker > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d957ee34a6ec998ea00d6d07ac687c5d7a9792a2 --- libavcodec/aarch64/hevcdsp_init_aarch64.c | 10 +- libavcodec/aarch64/hevcdsp_sao_neon.S | 8 ++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c index 1e40be740c..c8963e6104 100644 --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c @@ -75,11 +75,11 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon; c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon; c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon; -// This function is disabled, as it doesn't handle widths that aren't -// an even multiple of 8 correctly. fate-hevc doesn't exercise that -// for the current size, but if enabled for bigger sizes, the cases -// of non-multiple of 8 seem to arise. 
-//c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_neon; +c->sao_band_filter[0] = +c->sao_band_filter[1] = +c->sao_band_filter[2] = +c->sao_band_filter[3] = +c->sao_band_filter[4] = ff_hevc_sao_band_filter_8x8_8_neon; } if (bit_depth == 10) { c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon; diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S index d523bf584d..e07e0cea2d 100644 --- a/libavcodec/aarch64/hevcdsp_sao_neon.S +++ b/libavcodec/aarch64/hevcdsp_sao_neon.S @@ -41,7 +41,11 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1 and w10, w10, #0x1F strhw9, [sp, x10, lsl #1] bne 0b +add w6, w6, #7 +bic w6, w6, #7 ld1 {v16.16b-v19.16b}, [sp], #64 +sub x2, x2, x6 +sub x3, x3, x6 moviv20.8h, #1 1: mov w8, w6// beginning of line 2: // Simple layout for accessing 16bit values @@ -52,7 +56,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1 // |xDE#xAD|xCA#xFE|xBE#xEF|xFE#xED| // +---> //i-0 i-1 i-2 i-3 -ld1 {v2.8b}, [x1] // dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]); +ld1 {v2.8b}, [x1], #8 // dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]); uxtlv0.8h, v2.8b // load src[x] ushrv2.8h, v0.8h, #3 // >> BIT_DEPTH - 3 shl v1.8h, v2.8h, #1 // low (x2, accessing short) @@ -61,7 +65,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1 tbx v2.16b, {v16.16b-v19.16b}, v1.16b // table add v1.8h, v0.8h, v2.8h // src[x] + table sqxtun v4.8b, v1.8h // clip + narrow -st1 {v4.8b}, [x0] // store +st1 {v4.8b}, [x0], #8 // store subsw8, w8, #8// done 8 pixels bne 2b subsw7, w7, #1// finished line, prep. new ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavc/aarch64: add hevc sao edge 8x8
ffmpeg | branch: master | J. Dekker | Thu Apr 28 14:57:43 2022 +0200| [2e832be322eb456e44b1e928904fa470a0b00a67] | committer: J. Dekker lavc/aarch64: add hevc sao edge 8x8 bench on AWS Graviton: hevc_sao_edge_8x8_8_c: 516.0 hevc_sao_edge_8x8_8_neon: 81.0 Signed-off-by: J. Dekker > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2e832be322eb456e44b1e928904fa470a0b00a67 --- libavcodec/aarch64/hevcdsp_init_aarch64.c | 3 ++ libavcodec/aarch64/hevcdsp_sao_neon.S | 51 +++ 2 files changed, 54 insertions(+) diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c index df521bb083..2002530266 100644 --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c @@ -59,6 +59,8 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, uint8_t *_src, int width, int height); void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, int16_t *sao_offset_val, int eo, int width, int height); +void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, + int16_t *sao_offset_val, int eo, int width, int height); av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) { @@ -80,6 +82,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) c->sao_band_filter[2] = c->sao_band_filter[3] = c->sao_band_filter[4] = ff_hevc_sao_band_filter_8x8_8_neon; +c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8x8_8_neon; c->sao_edge_filter[1] = c->sao_edge_filter[2] = c->sao_edge_filter[3] = diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S index 0315c479df..efd8112af4 100644 --- a/libavcodec/aarch64/hevcdsp_sao_neon.S +++ b/libavcodec/aarch64/hevcdsp_sao_neon.S @@ -140,3 +140,54 @@ function ff_hevc_sao_edge_filter_16x16_8_neon, export=1 // no lines to filter ret endfunc + +// ff_hevc_sao_edge_filter_8x8_8_neon(char *dst, char *src, ptrdiff stride_dst, +//int16 
*sao_offset_val, int eo, int width, int height) +function ff_hevc_sao_edge_filter_8x8_8_neon, export=1 +adr x7, .Lsao_edge_pos +ldr w4, [x7, w4, uxtw #2] +ld1 {v3.8h}, [x3] +mov v3.h[7], v3.h[0] +mov v3.h[0], v3.h[1] +mov v3.h[1], v3.h[2] +mov v3.h[2], v3.h[7] +uzp2v1.16b, v3.16b, v3.16b +uzp1v0.16b, v3.16b, v3.16b +moviv2.16b, #2 +add x16, x0, x2 +lsl x2, x2, #1 +mov x15, #192 +mov x8, x1 +sub x9, x1, x4 +add x10, x1, x4 +lsr w17, w6, #1 +1: ld1 {v3.d}[0], [ x8], x15 +ld1 {v4.d}[0], [ x9], x15 +ld1 {v5.d}[0], [x10], x15 +ld1 {v3.d}[1], [ x8], x15 +ld1 {v4.d}[1], [ x9], x15 +ld1 {v5.d}[1], [x10], x15 +cmhiv16.16b, v4.16b, v3.16b +cmhiv17.16b, v3.16b, v4.16b +cmhiv18.16b, v5.16b, v3.16b +cmhiv19.16b, v3.16b, v5.16b +sub v20.16b, v16.16b, v17.16b +sub v21.16b, v18.16b, v19.16b +add v20.16b, v20.16b, v21.16b +add v20.16b, v20.16b, v2.16b +tbl v16.16b, {v0.16b}, v20.16b +tbl v17.16b, {v1.16b}, v20.16b +uxtlv20.8h, v3.8b +uxtl2 v21.8h, v3.16b +zip1v18.16b, v16.16b, v17.16b +zip2v19.16b, v16.16b, v17.16b +sqadd v20.8h, v18.8h, v20.8h +sqadd v21.8h, v19.8h, v21.8h +sqxtun v6.8b, v20.8h +sqxtun v7.8b, v21.8h +st1 {v6.8b}, [ x0], x2 +st1 {v7.8b}, [x16], x2 +subsx17, x17, #1 +b.ne1b +ret +endfunc ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] lavc/aarch64: add hevc sao edge 16x16
ffmpeg | branch: master | J. Dekker | Thu Apr 28 14:57:33 2022 +0200| [92f67e40170994dcb7a96ae362d95308f6744294] | committer: J. Dekker lavc/aarch64: add hevc sao edge 16x16 bench on AWS Graviton: hevc_sao_edge_16x16_8_c: 1857.0 hevc_sao_edge_16x16_8_neon: 211.0 hevc_sao_edge_32x32_8_c: 7802.2 hevc_sao_edge_32x32_8_neon: 808.2 hevc_sao_edge_48x48_8_c: 16764.2 hevc_sao_edge_48x48_8_neon: 1796.5 hevc_sao_edge_64x64_8_c: 32647.5 hevc_sao_edge_64x64_8_neon: 3118.5 Signed-off-by: J. Dekker > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=92f67e40170994dcb7a96ae362d95308f6744294 --- libavcodec/aarch64/hevcdsp_init_aarch64.c | 8 +++- libavcodec/aarch64/hevcdsp_sao_neon.S | 66 +++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c index c8963e6104..df521bb083 100644 --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c @@ -57,8 +57,8 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int16_t *sao_offset_val, int sao_left_class, int width, int height); - - +void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, + int16_t *sao_offset_val, int eo, int width, int height); av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) { @@ -80,6 +80,10 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) c->sao_band_filter[2] = c->sao_band_filter[3] = c->sao_band_filter[4] = ff_hevc_sao_band_filter_8x8_8_neon; +c->sao_edge_filter[1] = +c->sao_edge_filter[2] = +c->sao_edge_filter[3] = +c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_16x16_8_neon; } if (bit_depth == 10) { c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon; diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S index e07e0cea2d..0315c479df 100644 --- 
a/libavcodec/aarch64/hevcdsp_sao_neon.S +++ b/libavcodec/aarch64/hevcdsp_sao_neon.S @@ -74,3 +74,69 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1 bne 1b ret endfunc + +// ASSUMES STRIDE_SRC = 192 +.Lsao_edge_pos: +.word 1 // horizontal +.word 192 // vertical +.word 192 + 1 // 45 degree +.word 192 - 1 // 135 degree + +// ff_hevc_sao_edge_filter_16x16_8_neon(char *dst, char *src, ptrdiff stride_dst, +// int16 *sao_offset_val, int eo, int width, int height) +function ff_hevc_sao_edge_filter_16x16_8_neon, export=1 +adr x7, .Lsao_edge_pos +ld1 {v3.8h}, [x3] // load sao_offset_val +add w5, w5, #0xF +bic w5, w5, #0xF +ldr w4, [x7, w4, uxtw #2] // stride_src +mov v3.h[7], v3.h[0] // reorder to [1,2,0,3,4] +mov v3.h[0], v3.h[1] +mov v3.h[1], v3.h[2] +mov v3.h[2], v3.h[7] +// split 16bit values into two tables +uzp2v1.16b, v3.16b, v3.16b // sao_offset_val -> upper +uzp1v0.16b, v3.16b, v3.16b // sao_offset_val -> lower +moviv2.16b, #2 +mov x15, #192 +// strides between end of line and next src/dst +sub x15, x15, x5 // stride_src - width +sub x16, x2, x5// stride_dst - width +mov x11, x1// copy base src +1: // new line +mov x14, x5// copy width +sub x12, x11, x4 // src_a (prev) = src - sao_edge_pos +add x13, x11, x4 // src_b (next) = src + sao_edge_pos +2: // process 16 bytes +ld1 {v3.16b}, [x11], #16 // load src +ld1 {v4.16b}, [x12], #16 // load src_a (prev) +ld1 {v5.16b}, [x13], #16 // load src_b (next) +cmhiv16.16b, v4.16b, v3.16b// (prev > cur) +cmhiv17.16b, v3.16b, v4.16b// (cur > prev) +cmhiv18.16b, v5.16b, v3.16b// (next > cur) +cmhiv19.16b, v3.16b, v5.16b// (cur > next) +sub v20.16b, v16.16b, v17.16b // diff0 = CMP(cur, prev) = (cur > prev) - (cur < prev) +sub v21.16b, v18.16b, v19.16b // diff1 = CMP(cur, next) = (cur > next) - (cur < next) +add v20.16b, v20.16b, v21.16b // diff = diff0
[FFmpeg-cvslog] checkasm: improve hevc_sao test
ffmpeg | branch: master | J. Dekker | Tue May 17 13:48:23 2022 +0200| [cc679054c715acda9438e566b8de3a9eba421ac3] | committer: J. Dekker checkasm: improve hevc_sao test The HEVC decoder can call these functions with smaller widths than the functions themselves are designed to operate on so we should only check the relevant output Signed-off-by: J. Dekker > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cc679054c715acda9438e566b8de3a9eba421ac3 --- tests/checkasm/hevc_sao.c | 51 --- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/tests/checkasm/hevc_sao.c b/tests/checkasm/hevc_sao.c index 6b750758e2..4a23010243 100644 --- a/tests/checkasm/hevc_sao.c +++ b/tests/checkasm/hevc_sao.c @@ -78,20 +78,26 @@ static void check_sao_band(HEVCDSPContext h, int bit_depth) for (i = 0; i <= 4; i++) { int block_size = sao_size[i]; +int prev_size = i > 0 ? sao_size[i - 1] : 0; ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL; declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride, int16_t *sao_offset_val, int sao_left_class, int width, int height); -randomize_buffers(src0, src1, BUF_SIZE); -randomize_buffers2(offset_val, OFFSET_LENGTH); -memset(dst0, 0, BUF_SIZE); -memset(dst1, 0, BUF_SIZE); - -if (check_func(h.sao_band_filter[i], "hevc_sao_band_%dx%d_%d", block_size, block_size, bit_depth)) { -call_ref(dst0, src0, stride, stride, offset_val, left_class, block_size, block_size); -call_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size); -if (memcmp(dst0, dst1, BUF_SIZE)) -fail(); +if (check_func(h.sao_band_filter[i], "hevc_sao_band_%d_%d", block_size, bit_depth)) { + +for (int w = prev_size + 4; w <= block_size; w += 4) { +randomize_buffers(src0, src1, BUF_SIZE); +randomize_buffers2(offset_val, OFFSET_LENGTH); +memset(dst0, 0, BUF_SIZE); +memset(dst1, 0, BUF_SIZE); + +call_ref(dst0, src0, stride, stride, offset_val, left_class, w, block_size); +call_new(dst1, src1, stride, 
stride, offset_val, left_class, w, block_size); +for (int j = 0; j < block_size; j++) { +if (memcmp(dst0 + j*stride, dst1 + j*stride, w*SIZEOF_PIXEL)) +fail(); +} +} bench_new(dst1, src1, stride, stride, offset_val, left_class, block_size, block_size); } } @@ -109,21 +115,26 @@ static void check_sao_edge(HEVCDSPContext h, int bit_depth) for (i = 0; i <= 4; i++) { int block_size = sao_size[i]; +int prev_size = i > 0 ? sao_size[i - 1] : 0; ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL; int offset = (AV_INPUT_BUFFER_PADDING_SIZE + PIXEL_STRIDE)*SIZEOF_PIXEL; declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, int16_t *sao_offset_val, int eo, int width, int height); -randomize_buffers(src0, src1, BUF_SIZE); -randomize_buffers2(offset_val, OFFSET_LENGTH); -memset(dst0, 0, BUF_SIZE); -memset(dst1, 0, BUF_SIZE); - -if (check_func(h.sao_edge_filter[i], "hevc_sao_edge_%dx%d_%d", block_size, block_size, bit_depth)) { -call_ref(dst0, src0 + offset, stride, offset_val, eo, block_size, block_size); -call_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size); -if (memcmp(dst0, dst1, BUF_SIZE)) -fail(); +for (int w = prev_size + 4; w <= block_size; w += 4) { +randomize_buffers(src0, src1, BUF_SIZE); +randomize_buffers2(offset_val, OFFSET_LENGTH); +memset(dst0, 0, BUF_SIZE); +memset(dst1, 0, BUF_SIZE); + +if (check_func(h.sao_edge_filter[i], "hevc_sao_edge_%d_%d", block_size, bit_depth)) { +call_ref(dst0, src0 + offset, stride, offset_val, eo, w, block_size); +call_new(dst1, src1 + offset, stride, offset_val, eo, w, block_size); +for (int j = 0; j < block_size; j++) { +if (memcmp(dst0 + j*stride, dst1 + j*stride, w*SIZEOF_PIXEL)) +fail(); +} +} bench_new(dst1, src1 + offset, stride, offset_val, eo, block_size, block_size); } } ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject 
"unsubscribe".
[FFmpeg-cvslog] qsv: add requirement for the minimal version of libmfx
ffmpeg | branch: master | Haihao Xiang | Sun May 22 20:19:11 2022 +0800| [478e1a98a289bbc777bddc02fdcefeaa3c416a63] | committer: Haihao Xiang qsv: add requirement for the minimal version of libmfx libmfx 1.28 was released 3 years ago, it is easy to get a greater version than 1.28. We may remove lots of compile-time checks if adding the requirement for the minimal version in the configure script. Reviewed-by: softworkz Signed-off-by: Jean-Baptiste Kempf Signed-off-by: Haihao Xiang > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=478e1a98a289bbc777bddc02fdcefeaa3c416a63 --- configure | 7 +- libavcodec/qsv.c | 24 --- libavcodec/qsvenc.c| 476 - libavcodec/qsvenc.h| 51 + libavcodec/qsvenc_h264.c | 6 - libavcodec/qsvenc_hevc.c | 10 - libavfilter/vf_scale_qsv.c | 13 +- libavfilter/vf_vpp_qsv.c | 143 +++--- libavutil/hwcontext_qsv.c | 2 - 9 files changed, 250 insertions(+), 482 deletions(-) diff --git a/configure b/configure index f115b21064..2337f0a8f2 100755 --- a/configure +++ b/configure @@ -6566,8 +6566,11 @@ enabled liblensfun&& require_pkg_config liblensfun lensfun lensfun.h lf_ # Media SDK or Intel Media Server Studio, these don't come with # pkg-config support. Instead, users should make sure that the build # can find the libraries and headers through other means. 
-enabled libmfx&& { check_pkg_config libmfx libmfx "mfx/mfxvideo.h" MFXInit || - { require libmfx "mfx/mfxvideo.h" MFXInit "-llibmfx $advapi32_extralibs" && warn "using libmfx without pkg-config"; } } +enabled libmfx&& { check_pkg_config libmfx "mfx >= 1.28" "mfx/mfxvideo.h" MFXInit || + { require libmfx "mfx/mfxvideo.h mfx/mfxdefs.h" MFXInit "-llibmfx $advapi32_extralibs" && + { test_cpp_condition mfx/mfxdefs.h "MFX_VERSION >= 1028" || die "ERROR: libmfx version must be >= 1.28"; } && + warn "using libmfx without pkg-config"; } } + if enabled libmfx; then check_cc MFX_CODEC_VP9 "mfx/mfxvp9.h mfx/mfxstructures.h" "MFX_CODEC_VP9" fi diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c index b86c20b153..385b43bb6c 100644 --- a/libavcodec/qsv.c +++ b/libavcodec/qsv.c @@ -38,34 +38,26 @@ #define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl)) -#if QSV_VERSION_ATLEAST(1, 12) #include "mfx/mfxvp8.h" -#endif int ff_qsv_codec_id_to_mfx(enum AVCodecID codec_id) { switch (codec_id) { case AV_CODEC_ID_H264: return MFX_CODEC_AVC; -#if QSV_VERSION_ATLEAST(1, 8) case AV_CODEC_ID_HEVC: return MFX_CODEC_HEVC; -#endif case AV_CODEC_ID_MPEG1VIDEO: case AV_CODEC_ID_MPEG2VIDEO: return MFX_CODEC_MPEG2; case AV_CODEC_ID_VC1: return MFX_CODEC_VC1; -#if QSV_VERSION_ATLEAST(1, 12) case AV_CODEC_ID_VP8: return MFX_CODEC_VP8; -#endif case AV_CODEC_ID_MJPEG: return MFX_CODEC_JPEG; -#if QSV_VERSION_ATLEAST(1, 19) case AV_CODEC_ID_VP9: return MFX_CODEC_VP9; -#endif #if QSV_VERSION_ATLEAST(1, 34) case AV_CODEC_ID_AV1: return MFX_CODEC_AV1; @@ -189,17 +181,11 @@ enum AVPixelFormat ff_qsv_map_fourcc(uint32_t fourcc) case MFX_FOURCC_NV12: return AV_PIX_FMT_NV12; case MFX_FOURCC_P010: return AV_PIX_FMT_P010; case MFX_FOURCC_P8: return AV_PIX_FMT_PAL8; -#if QSV_VERSION_ATLEAST(1, 9) case MFX_FOURCC_A2RGB10: return AV_PIX_FMT_X2RGB10; -#endif -#if QSV_VERSION_ATLEAST(1, 17) case MFX_FOURCC_RGB4: return AV_PIX_FMT_BGRA; -#endif #if CONFIG_VAAPI case MFX_FOURCC_YUY2: return AV_PIX_FMT_YUYV422; -#if 
QSV_VERSION_ATLEAST(1, 27) case MFX_FOURCC_Y210: return AV_PIX_FMT_Y210; -#endif #endif } return AV_PIX_FMT_NONE; @@ -217,27 +203,21 @@ int ff_qsv_map_pixfmt(enum AVPixelFormat format, uint32_t *fourcc) case AV_PIX_FMT_P010: *fourcc = MFX_FOURCC_P010; return AV_PIX_FMT_P010; -#if QSV_VERSION_ATLEAST(1, 9) case AV_PIX_FMT_X2RGB10: *fourcc = MFX_FOURCC_A2RGB10; return AV_PIX_FMT_X2RGB10; -#endif -#if QSV_VERSION_ATLEAST(1, 17) case AV_PIX_FMT_BGRA: *fourcc = MFX_FOURCC_RGB4; return AV_PIX_FMT_BGRA; -#endif #if CONFIG_VAAPI case AV_PIX_FMT_YUV422P: case AV_PIX_FMT_YUYV422: *fourcc = MFX_FOURCC_YUY2; return AV_PIX_FMT_YUYV422; -#if QSV_VERSION_ATLEAST(1, 27) case AV_PIX_FMT_YUV422P10: case AV_PIX_FMT_Y210: *fourcc = MFX_FOURCC_Y210; return AV_PIX_FMT_Y210; -#endif #endif default: return AVERROR(ENOSYS); @@ -438,9 +418,7 @@ int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs, const char *desc; int ret; -#if QSV_VERSION_ATLEAST(1, 16) init_par.GPUCopy= gpu_copy; -#endif init_par.Implementation = impl; init_par.Version
[FFmpeg-cvslog] libavcodec/qsvenc: expose only supported options
ffmpeg | branch: master | Dmitry Rogozhkin | Thu May 19 12:54:16 2022 -0700| [f8a07c4d4abbd3974e074bc54bc22eeaa0f46051] | committer: Haihao Xiang libavcodec/qsvenc: expose only supported options vp9, hevc, avc, mpeg2 QSV encoders inherit common list of options (QSV_COMMON_OPTS) while bunch of options is not actually supported by current qsv code. The only codec which supportes everything is avc, followed by hevc, while vp9 and mpeg2 significantly fall behind. This creates difficulties for the users to use qsv encoders. This patch fixes options list for encoders leaving only those which are actually supported. Signed-off-by: Dmitry Rogozhkin Signed-off-by: Haihao Xiang > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f8a07c4d4abbd3974e074bc54bc22eeaa0f46051 --- libavcodec/qsvenc.h | 52 ++- libavcodec/qsvenc_h264.c | 12 +++ libavcodec/qsvenc_hevc.c | 9 libavcodec/qsvenc_mpeg2.c | 1 + 4 files changed, 60 insertions(+), 14 deletions(-) diff --git a/libavcodec/qsvenc.h b/libavcodec/qsvenc.h index cb84723dfa..33bbc2a1d6 100644 --- a/libavcodec/qsvenc.h +++ b/libavcodec/qsvenc.h @@ -89,22 +89,46 @@ { "slow",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_3 }, INT_MIN, INT_MAX, VE, "preset" }, \ { "slower", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_2 }, INT_MIN, INT_MAX, VE, "preset" }, \ { "veryslow",NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_BEST_QUALITY }, INT_MIN, INT_MAX, VE, "preset" }, \ -{ "rdo","Enable rate distortion optimization",OFFSET(qsv.rdo), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ +{ "forced_idr", "Forcing I frames as IDR frames", OFFSET(qsv.forced_idr), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, \ +{ "low_power", "enable low power mode(experimental: many limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = -1}, -1, 1, VE}, + +#define QSV_OPTION_RDO \ +{ "rdo","Enable rate distortion optimization",OFFSET(qsv.rdo), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, + +#define 
QSV_OPTION_MAX_FRAME_SIZE \ { "max_frame_size", "Maximum encoded frame size in bytes", OFFSET(qsv.max_frame_size), AV_OPT_TYPE_INT, { .i64 = -1 }, -1,INT_MAX, VE }, \ { "max_frame_size_i", "Maximum encoded I frame size in bytes",OFFSET(qsv.max_frame_size_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, \ -{ "max_frame_size_p", "Maximum encoded P frame size in bytes",OFFSET(qsv.max_frame_size_p), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, \ -{ "max_slice_size", "Maximum encoded slice size in bytes", OFFSET(qsv.max_slice_size), AV_OPT_TYPE_INT, { .i64 = -1 }, -1,INT_MAX, VE }, \ -{ "bitrate_limit", "Toggle bitrate limitations", OFFSET(qsv.bitrate_limit), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ -{ "mbbrc", "MB level bitrate control", OFFSET(qsv.mbbrc), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ -{ "extbrc", "Extended bitrate control", OFFSET(qsv.extbrc), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ -{ "adaptive_i", "Adaptive I-frame placement", OFFSET(qsv.adaptive_i), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ -{ "adaptive_b", "Adaptive B-frame placement", OFFSET(qsv.adaptive_b), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ -{ "p_strategy", "Enable P-pyramid: 0-default 1-simple 2-pyramid(bf need to be set to 0).",OFFSET(qsv.p_strategy), AV_OPT_TYPE_INT,{ .i64 = 0}, 0, 2, VE }, \ -{ "b_strategy", "Strategy to choose between I/P/B-frames", OFFSET(qsv.b_strategy),AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \ -{ "forced_idr", "Forcing I frames as IDR frames", OFFSET(qsv.forced_idr), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, \ -{ "low_power", "enable low power mode(experimental: many limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = -1}, -1, 1, VE},\ -{ "dblk_idc", "This option disable deblocking. It has value in range 0~2.", OFFSET(qsv.dblk_idc), AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, 2, VE},\ -{ "low_delay_brc", "Allow to strictly obey avg frame size", OFFSET(qsv.low_delay_brc), AV_OPT_TYPE_B
[FFmpeg-cvslog] avcodec/mfenc: Dynamically load MFPlat.DLL
ffmpeg | branch: master | Trystan Mata | Wed May 25 12:54:01 2022 +0200| [1cb601ad10313981209a5918fc36a968068fc0ec] | committer: Martin Storsjö avcodec/mfenc: Dynamically load MFPlat.DLL Allows non-UWP builds of FFmpeg with MediaFoundation to work on N editions of Windows which are without MediaFoundation by default. On UWP target, FFmpeg is linked directly against MediaFoundation since LoadLibrary is not available. This commit adresses https://trac.ffmpeg.org/ticket/9788 Signed-off-by: Martin Storsjö > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1cb601ad10313981209a5918fc36a968068fc0ec --- configure | 4 ++- libavcodec/mf_utils.c | 59 libavcodec/mf_utils.h | 35 --- libavcodec/mfenc.c| 93 ++- 4 files changed, 124 insertions(+), 67 deletions(-) diff --git a/configure b/configure index 2337f0a8f2..6cf7d89674 100755 --- a/configure +++ b/configure @@ -3130,7 +3130,6 @@ wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel" # hardware-accelerated codecs mediafoundation_deps="mftransform_h MFCreateAlignedMemoryBuffer" -mediafoundation_extralibs="-lmfplat -lmfuuid -lole32 -lstrmiids" omx_deps="libdl pthreads" omx_rpi_select="omx" qsv_deps="libmfx" @@ -6879,6 +6878,9 @@ test_cppMFCreateAlignedMemoryBuffer(size, align - 1, &buffer); if (FAILED(hr)) return NULL; @@ -548,7 +516,7 @@ const CLSID *ff_codec_to_mf_subtype(enum AVCodecID codec) } } -static int init_com_mf(void *log) +static int init_com_mf(void *log, MFFunctions *f) { HRESULT hr; @@ -561,7 +529,7 @@ static int init_com_mf(void *log) return AVERROR(ENOSYS); } -hr = MFStartup(MF_VERSION, MFSTARTUP_FULL); +hr = f->MFStartup(MF_VERSION, MFSTARTUP_FULL); if (FAILED(hr)) { av_log(log, AV_LOG_ERROR, "could not initialize MediaFoundation\n"); CoUninitialize(); @@ -571,15 +539,16 @@ static int init_com_mf(void *log) return 0; } -static void uninit_com_mf(void) +static void uninit_com_mf(MFFunctions *f) { -MFShutdown(); +f->MFShutdown(); CoUninitialize(); } // Find and create a IMFTransform with the given input/output 
types. When done, // you should use ff_free_mf() to destroy it, which will also uninit COM. int ff_instantiate_mf(void *log, + MFFunctions *f, GUID category, MFT_REGISTER_TYPE_INFO *in_type, MFT_REGISTER_TYPE_INFO *out_type, @@ -594,7 +563,7 @@ int ff_instantiate_mf(void *log, IMFActivate *winner = 0; UINT32 flags; -ret = init_com_mf(log); +ret = init_com_mf(log, f); if (ret < 0) return ret; @@ -606,7 +575,7 @@ int ff_instantiate_mf(void *log, flags |= MFT_ENUM_FLAG_SYNCMFT; } -hr = ff_MFTEnumEx(category, flags, in_type, out_type, &activate, +hr = f->MFTEnumEx(category, flags, in_type, out_type, &activate, &num_activate); if (FAILED(hr)) goto error_uninit_mf; @@ -667,14 +636,14 @@ int ff_instantiate_mf(void *log, return 0; error_uninit_mf: -uninit_com_mf(); +uninit_com_mf(f); return AVERROR(ENOSYS); } -void ff_free_mf(IMFTransform **mft) +void ff_free_mf(MFFunctions *f, IMFTransform **mft) { if (*mft) IMFTransform_Release(*mft); *mft = NULL; -uninit_com_mf(); +uninit_com_mf(f); } diff --git a/libavcodec/mf_utils.h b/libavcodec/mf_utils.h index d514723c3b..3b12344f3e 100644 --- a/libavcodec/mf_utils.h +++ b/libavcodec/mf_utils.h @@ -41,6 +41,25 @@ #include "avcodec.h" +// Windows N editions does not provide MediaFoundation by default. +// So to avoid DLL loading error, MediaFoundation will be dynamically loaded +// except on UWP build since LoadLibrary is not available on it. +typedef struct MFFunctions { +HRESULT (WINAPI *MFStartup) (ULONG Version, DWORD dwFlags); +HRESULT (WINAPI *MFShutdown) (void); +HRESULT (WINAPI *MFCreateAlignedMemoryBuffer) (DWORD cbMaxLength, + DWORD cbAligment, + IMFMediaBuffer **ppBuffer); +HRESULT (WINAPI *MFCreateSample) (IMFSample **ppIMFSample); +HRESULT (WINAPI *MFCreateMediaType) (IMFMediaType **ppMFType); +// MFTEnumEx is missing in Windows Vista's mfplat.dll. 
+HRESULT (WINAPI *MFTEnumEx)(GUID guidCategory, UINT32 Flags, +const MFT_REGISTER_TYPE_INFO *pInputType, +const MFT_REGISTER_TYPE_INFO *pOutputType, +IMFActivate ***pppMFTActivate, +UINT32 *pnumMFTActi
[FFmpeg-cvslog] lavc/aarch64: hevc_sao reschedule slightly
ffmpeg | branch: master | J. Dekker | Wed May 25 10:55:34 2022 +0200| [3c694967f862dc5e09921438c6cbd191944ac13c] | committer: J. Dekker lavc/aarch64: hevc_sao reschedule slightly Signed-off-by: J. Dekker > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3c694967f862dc5e09921438c6cbd191944ac13c --- libavcodec/aarch64/hevcdsp_sao_neon.S | 28 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S index efd8112af4..d4decfde3b 100644 --- a/libavcodec/aarch64/hevcdsp_sao_neon.S +++ b/libavcodec/aarch64/hevcdsp_sao_neon.S @@ -3,7 +3,7 @@ * * AArch64 NEON optimised SAO functions for HEVC decoding * - * Copyright (c) 2020 Josh Dekker + * Copyright (c) 2022 J. Dekker * * This file is part of FFmpeg. * @@ -24,6 +24,10 @@ #include "libavutil/aarch64/asm.S" +#define MAX_PB_SIZE 64 +#define AV_INPUT_BUFFER_PADDING_SIZE 64 +#define SAO_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) + // void sao_band_filter(uint8_t *_dst, uint8_t *_src, // ptrdiff_t stride_dst, ptrdiff_t stride_src, // int16_t *sao_offset_val, int sao_left_class, @@ -57,6 +61,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1 // +---> //i-0 i-1 i-2 i-3 ld1 {v2.8b}, [x1], #8 // dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]); +subsw8, w8, #8 uxtlv0.8h, v2.8b // load src[x] ushrv2.8h, v0.8h, #3 // >> BIT_DEPTH - 3 shl v1.8h, v2.8h, #1 // low (x2, accessing short) @@ -66,7 +71,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1 add v1.8h, v0.8h, v2.8h // src[x] + table sqxtun v4.8b, v1.8h // clip + narrow st1 {v4.8b}, [x0], #8 // store -subsw8, w8, #8// done 8 pixels +// done 8 pixels bne 2b subsw7, w7, #1// finished line, prep. 
new add x0, x0, x2// dst += stride_dst @@ -75,12 +80,11 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1 ret endfunc -// ASSUMES STRIDE_SRC = 192 .Lsao_edge_pos: .word 1 // horizontal -.word 192 // vertical -.word 192 + 1 // 45 degree -.word 192 - 1 // 135 degree +.word SAO_STRIDE // vertical +.word SAO_STRIDE + 1 // 45 degree +.word SAO_STRIDE - 1 // 135 degree // ff_hevc_sao_edge_filter_16x16_8_neon(char *dst, char *src, ptrdiff stride_dst, // int16 *sao_offset_val, int eo, int width, int height) @@ -98,7 +102,7 @@ function ff_hevc_sao_edge_filter_16x16_8_neon, export=1 uzp2v1.16b, v3.16b, v3.16b // sao_offset_val -> upper uzp1v0.16b, v3.16b, v3.16b // sao_offset_val -> lower moviv2.16b, #2 -mov x15, #192 +mov x15, #SAO_STRIDE // strides between end of line and next src/dst sub x15, x15, x5 // stride_src - width sub x16, x2, x5// stride_dst - width @@ -111,6 +115,7 @@ function ff_hevc_sao_edge_filter_16x16_8_neon, export=1 ld1 {v3.16b}, [x11], #16 // load src ld1 {v4.16b}, [x12], #16 // load src_a (prev) ld1 {v5.16b}, [x13], #16 // load src_b (next) +subsx14, x14, #16 cmhiv16.16b, v4.16b, v3.16b// (prev > cur) cmhiv17.16b, v3.16b, v4.16b// (cur > prev) cmhiv18.16b, v5.16b, v3.16b// (next > cur) @@ -130,12 +135,12 @@ function ff_hevc_sao_edge_filter_16x16_8_neon, export=1 sqxtun v3.8b, v20.8h sqxtun2 v3.16b, v21.8h st1 {v3.16b}, [x0], #16 -subsx14, x14, #16 // filtered 16 bytes +// filtered 16 bytes b.ne2b // do we have width to filter? // no width to filter, setup next line +subsw6, w6, #1 // filtered line add x11, x11, x15 // stride src to next line add x0, x0, x16// stride dst to next line -subsw6, w6, #1 // filtered line b.ne1b // do we have lines to process? // no lines to filter ret @@ -156,17 +161,17 @@ function ff_hevc_sao_edge_filter_8x8_8_neon, export=1 moviv2.16b, #2 add x16, x0, x2 lsl x2, x2, #1 -mov