Patches attached. - Andreas
From d06296ccf36da8132b567d75d161ff6a0127368a Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Tue, 27 May 2025 15:38:17 +0200 Subject: [PATCH 1/7] avcodec/asvenc: Fix crash with unaligned pointers/linesizes
This happens on systems where get_pixels really needs to be properly aligned, like ARMv7 or RISC-V. For these systems, 0401ca714a2714743573e27c384ffa810fd31a92 caused a bus error for the vsynth3-asv[12] tests, because the stride in these tests is unaligned. See e.g. https://fate.ffmpeg.org/report.cgi?slot=armv7-linux-gcc-13&time=20250527020548 https://fate.ffmpeg.org/report.cgi?slot=rv64gcvb-linux-gnu-gcc&time=20250527001827 It can also happen (even before said commit) if the pointers themselves are unaligned, e.g. by using the crop filter: ffmpeg -filter_complex nullsrc=s=740x576:r=25,format=yuv420p,crop=w=720:x=2 \ -c:v asv2 -f null - The alignment requirements for the frames passed to encoders are mostly undocumented; the only thing I could find is the documentation of AVFrame.linesize: "For video the linesizes should be multiples of the CPUs alignment preference". This means that the FFmpeg cli violates our API. Yet, as the above command line shows, it can also happen with unaligned pointers and there does not seem to be a prohibition of this, so we need to handle this case. This commit does so by using get_pixels_unaligned when needed. 
Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavcodec/asvenc.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c index bcdb5cfbe2..ba48aa8f08 100644 --- a/libavcodec/asvenc.c +++ b/libavcodec/asvenc.c @@ -45,6 +45,10 @@ typedef struct ASVEncContext { PutBitContext pb; + void (*get_pixels)(int16_t *restrict block, + const uint8_t *pixels, + ptrdiff_t stride); + PixblockDSPContext pdsp; FDCTDSPContext fdsp; DECLARE_ALIGNED(32, int16_t, block)[6][64]; @@ -219,16 +223,16 @@ static inline void dct_get(ASVEncContext *a, const AVFrame *frame, const uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; const uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; - a->pdsp.get_pixels(block[0], ptr_y, linesize); - a->pdsp.get_pixels(block[1], ptr_y + 8, linesize); - a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize, linesize); - a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize); + a->get_pixels(block[0], ptr_y, linesize); + a->get_pixels(block[1], ptr_y + 8, linesize); + a->get_pixels(block[2], ptr_y + 8 * linesize, linesize); + a->get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize); for (i = 0; i < 4; i++) a->fdsp.fdct(block[i]); if (!(a->c.avctx->flags & AV_CODEC_FLAG_GRAY)) { - a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); - a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); + a->get_pixels(block[4], ptr_cb, frame->linesize[1]); + a->get_pixels(block[5], ptr_cr, frame->linesize[2]); for (i = 4; i < 6; i++) a->fdsp.fdct(block[i]); } @@ -297,6 +301,13 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, if (ret < 0) return ret; + if ((uintptr_t)pict->data[0] & 7 || pict->linesize[0] & 7 || + (uintptr_t)pict->data[1] & 7 || pict->linesize[1] & 7 || + (uintptr_t)pict->data[2] & 7 || pict->linesize[2] & 7) + a->get_pixels = a->pdsp.get_pixels_unaligned; + else + 
a->get_pixels = a->pdsp.get_pixels; + init_put_bits(&a->pb, pkt->data, pkt->size); for (int mb_y = 0; mb_y < c->mb_height2; mb_y++) { -- 2.45.2
From 49a1efb0b2f3a0370da3ea8175c5477fa1cb2a26 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Tue, 27 May 2025 16:30:11 +0200 Subject: [PATCH 2/7] avcodec/dvenc: Check for unaligned pointers, strides Fixes segfaults on systems where PixblockDSPContext.get_pixels really requires properly aligned input (e.g. ARMv7). Before this commit, input created by -filter_complex nullsrc=s=740x576:r=25,format=yuv420p,crop=w=720:x=2 led to crashes. (The unaligned strides are in violation of the AVFrame.linesize documentation; unaligned pointers themselves do not seem to be prohibited for encoders.) Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavcodec/dvenc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c index c7fc930b4b..5ff114da9f 100644 --- a/libavcodec/dvenc.c +++ b/libavcodec/dvenc.c @@ -63,6 +63,8 @@ typedef struct DVEncContext { DVwork_chunk work_chunks[4 * 12 * 27]; int quant_deadzone; + + PixblockDSPContext pdsp; } DVEncContext; @@ -70,7 +72,6 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) { DVEncContext *s = avctx->priv_data; FDCTDSPContext fdsp; - PixblockDSPContext pdsp; int ret; s->avctx = avctx; @@ -108,12 +109,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) } memset(&fdsp,0, sizeof(fdsp)); - memset(&pdsp,0, sizeof(pdsp)); ff_fdctdsp_init(&fdsp, avctx); - ff_pixblockdsp_init(&pdsp, avctx); - s->get_pixels = pdsp.get_pixels; s->fdct[0] = fdsp.fdct; s->fdct[1] = fdsp.fdct248; + ff_pixblockdsp_init(&s->pdsp, avctx); #if !CONFIG_HARDCODED_TABLES { @@ -1201,6 +1200,13 @@ static int dvvideo_encode_frame(AVCodecContext *c, AVPacket *pkt, DVEncContext *s = c->priv_data; int ret; + if ((uintptr_t)frame->data[0] & 7 || frame->linesize[0] & 7 || + (uintptr_t)frame->data[1] & 7 || frame->linesize[1] & 7 || + (uintptr_t)frame->data[2] & 7 || frame->linesize[2] & 7) + s->get_pixels = 
s->pdsp.get_pixels_unaligned; + else + s->get_pixels = s->pdsp.get_pixels; + if ((ret = ff_get_encode_buffer(c, pkt, s->sys->frame_size, 0)) < 0) return ret; /* Fixme: Only zero the part that is not overwritten later. */ -- 2.45.2
From 4ff3b21de5000d0e7cf0c99485b98671ea831bea Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Tue, 27 May 2025 17:09:34 +0200 Subject: [PATCH 3/7] avcodec/pixblockdsp: Improve 8 vs 16 bit check Before this commit, the input in get_pixels and get_pixels_unaligned has been treated inconsistently: - The generic code treated 9, 10, 12 and 14 bits as 16bit input (these bits correspond to what FFmpeg's dsputils supported), everything with <= 8 bits as 8 bit and everything else as 8 bit when used via AVDCT (which exposes these functions and purports to support up to 14 bits). - AARCH64, ARM, PPC, RISC-V and x86 ignore this AVDCT special case. - RISC-V also ignored the restriction to 9, 10, 12 and 14 for its 16bit check and treated everything > 8 bits as 16bit. - The mmi MIPS code treats everything as 8 bit when used via AVDCT (this is certainly broken); otherwise it checks for <= 8 bits. The msa MIPS code behaves like the generic code. This commit changes this to treat 9..16 bits as 16 bit input, everything else as 8 bit (the former because it makes sense, the latter to preserve the behaviour for external users*). *: The only internal user of AVDCT (the spp filter) always uses 8, 9 or 10 bits. 
Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavcodec/aarch64/pixblockdsp_init_aarch64.c | 2 -- libavcodec/arm/pixblockdsp_init_arm.c | 2 -- libavcodec/mips/pixblockdsp_init_mips.c | 20 ++--------- libavcodec/pixblockdsp.c | 33 ++++++++----------- libavcodec/pixblockdsp.h | 12 +++---- libavcodec/ppc/pixblockdsp.c | 2 -- libavcodec/riscv/pixblockdsp_init.c | 2 -- libavcodec/x86/pixblockdsp_init.c | 1 - 8 files changed, 22 insertions(+), 52 deletions(-) diff --git a/libavcodec/aarch64/pixblockdsp_init_aarch64.c b/libavcodec/aarch64/pixblockdsp_init_aarch64.c index e4bac722f8..404f3680a6 100644 --- a/libavcodec/aarch64/pixblockdsp_init_aarch64.c +++ b/libavcodec/aarch64/pixblockdsp_init_aarch64.c @@ -21,7 +21,6 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/aarch64/cpu.h" -#include "libavcodec/avcodec.h" #include "libavcodec/pixblockdsp.h" void ff_get_pixels_neon(int16_t *block, const uint8_t *pixels, @@ -30,7 +29,6 @@ void ff_diff_pixels_neon(int16_t *block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); av_cold void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c, - AVCodecContext *avctx, unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); diff --git a/libavcodec/arm/pixblockdsp_init_arm.c b/libavcodec/arm/pixblockdsp_init_arm.c index 5481c0178c..121338ad0c 100644 --- a/libavcodec/arm/pixblockdsp_init_arm.c +++ b/libavcodec/arm/pixblockdsp_init_arm.c @@ -21,7 +21,6 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" #include "libavutil/arm/cpu.h" -#include "libavcodec/avcodec.h" #include "libavcodec/pixblockdsp.h" void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, @@ -39,7 +38,6 @@ void ff_diff_pixels_unaligned_neon(int16_t *block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c, - AVCodecContext *avctx, unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); diff --git 
a/libavcodec/mips/pixblockdsp_init_mips.c b/libavcodec/mips/pixblockdsp_init_mips.c index 00f189d558..acea95d36e 100644 --- a/libavcodec/mips/pixblockdsp_init_mips.c +++ b/libavcodec/mips/pixblockdsp_init_mips.c @@ -23,7 +23,7 @@ #include "libavcodec/pixblockdsp.h" #include "pixblockdsp_mips.h" -void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_mips(PixblockDSPContext *c, unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); @@ -31,27 +31,13 @@ void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx, if (have_mmi(cpu_flags)) { c->diff_pixels = ff_diff_pixels_mmi; - if (!high_bit_depth || avctx->codec_type != AVMEDIA_TYPE_VIDEO) { + if (!high_bit_depth) c->get_pixels = ff_get_pixels_8_mmi; - } } if (have_msa(cpu_flags)) { c->diff_pixels = ff_diff_pixels_msa; - switch (avctx->bits_per_raw_sample) { - case 9: - case 10: - case 12: - case 14: - c->get_pixels = ff_get_pixels_16_msa; - break; - default: - if (avctx->bits_per_raw_sample <= 8 || avctx->codec_type != - AVMEDIA_TYPE_VIDEO) { - c->get_pixels = ff_get_pixels_8_msa; - } - break; - } + c->get_pixels = high_bit_depth ? 
ff_get_pixels_16_msa : ff_get_pixels_8_msa; } } diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c index 1fff244511..78f1f9b5c7 100644 --- a/libavcodec/pixblockdsp.c +++ b/libavcodec/pixblockdsp.c @@ -87,38 +87,31 @@ static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1, av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) { - av_unused const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; + const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8 && + avctx->bits_per_raw_sample <= 16; c->diff_pixels_unaligned = c->diff_pixels = diff_pixels_c; - switch (avctx->bits_per_raw_sample) { - case 9: - case 10: - case 12: - case 14: + if (high_bit_depth) { c->get_pixels_unaligned = get_pixels_unaligned_16_c; - c->get_pixels = get_pixels_16_c; - break; - default: - if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) { - c->get_pixels_unaligned = - c->get_pixels = get_pixels_8_c; - } - break; + c->get_pixels = get_pixels_16_c; + } else { + c->get_pixels_unaligned = + c->get_pixels = get_pixels_8_c; } #if ARCH_AARCH64 - ff_pixblockdsp_init_aarch64(c, avctx, high_bit_depth); + ff_pixblockdsp_init_aarch64(c, high_bit_depth); #elif ARCH_ARM - ff_pixblockdsp_init_arm(c, avctx, high_bit_depth); + ff_pixblockdsp_init_arm(c, high_bit_depth); #elif ARCH_PPC - ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth); + ff_pixblockdsp_init_ppc(c, high_bit_depth); #elif ARCH_RISCV - ff_pixblockdsp_init_riscv(c, avctx, high_bit_depth); + ff_pixblockdsp_init_riscv(c, high_bit_depth); #elif ARCH_X86 - ff_pixblockdsp_init_x86(c, avctx, high_bit_depth); + ff_pixblockdsp_init_x86(c, high_bit_depth); #elif ARCH_MIPS - ff_pixblockdsp_init_mips(c, avctx, high_bit_depth); + ff_pixblockdsp_init_mips(c, high_bit_depth); #endif } diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h index 215b0905d7..999aa8a926 100644 --- a/libavcodec/pixblockdsp.h +++ b/libavcodec/pixblockdsp.h @@ -42,17 +42,17 @@ 
typedef struct PixblockDSPContext { } PixblockDSPContext; void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); -void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c, unsigned high_bit_depth); -void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_arm(PixblockDSPContext *c, unsigned high_bit_depth); -void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, unsigned high_bit_depth); -void ff_pixblockdsp_init_riscv(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_riscv(PixblockDSPContext *c, unsigned high_bit_depth); -void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_x86(PixblockDSPContext *c, unsigned high_bit_depth); -void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx, +void ff_pixblockdsp_init_mips(PixblockDSPContext *c, unsigned high_bit_depth); #endif /* AVCODEC_PIXBLOCKDSP_H */ diff --git a/libavcodec/ppc/pixblockdsp.c b/libavcodec/ppc/pixblockdsp.c index 01d14b4124..75287b1e85 100644 --- a/libavcodec/ppc/pixblockdsp.c +++ b/libavcodec/ppc/pixblockdsp.c @@ -27,7 +27,6 @@ #include "libavutil/ppc/cpu.h" #include "libavutil/ppc/util_altivec.h" -#include "libavcodec/avcodec.h" #include "libavcodec/pixblockdsp.h" #if HAVE_ALTIVEC @@ -263,7 +262,6 @@ static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1, #endif /* HAVE_VSX */ av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, - AVCodecContext *avctx, unsigned high_bit_depth) { #if HAVE_ALTIVEC diff --git a/libavcodec/riscv/pixblockdsp_init.c b/libavcodec/riscv/pixblockdsp_init.c index 28caa99dff..e59fba63cc 100644 --- a/libavcodec/riscv/pixblockdsp_init.c +++ b/libavcodec/riscv/pixblockdsp_init.c @@ -24,7 +24,6 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" 
#include "libavutil/riscv/cpu.h" -#include "libavcodec/avcodec.h" #include "libavcodec/pixblockdsp.h" void ff_get_pixels_8_rvi(int16_t *block, const uint8_t *pixels, @@ -42,7 +41,6 @@ void ff_diff_pixels_unaligned_rvv(int16_t *block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c, - AVCodecContext *avctx, unsigned high_bit_depth) { #if HAVE_RV diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c index 51f2a0033a..f105775c2b 100644 --- a/libavcodec/x86/pixblockdsp_init.c +++ b/libavcodec/x86/pixblockdsp_init.c @@ -28,7 +28,6 @@ void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, - AVCodecContext *avctx, unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); -- 2.45.2
From 5c0d6d9bb8b7ac4a983d34e7a2e5c3737e93f49e Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Tue, 27 May 2025 19:13:45 +0200 Subject: [PATCH 4/7] avcodec/pixblockdsp: Pass bits_per_raw_sample directly Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavcodec/asvenc.c | 2 +- libavcodec/avdct.c | 2 +- libavcodec/dnxhdenc.c | 2 +- libavcodec/dvenc.c | 2 +- libavcodec/mpegvideo_enc.c | 2 +- libavcodec/pixblockdsp.c | 7 +++---- libavcodec/pixblockdsp.h | 5 ++--- tests/checkasm/pixblockdsp.c | 5 +---- 8 files changed, 11 insertions(+), 16 deletions(-) diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c index ba48aa8f08..159b070821 100644 --- a/libavcodec/asvenc.c +++ b/libavcodec/asvenc.c @@ -378,7 +378,7 @@ static av_cold int encode_init(AVCodecContext *avctx) ff_asv_common_init(avctx); ff_fdctdsp_init(&a->fdsp, avctx); - ff_pixblockdsp_init(&a->pdsp, avctx); + ff_pixblockdsp_init(&a->pdsp, 8); if (avctx->global_quality <= 0) avctx->global_quality = 4 * FF_QUALITY_SCALE; diff --git a/libavcodec/avdct.c b/libavcodec/avdct.c index f995e73eab..5322b181bc 100644 --- a/libavcodec/avdct.c +++ b/libavcodec/avdct.c @@ -119,7 +119,7 @@ int avcodec_dct_init(AVDCT *dsp) #if CONFIG_PIXBLOCKDSP { PixblockDSPContext pdsp; - ff_pixblockdsp_init(&pdsp, avctx); + ff_pixblockdsp_init(&pdsp, dsp->bits_per_sample); COPY(pdsp, get_pixels); COPY(pdsp, get_pixels_unaligned); } diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c index a8f8ab3cd9..7a5978c137 100644 --- a/libavcodec/dnxhdenc.c +++ b/libavcodec/dnxhdenc.c @@ -423,7 +423,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx) ff_fdctdsp_init(&ctx->m.fdsp, avctx); ff_mpv_idct_init(&ctx->m.c); ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); - ff_pixblockdsp_init(&ctx->m.pdsp, avctx); + ff_pixblockdsp_init(&ctx->m.pdsp, ctx->bit_depth); ff_dct_encode_init(&ctx->m); if (ctx->profile != AV_PROFILE_DNXHD) diff --git a/libavcodec/dvenc.c 
b/libavcodec/dvenc.c index 5ff114da9f..b6764e9c2c 100644 --- a/libavcodec/dvenc.c +++ b/libavcodec/dvenc.c @@ -112,7 +112,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) ff_fdctdsp_init(&fdsp, avctx); s->fdct[0] = fdsp.fdct; s->fdct[1] = fdsp.fdct248; - ff_pixblockdsp_init(&s->pdsp, avctx); + ff_pixblockdsp_init(&s->pdsp, 8); #if !CONFIG_HARDCODED_TABLES { diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index 233d729008..46901fc506 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -1031,7 +1031,7 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx) init_unquantize(s, avctx); ff_fdctdsp_init(&s->fdsp, avctx); ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); - ff_pixblockdsp_init(&s->pdsp, avctx); + ff_pixblockdsp_init(&s->pdsp, 8); ret = me_cmp_init(m, avctx); if (ret < 0) return ret; diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c index 78f1f9b5c7..110a374260 100644 --- a/libavcodec/pixblockdsp.c +++ b/libavcodec/pixblockdsp.c @@ -21,7 +21,6 @@ #include "config.h" #include "libavutil/attributes.h" #include "libavutil/intreadwrite.h" -#include "avcodec.h" #include "pixblockdsp.h" static void get_pixels_16_c(int16_t *restrict block, const uint8_t *pixels, @@ -85,10 +84,10 @@ static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1, } } -av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) +av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, int bits_per_raw_sample) { - const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8 && - avctx->bits_per_raw_sample <= 16; + const unsigned high_bit_depth = bits_per_raw_sample > 8 && + bits_per_raw_sample <= 16; c->diff_pixels_unaligned = c->diff_pixels = diff_pixels_c; diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h index 999aa8a926..487a39244d 100644 --- a/libavcodec/pixblockdsp.h +++ b/libavcodec/pixblockdsp.h @@ -19,10 +19,9 @@ #ifndef AVCODEC_PIXBLOCKDSP_H #define 
AVCODEC_PIXBLOCKDSP_H +#include <stddef.h> #include <stdint.h> -#include "avcodec.h" - typedef struct PixblockDSPContext { void (*get_pixels)(int16_t *restrict block /* align 16 */, const uint8_t *pixels /* align 8 */, @@ -41,7 +40,7 @@ typedef struct PixblockDSPContext { } PixblockDSPContext; -void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); +void ff_pixblockdsp_init(PixblockDSPContext *c, int bits_per_raw_sample); void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c, unsigned high_bit_depth); void ff_pixblockdsp_init_arm(PixblockDSPContext *c, diff --git a/tests/checkasm/pixblockdsp.c b/tests/checkasm/pixblockdsp.c index 26a697a346..79763de1ea 100644 --- a/tests/checkasm/pixblockdsp.c +++ b/tests/checkasm/pixblockdsp.c @@ -90,11 +90,8 @@ void checkasm_check_pixblockdsp(void) uint16_t *dst0 = (uint16_t *)dst0_; uint16_t *dst1 = (uint16_t *)dst1_; PixblockDSPContext h; - AVCodecContext avctx = { - .bits_per_raw_sample = 8, - }; - ff_pixblockdsp_init(&h, &avctx); + ff_pixblockdsp_init(&h, 8); if (check_func(h.get_pixels, "get_pixels")) check_get_pixels(uint8_t, 1); -- 2.45.2
From bf34e5c5f90b3e01d94b00e8a122c0b064862b54 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Tue, 27 May 2025 21:21:25 +0200 Subject: [PATCH 5/7] avcodec/pixblockdsp: Fix get_pixels alignment documentation Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavcodec/pixblockdsp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h index 487a39244d..8d767b2fd3 100644 --- a/libavcodec/pixblockdsp.h +++ b/libavcodec/pixblockdsp.h @@ -24,7 +24,7 @@ typedef struct PixblockDSPContext { void (*get_pixels)(int16_t *restrict block /* align 16 */, - const uint8_t *pixels /* align 8 */, + const uint8_t *pixels /* align 8 for <= 8 bit, 16 otherwise */, ptrdiff_t stride); void (*get_pixels_unaligned)(int16_t *restrict block /* align 16 */, const uint8_t *pixels, -- 2.45.2
From 5f5ea7a21858e7194bc0e455fd5bc45897bfbc83 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Tue, 27 May 2025 21:46:06 +0200 Subject: [PATCH 6/7] avcodec/asvenc,dvenc: Optimize unaligned checks away if possible For certain arches (AARCH64, x86, generic) get_pixels and get_pixels_unaligned always coincide for 8 bit input. In these cases it is possible to avoid checks for unaligned input in asvenc, dvenc. Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavcodec/asvenc.c | 7 ++++--- libavcodec/dvenc.c | 7 ++++--- libavcodec/pixblockdsp.h | 7 ++++++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c index 159b070821..883edd0468 100644 --- a/libavcodec/asvenc.c +++ b/libavcodec/asvenc.c @@ -301,9 +301,10 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, if (ret < 0) return ret; - if ((uintptr_t)pict->data[0] & 7 || pict->linesize[0] & 7 || - (uintptr_t)pict->data[1] & 7 || pict->linesize[1] & 7 || - (uintptr_t)pict->data[2] & 7 || pict->linesize[2] & 7) + if (!PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED && + ((uintptr_t)pict->data[0] & 7 || pict->linesize[0] & 7 || + (uintptr_t)pict->data[1] & 7 || pict->linesize[1] & 7 || + (uintptr_t)pict->data[2] & 7 || pict->linesize[2] & 7)) a->get_pixels = a->pdsp.get_pixels_unaligned; else a->get_pixels = a->pdsp.get_pixels; diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c index b6764e9c2c..a477b84261 100644 --- a/libavcodec/dvenc.c +++ b/libavcodec/dvenc.c @@ -1200,9 +1200,10 @@ static int dvvideo_encode_frame(AVCodecContext *c, AVPacket *pkt, DVEncContext *s = c->priv_data; int ret; - if ((uintptr_t)frame->data[0] & 7 || frame->linesize[0] & 7 || - (uintptr_t)frame->data[1] & 7 || frame->linesize[1] & 7 || - (uintptr_t)frame->data[2] & 7 || frame->linesize[2] & 7) + if (!PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED && + ((uintptr_t)frame->data[0] & 7 || frame->linesize[0] & 7 || + 
(uintptr_t)frame->data[1] & 7 || frame->linesize[1] & 7 || + (uintptr_t)frame->data[2] & 7 || frame->linesize[2] & 7)) s->get_pixels = s->pdsp.get_pixels_unaligned; else s->get_pixels = s->pdsp.get_pixels; diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h index 8d767b2fd3..d493d0e22b 100644 --- a/libavcodec/pixblockdsp.h +++ b/libavcodec/pixblockdsp.h @@ -22,9 +22,14 @@ #include <stddef.h> #include <stdint.h> +#define PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED \ + !(ARCH_ARM || ARCH_MIPS || ARCH_PPC || ARCH_RISCV) + typedef struct PixblockDSPContext { void (*get_pixels)(int16_t *restrict block /* align 16 */, - const uint8_t *pixels /* align 8 for <= 8 bit, 16 otherwise */, + /* align 16 for > 8 bits; align 8 for <= 8 bits + * (or 1 if PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED is set) */ + const uint8_t *pixels, ptrdiff_t stride); void (*get_pixels_unaligned)(int16_t *restrict block /* align 16 */, const uint8_t *pixels, -- 2.45.2
From d7cc7eea3d14154ac066c47577da86bbc36f3e9d Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinha...@outlook.com> Date: Tue, 27 May 2025 19:34:29 +0200 Subject: [PATCH 7/7] avfilter/x86/vf_spp: Remove permutation-specific code The MMX requantize functions have the MMX permutation (i.e. FF_IDCT_PERM_SIMPLE) hardcoded and therefore check for the used permutation (namely via a CRC). Yet this is very ugly and could even lead to misdetection; furthermore, since d7246ea9f229db64ed909d7446196128d6f53de0 the permutation used here is de-facto and since bfb28b5ce89f3e950214b67ea95b45e3355c2caf definitely impossible on x64, making this code dead on x64. So remove it. Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com> --- libavfilter/x86/vf_spp.c | 163 --------------------------------------- 1 file changed, 163 deletions(-) diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c index 498660d7d0..f8e5727bfc 100644 --- a/libavfilter/x86/vf_spp.c +++ b/libavfilter/x86/vf_spp.c @@ -21,159 +21,9 @@ #include "libavutil/attributes.h" #include "libavutil/cpu.h" -#include "libavutil/crc.h" -#include "libavutil/x86/asm.h" #include "libavfilter/vf_spp.h" #if HAVE_MMX_INLINE -static void hardthresh_mmx(int16_t dst[64], const int16_t src[64], - int qp, const uint8_t *permutation) -{ - int bias = 0; //FIXME - unsigned int threshold1; - - threshold1 = qp * ((1<<4) - bias) - 1; - -#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ - "movq " #src0 ", %%mm0 \n" \ - "movq " #src1 ", %%mm1 \n" \ - "movq " #src2 ", %%mm2 \n" \ - "movq " #src3 ", %%mm3 \n" \ - "psubw %%mm4, %%mm0 \n" \ - "psubw %%mm4, %%mm1 \n" \ - "psubw %%mm4, %%mm2 \n" \ - "psubw %%mm4, %%mm3 \n" \ - "paddusw %%mm5, %%mm0 \n" \ - "paddusw %%mm5, %%mm1 \n" \ - "paddusw %%mm5, %%mm2 \n" \ - "paddusw %%mm5, %%mm3 \n" \ - "paddw %%mm6, %%mm0 \n" \ - "paddw %%mm6, %%mm1 \n" \ - "paddw %%mm6, %%mm2 \n" \ - "paddw %%mm6, %%mm3 \n" \ - "psubusw %%mm6, %%mm0 \n" \ - "psubusw 
%%mm6, %%mm1 \n" \ - "psubusw %%mm6, %%mm2 \n" \ - "psubusw %%mm6, %%mm3 \n" \ - "psraw $3, %%mm0 \n" \ - "psraw $3, %%mm1 \n" \ - "psraw $3, %%mm2 \n" \ - "psraw $3, %%mm3 \n" \ - \ - "movq %%mm0, %%mm7 \n" \ - "punpcklwd %%mm2, %%mm0 \n" /*A*/ \ - "punpckhwd %%mm2, %%mm7 \n" /*C*/ \ - "movq %%mm1, %%mm2 \n" \ - "punpcklwd %%mm3, %%mm1 \n" /*B*/ \ - "punpckhwd %%mm3, %%mm2 \n" /*D*/ \ - "movq %%mm0, %%mm3 \n" \ - "punpcklwd %%mm1, %%mm0 \n" /*A*/ \ - "punpckhwd %%mm7, %%mm3 \n" /*C*/ \ - "punpcklwd %%mm2, %%mm7 \n" /*B*/ \ - "punpckhwd %%mm2, %%mm1 \n" /*D*/ \ - \ - "movq %%mm0, " #dst0 " \n" \ - "movq %%mm7, " #dst1 " \n" \ - "movq %%mm3, " #dst2 " \n" \ - "movq %%mm1, " #dst3 " \n" - - __asm__ volatile( - "movd %2, %%mm4 \n" - "movd %3, %%mm5 \n" - "movd %4, %%mm6 \n" - "packssdw %%mm4, %%mm4 \n" - "packssdw %%mm5, %%mm5 \n" - "packssdw %%mm6, %%mm6 \n" - "packssdw %%mm4, %%mm4 \n" - "packssdw %%mm5, %%mm5 \n" - "packssdw %%mm6, %%mm6 \n" - REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 64(%0), 72(%0)) - REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0)) - REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0)) - REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0)) - : : "r" (src), "r" (dst), "g" (threshold1+1), "g" (threshold1+5), "g" (threshold1-4) //FIXME maybe more accurate then needed? 
- ); - dst[0] = (src[0] + 4) >> 3; -} - -static void softthresh_mmx(int16_t dst[64], const int16_t src[64], - int qp, const uint8_t *permutation) -{ - int bias = 0; //FIXME - unsigned int threshold1; - - threshold1 = qp*((1<<4) - bias) - 1; - -#undef REQUANT_CORE -#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \ - "movq " #src0 ", %%mm0 \n" \ - "movq " #src1 ", %%mm1 \n" \ - "pxor %%mm6, %%mm6 \n" \ - "pxor %%mm7, %%mm7 \n" \ - "pcmpgtw %%mm0, %%mm6 \n" \ - "pcmpgtw %%mm1, %%mm7 \n" \ - "pxor %%mm6, %%mm0 \n" \ - "pxor %%mm7, %%mm1 \n" \ - "psubusw %%mm4, %%mm0 \n" \ - "psubusw %%mm4, %%mm1 \n" \ - "pxor %%mm6, %%mm0 \n" \ - "pxor %%mm7, %%mm1 \n" \ - "movq " #src2 ", %%mm2 \n" \ - "movq " #src3 ", %%mm3 \n" \ - "pxor %%mm6, %%mm6 \n" \ - "pxor %%mm7, %%mm7 \n" \ - "pcmpgtw %%mm2, %%mm6 \n" \ - "pcmpgtw %%mm3, %%mm7 \n" \ - "pxor %%mm6, %%mm2 \n" \ - "pxor %%mm7, %%mm3 \n" \ - "psubusw %%mm4, %%mm2 \n" \ - "psubusw %%mm4, %%mm3 \n" \ - "pxor %%mm6, %%mm2 \n" \ - "pxor %%mm7, %%mm3 \n" \ - \ - "paddsw %%mm5, %%mm0 \n" \ - "paddsw %%mm5, %%mm1 \n" \ - "paddsw %%mm5, %%mm2 \n" \ - "paddsw %%mm5, %%mm3 \n" \ - "psraw $3, %%mm0 \n" \ - "psraw $3, %%mm1 \n" \ - "psraw $3, %%mm2 \n" \ - "psraw $3, %%mm3 \n" \ - \ - "movq %%mm0, %%mm7 \n" \ - "punpcklwd %%mm2, %%mm0 \n" /*A*/ \ - "punpckhwd %%mm2, %%mm7 \n" /*C*/ \ - "movq %%mm1, %%mm2 \n" \ - "punpcklwd %%mm3, %%mm1 \n" /*B*/ \ - "punpckhwd %%mm3, %%mm2 \n" /*D*/ \ - "movq %%mm0, %%mm3 \n" \ - "punpcklwd %%mm1, %%mm0 \n" /*A*/ \ - "punpckhwd %%mm7, %%mm3 \n" /*C*/ \ - "punpcklwd %%mm2, %%mm7 \n" /*B*/ \ - "punpckhwd %%mm2, %%mm1 \n" /*D*/ \ - \ - "movq %%mm0, " #dst0 " \n" \ - "movq %%mm7, " #dst1 " \n" \ - "movq %%mm3, " #dst2 " \n" \ - "movq %%mm1, " #dst3 " \n" - - __asm__ volatile( - "movd %2, %%mm4 \n" - "movd %3, %%mm5 \n" - "packssdw %%mm4, %%mm4 \n" - "packssdw %%mm5, %%mm5 \n" - "packssdw %%mm4, %%mm4 \n" - "packssdw %%mm5, %%mm5 \n" - REQUANT_CORE( (%1), 8(%1), 16(%1), 24(%1), (%0), 8(%0), 
64(%0), 72(%0)) - REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0)) - REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0)) - REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0)) - : : "r" (src), "r" (dst), "g" (threshold1), "rm" (4) //FIXME maybe more accurate then needed? - ); - - dst[0] = (src[0] + 4) >> 3; -} - static void store_slice_mmx(uint8_t *dst, const int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale, @@ -223,20 +73,7 @@ av_cold void ff_spp_init_x86(SPPContext *s) int cpu_flags = av_get_cpu_flags(); if (cpu_flags & AV_CPU_FLAG_MMX) { - static const uint32_t mmx_idct_perm_crc = 0xe5e8adc4; - uint32_t idct_perm_crc = - av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0, - s->dct->idct_permutation, - sizeof(s->dct->idct_permutation)); - int64_t bps; s->store_slice = store_slice_mmx; - av_opt_get_int(s->dct, "bits_per_sample", 0, &bps); - if (bps <= 8 && idct_perm_crc == mmx_idct_perm_crc) { - switch (s->mode) { - case 0: s->requantize = hardthresh_mmx; break; - case 1: s->requantize = softthresh_mmx; break; - } - } } #endif } -- 2.45.2
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".