Patches attached.

- Andreas
From d06296ccf36da8132b567d75d161ff6a0127368a Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
Date: Tue, 27 May 2025 15:38:17 +0200
Subject: [PATCH 1/7] avcodec/asvenc: Fix crash with unaligned
 pointers/linesizes

This happens on systems where get_pixels really requires its input
to be properly aligned, like ARMv7 or RISC-V. For these
systems, 0401ca714a2714743573e27c384ffa810fd31a92 caused
a bus error for the vsynth3-asv[12] tests, because
the stride in these tests is unaligned. See e.g.
https://fate.ffmpeg.org/report.cgi?slot=armv7-linux-gcc-13&time=20250527020548
https://fate.ffmpeg.org/report.cgi?slot=rv64gcvb-linux-gnu-gcc&time=20250527001827

It can also happen (even before said commit) if the pointers
themselves are unaligned, e.g. when using the crop filter:
ffmpeg -filter_complex nullsrc=s=740x576:r=25,format=yuv420p,crop=w=720:x=2 \
-c:v asv2 -f null -

The alignment requirements for the frames passed to encoders are
mostly undocumented; the only thing I could find is the documentation
of AVFrame.linesize: "For video the linesizes should be multiples
of the CPUs alignment preference". This means that the FFmpeg cli
violates our API.

Yet as the above command line shows, it can also happen with
unaligned pointers and there does not seem to be a prohibition
of this, so we need to handle this case. This commit does so
by using get_pixels_unaligned when needed.

Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
---
 libavcodec/asvenc.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index bcdb5cfbe2..ba48aa8f08 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -45,6 +45,10 @@ typedef struct ASVEncContext {
 
     PutBitContext pb;
 
+    void (*get_pixels)(int16_t *restrict block,
+                       const uint8_t *pixels,
+                       ptrdiff_t stride);
+
     PixblockDSPContext pdsp;
     FDCTDSPContext fdsp;
     DECLARE_ALIGNED(32, int16_t, block)[6][64];
@@ -219,16 +223,16 @@ static inline void dct_get(ASVEncContext *a, const AVFrame *frame,
     const uint8_t *ptr_cb = frame->data[1] + (mb_y *  8 * frame->linesize[1]) + mb_x *  8;
     const uint8_t *ptr_cr = frame->data[2] + (mb_y *  8 * frame->linesize[2]) + mb_x *  8;
 
-    a->pdsp.get_pixels(block[0], ptr_y,                    linesize);
-    a->pdsp.get_pixels(block[1], ptr_y + 8,                linesize);
-    a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize,     linesize);
-    a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize);
+    a->get_pixels(block[0], ptr_y,                    linesize);
+    a->get_pixels(block[1], ptr_y + 8,                linesize);
+    a->get_pixels(block[2], ptr_y + 8 * linesize,     linesize);
+    a->get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize);
     for (i = 0; i < 4; i++)
         a->fdsp.fdct(block[i]);
 
     if (!(a->c.avctx->flags & AV_CODEC_FLAG_GRAY)) {
-        a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]);
-        a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]);
+        a->get_pixels(block[4], ptr_cb, frame->linesize[1]);
+        a->get_pixels(block[5], ptr_cr, frame->linesize[2]);
         for (i = 4; i < 6; i++)
             a->fdsp.fdct(block[i]);
     }
@@ -297,6 +301,13 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     if (ret < 0)
         return ret;
 
+    if ((uintptr_t)pict->data[0] & 7 || pict->linesize[0] & 7 ||
+        (uintptr_t)pict->data[1] & 7 || pict->linesize[1] & 7 ||
+        (uintptr_t)pict->data[2] & 7 || pict->linesize[2] & 7)
+        a->get_pixels = a->pdsp.get_pixels_unaligned;
+    else
+        a->get_pixels = a->pdsp.get_pixels;
+
     init_put_bits(&a->pb, pkt->data, pkt->size);
 
     for (int mb_y = 0; mb_y < c->mb_height2; mb_y++) {
-- 
2.45.2

From 49a1efb0b2f3a0370da3ea8175c5477fa1cb2a26 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
Date: Tue, 27 May 2025 16:30:11 +0200
Subject: [PATCH 2/7] avcodec/dvenc: Check for unaligned pointers, strides

Fixes segfaults on systems where PixblockDSPContext.get_pixels
really requires its input to be properly aligned (e.g. ARMv7).
Before this commit input created by
-filter_complex nullsrc=s=740x576:r=25,format=yuv420p,crop=w=720:x=2
led to crashes.

(The unaligned strides are in violation of the AVFrame.linesize
documentation; unaligned pointers themselves do not seem to be
prohibited for encoders.)

Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
---
 libavcodec/dvenc.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c
index c7fc930b4b..5ff114da9f 100644
--- a/libavcodec/dvenc.c
+++ b/libavcodec/dvenc.c
@@ -63,6 +63,8 @@ typedef struct DVEncContext {
     DVwork_chunk work_chunks[4 * 12 * 27];
 
     int quant_deadzone;
+
+    PixblockDSPContext pdsp;
 } DVEncContext;
 
 
@@ -70,7 +72,6 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
 {
     DVEncContext *s = avctx->priv_data;
     FDCTDSPContext fdsp;
-    PixblockDSPContext pdsp;
     int ret;
 
     s->avctx = avctx;
@@ -108,12 +109,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
     }
 
     memset(&fdsp,0, sizeof(fdsp));
-    memset(&pdsp,0, sizeof(pdsp));
     ff_fdctdsp_init(&fdsp, avctx);
-    ff_pixblockdsp_init(&pdsp, avctx);
-    s->get_pixels = pdsp.get_pixels;
     s->fdct[0]    = fdsp.fdct;
     s->fdct[1]    = fdsp.fdct248;
+    ff_pixblockdsp_init(&s->pdsp, avctx);
 
 #if !CONFIG_HARDCODED_TABLES
     {
@@ -1201,6 +1200,13 @@ static int dvvideo_encode_frame(AVCodecContext *c, AVPacket *pkt,
     DVEncContext *s = c->priv_data;
     int ret;
 
+    if ((uintptr_t)frame->data[0] & 7 || frame->linesize[0] & 7 ||
+        (uintptr_t)frame->data[1] & 7 || frame->linesize[1] & 7 ||
+        (uintptr_t)frame->data[2] & 7 || frame->linesize[2] & 7)
+        s->get_pixels = s->pdsp.get_pixels_unaligned;
+    else
+        s->get_pixels = s->pdsp.get_pixels;
+
     if ((ret = ff_get_encode_buffer(c, pkt, s->sys->frame_size, 0)) < 0)
         return ret;
     /* Fixme: Only zero the part that is not overwritten later. */
-- 
2.45.2

From 4ff3b21de5000d0e7cf0c99485b98671ea831bea Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
Date: Tue, 27 May 2025 17:09:34 +0200
Subject: [PATCH 3/7] avcodec/pixblockdsp: Improve 8 vs 16 bit check

Before this commit, the input in get_pixels and get_pixels_unaligned
was treated inconsistently:
- The generic code treated 9, 10, 12 and 14 bits as 16bit input
(these bit depths correspond to what FFmpeg's dsputils supported),
everything with <= 8 bits as 8 bit and everything else as 8 bit
when used via AVDCT (which exposes these functions and purports
to support up to 14 bits).
- AARCH64, ARM, PPC, RISC-V and x86 ignore this AVDCT special case.
- RISC-V also ignored the restriction to 9, 10, 12 and 14 for its
16bit check and treated everything > 8 bits as 16bit.
- The mmi MIPS code treats everything as 8 bit when used via
AVDCT (this is certainly broken); otherwise it checks for <= 8 bits.
The msa MIPS code behaves like the generic code.

This commit changes this to treat 9..16 bits as 16 bit input,
everything else as 8 bit (the former because it makes sense,
the latter to preserve the behaviour for external users*).

*: The only internal user of AVDCT (the spp filter) always
uses 8, 9 or 10 bits.

Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
---
 libavcodec/aarch64/pixblockdsp_init_aarch64.c |  2 --
 libavcodec/arm/pixblockdsp_init_arm.c         |  2 --
 libavcodec/mips/pixblockdsp_init_mips.c       | 20 ++---------
 libavcodec/pixblockdsp.c                      | 33 ++++++++-----------
 libavcodec/pixblockdsp.h                      | 12 +++----
 libavcodec/ppc/pixblockdsp.c                  |  2 --
 libavcodec/riscv/pixblockdsp_init.c           |  2 --
 libavcodec/x86/pixblockdsp_init.c             |  1 -
 8 files changed, 22 insertions(+), 52 deletions(-)

diff --git a/libavcodec/aarch64/pixblockdsp_init_aarch64.c b/libavcodec/aarch64/pixblockdsp_init_aarch64.c
index e4bac722f8..404f3680a6 100644
--- a/libavcodec/aarch64/pixblockdsp_init_aarch64.c
+++ b/libavcodec/aarch64/pixblockdsp_init_aarch64.c
@@ -21,7 +21,6 @@
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/aarch64/cpu.h"
-#include "libavcodec/avcodec.h"
 #include "libavcodec/pixblockdsp.h"
 
 void ff_get_pixels_neon(int16_t *block, const uint8_t *pixels,
@@ -30,7 +29,6 @@ void ff_diff_pixels_neon(int16_t *block, const uint8_t *s1,
                          const uint8_t *s2, ptrdiff_t stride);
 
 av_cold void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c,
-                                         AVCodecContext *avctx,
                                          unsigned high_bit_depth)
 {
     int cpu_flags = av_get_cpu_flags();
diff --git a/libavcodec/arm/pixblockdsp_init_arm.c b/libavcodec/arm/pixblockdsp_init_arm.c
index 5481c0178c..121338ad0c 100644
--- a/libavcodec/arm/pixblockdsp_init_arm.c
+++ b/libavcodec/arm/pixblockdsp_init_arm.c
@@ -21,7 +21,6 @@
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/arm/cpu.h"
-#include "libavcodec/avcodec.h"
 #include "libavcodec/pixblockdsp.h"
 
 void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels,
@@ -39,7 +38,6 @@ void ff_diff_pixels_unaligned_neon(int16_t *block, const uint8_t *s1,
                                    const uint8_t *s2, ptrdiff_t stride);
 
 av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
-                                     AVCodecContext *avctx,
                                      unsigned high_bit_depth)
 {
     int cpu_flags = av_get_cpu_flags();
diff --git a/libavcodec/mips/pixblockdsp_init_mips.c b/libavcodec/mips/pixblockdsp_init_mips.c
index 00f189d558..acea95d36e 100644
--- a/libavcodec/mips/pixblockdsp_init_mips.c
+++ b/libavcodec/mips/pixblockdsp_init_mips.c
@@ -23,7 +23,7 @@
 #include "libavcodec/pixblockdsp.h"
 #include "pixblockdsp_mips.h"
 
-void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx,
+void ff_pixblockdsp_init_mips(PixblockDSPContext *c,
                               unsigned high_bit_depth)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -31,27 +31,13 @@ void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx,
     if (have_mmi(cpu_flags)) {
         c->diff_pixels = ff_diff_pixels_mmi;
 
-        if (!high_bit_depth || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
+        if (!high_bit_depth)
             c->get_pixels = ff_get_pixels_8_mmi;
-        }
     }
 
     if (have_msa(cpu_flags)) {
         c->diff_pixels = ff_diff_pixels_msa;
 
-        switch (avctx->bits_per_raw_sample) {
-        case 9:
-        case 10:
-        case 12:
-        case 14:
-            c->get_pixels = ff_get_pixels_16_msa;
-            break;
-        default:
-            if (avctx->bits_per_raw_sample <= 8 || avctx->codec_type !=
-                AVMEDIA_TYPE_VIDEO) {
-                c->get_pixels = ff_get_pixels_8_msa;
-            }
-            break;
-        }
+        c->get_pixels = high_bit_depth ? ff_get_pixels_16_msa : ff_get_pixels_8_msa;
     }
 }
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index 1fff244511..78f1f9b5c7 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -87,38 +87,31 @@ static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
 
 av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
 {
-    av_unused const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
+    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8 &&
+                                    avctx->bits_per_raw_sample <= 16;
 
     c->diff_pixels_unaligned =
     c->diff_pixels = diff_pixels_c;
 
-    switch (avctx->bits_per_raw_sample) {
-    case 9:
-    case 10:
-    case 12:
-    case 14:
+    if (high_bit_depth) {
         c->get_pixels_unaligned = get_pixels_unaligned_16_c;
-        c->get_pixels = get_pixels_16_c;
-        break;
-    default:
-        if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
-            c->get_pixels_unaligned =
-            c->get_pixels = get_pixels_8_c;
-        }
-        break;
+        c->get_pixels           = get_pixels_16_c;
+    } else {
+        c->get_pixels_unaligned =
+        c->get_pixels           = get_pixels_8_c;
     }
 
 #if ARCH_AARCH64
-    ff_pixblockdsp_init_aarch64(c, avctx, high_bit_depth);
+    ff_pixblockdsp_init_aarch64(c, high_bit_depth);
 #elif ARCH_ARM
-    ff_pixblockdsp_init_arm(c, avctx, high_bit_depth);
+    ff_pixblockdsp_init_arm(c, high_bit_depth);
 #elif ARCH_PPC
-    ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth);
+    ff_pixblockdsp_init_ppc(c, high_bit_depth);
 #elif ARCH_RISCV
-    ff_pixblockdsp_init_riscv(c, avctx, high_bit_depth);
+    ff_pixblockdsp_init_riscv(c, high_bit_depth);
 #elif ARCH_X86
-    ff_pixblockdsp_init_x86(c, avctx, high_bit_depth);
+    ff_pixblockdsp_init_x86(c, high_bit_depth);
 #elif ARCH_MIPS
-    ff_pixblockdsp_init_mips(c, avctx, high_bit_depth);
+    ff_pixblockdsp_init_mips(c, high_bit_depth);
 #endif
 }
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index 215b0905d7..999aa8a926 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -42,17 +42,17 @@ typedef struct PixblockDSPContext {
 } PixblockDSPContext;
 
 void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
-void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c, AVCodecContext *avctx,
+void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c,
                                  unsigned high_bit_depth);
-void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
+void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
                              unsigned high_bit_depth);
-void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
+void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
                              unsigned high_bit_depth);
-void ff_pixblockdsp_init_riscv(PixblockDSPContext *c, AVCodecContext *avctx,
+void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
                                unsigned high_bit_depth);
-void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
+void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
                              unsigned high_bit_depth);
-void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx,
+void ff_pixblockdsp_init_mips(PixblockDSPContext *c,
                               unsigned high_bit_depth);
 
 #endif /* AVCODEC_PIXBLOCKDSP_H */
diff --git a/libavcodec/ppc/pixblockdsp.c b/libavcodec/ppc/pixblockdsp.c
index 01d14b4124..75287b1e85 100644
--- a/libavcodec/ppc/pixblockdsp.c
+++ b/libavcodec/ppc/pixblockdsp.c
@@ -27,7 +27,6 @@
 #include "libavutil/ppc/cpu.h"
 #include "libavutil/ppc/util_altivec.h"
 
-#include "libavcodec/avcodec.h"
 #include "libavcodec/pixblockdsp.h"
 
 #if HAVE_ALTIVEC
@@ -263,7 +262,6 @@ static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
 #endif /* HAVE_VSX */
 
 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
-                                     AVCodecContext *avctx,
                                      unsigned high_bit_depth)
 {
 #if HAVE_ALTIVEC
diff --git a/libavcodec/riscv/pixblockdsp_init.c b/libavcodec/riscv/pixblockdsp_init.c
index 28caa99dff..e59fba63cc 100644
--- a/libavcodec/riscv/pixblockdsp_init.c
+++ b/libavcodec/riscv/pixblockdsp_init.c
@@ -24,7 +24,6 @@
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/riscv/cpu.h"
-#include "libavcodec/avcodec.h"
 #include "libavcodec/pixblockdsp.h"
 
 void ff_get_pixels_8_rvi(int16_t *block, const uint8_t *pixels,
@@ -42,7 +41,6 @@ void ff_diff_pixels_unaligned_rvv(int16_t *block, const uint8_t *s1,
                                   const uint8_t *s2, ptrdiff_t stride);
 
 av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
-                                       AVCodecContext *avctx,
                                        unsigned high_bit_depth)
 {
 #if HAVE_RV
diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c
index 51f2a0033a..f105775c2b 100644
--- a/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@ -28,7 +28,6 @@ void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                          ptrdiff_t stride);
 
 av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
-                                     AVCodecContext *avctx,
                                      unsigned high_bit_depth)
 {
     int cpu_flags = av_get_cpu_flags();
-- 
2.45.2

From 5c0d6d9bb8b7ac4a983d34e7a2e5c3737e93f49e Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
Date: Tue, 27 May 2025 19:13:45 +0200
Subject: [PATCH 4/7] avcodec/pixblockdsp: Pass bits_per_raw_sample directly

Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
---
 libavcodec/asvenc.c          | 2 +-
 libavcodec/avdct.c           | 2 +-
 libavcodec/dnxhdenc.c        | 2 +-
 libavcodec/dvenc.c           | 2 +-
 libavcodec/mpegvideo_enc.c   | 2 +-
 libavcodec/pixblockdsp.c     | 7 +++----
 libavcodec/pixblockdsp.h     | 5 ++---
 tests/checkasm/pixblockdsp.c | 5 +----
 8 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index ba48aa8f08..159b070821 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -378,7 +378,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
     ff_asv_common_init(avctx);
     ff_fdctdsp_init(&a->fdsp, avctx);
-    ff_pixblockdsp_init(&a->pdsp, avctx);
+    ff_pixblockdsp_init(&a->pdsp, 8);
 
     if (avctx->global_quality <= 0)
         avctx->global_quality = 4 * FF_QUALITY_SCALE;
diff --git a/libavcodec/avdct.c b/libavcodec/avdct.c
index f995e73eab..5322b181bc 100644
--- a/libavcodec/avdct.c
+++ b/libavcodec/avdct.c
@@ -119,7 +119,7 @@ int avcodec_dct_init(AVDCT *dsp)
 #if CONFIG_PIXBLOCKDSP
     {
         PixblockDSPContext pdsp;
-        ff_pixblockdsp_init(&pdsp, avctx);
+        ff_pixblockdsp_init(&pdsp, dsp->bits_per_sample);
         COPY(pdsp, get_pixels);
         COPY(pdsp, get_pixels_unaligned);
     }
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index a8f8ab3cd9..7a5978c137 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -423,7 +423,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
     ff_fdctdsp_init(&ctx->m.fdsp, avctx);
     ff_mpv_idct_init(&ctx->m.c);
     ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
-    ff_pixblockdsp_init(&ctx->m.pdsp, avctx);
+    ff_pixblockdsp_init(&ctx->m.pdsp, ctx->bit_depth);
     ff_dct_encode_init(&ctx->m);
 
     if (ctx->profile != AV_PROFILE_DNXHD)
diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c
index 5ff114da9f..b6764e9c2c 100644
--- a/libavcodec/dvenc.c
+++ b/libavcodec/dvenc.c
@@ -112,7 +112,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
     ff_fdctdsp_init(&fdsp, avctx);
     s->fdct[0]    = fdsp.fdct;
     s->fdct[1]    = fdsp.fdct248;
-    ff_pixblockdsp_init(&s->pdsp, avctx);
+    ff_pixblockdsp_init(&s->pdsp, 8);
 
 #if !CONFIG_HARDCODED_TABLES
     {
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 233d729008..46901fc506 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -1031,7 +1031,7 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
     init_unquantize(s, avctx);
     ff_fdctdsp_init(&s->fdsp, avctx);
     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
-    ff_pixblockdsp_init(&s->pdsp, avctx);
+    ff_pixblockdsp_init(&s->pdsp, 8);
     ret = me_cmp_init(m, avctx);
     if (ret < 0)
         return ret;
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index 78f1f9b5c7..110a374260 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -21,7 +21,6 @@
 #include "config.h"
 #include "libavutil/attributes.h"
 #include "libavutil/intreadwrite.h"
-#include "avcodec.h"
 #include "pixblockdsp.h"
 
 static void get_pixels_16_c(int16_t *restrict block, const uint8_t *pixels,
@@ -85,10 +84,10 @@ static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
     }
 }
 
-av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
+av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, int bits_per_raw_sample)
 {
-    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8 &&
-                                    avctx->bits_per_raw_sample <= 16;
+    const unsigned high_bit_depth = bits_per_raw_sample > 8 &&
+                                    bits_per_raw_sample <= 16;
 
     c->diff_pixels_unaligned =
     c->diff_pixels = diff_pixels_c;
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index 999aa8a926..487a39244d 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -19,10 +19,9 @@
 #ifndef AVCODEC_PIXBLOCKDSP_H
 #define AVCODEC_PIXBLOCKDSP_H
 
+#include <stddef.h>
 #include <stdint.h>
 
-#include "avcodec.h"
-
 typedef struct PixblockDSPContext {
     void (*get_pixels)(int16_t *restrict block /* align 16 */,
                        const uint8_t *pixels /* align 8 */,
@@ -41,7 +40,7 @@ typedef struct PixblockDSPContext {
 
 } PixblockDSPContext;
 
-void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
+void ff_pixblockdsp_init(PixblockDSPContext *c, int bits_per_raw_sample);
 void ff_pixblockdsp_init_aarch64(PixblockDSPContext *c,
                                  unsigned high_bit_depth);
 void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
diff --git a/tests/checkasm/pixblockdsp.c b/tests/checkasm/pixblockdsp.c
index 26a697a346..79763de1ea 100644
--- a/tests/checkasm/pixblockdsp.c
+++ b/tests/checkasm/pixblockdsp.c
@@ -90,11 +90,8 @@ void checkasm_check_pixblockdsp(void)
     uint16_t *dst0 = (uint16_t *)dst0_;
     uint16_t *dst1 = (uint16_t *)dst1_;
     PixblockDSPContext h;
-    AVCodecContext avctx = {
-        .bits_per_raw_sample = 8,
-    };
 
-    ff_pixblockdsp_init(&h, &avctx);
+    ff_pixblockdsp_init(&h, 8);
 
     if (check_func(h.get_pixels, "get_pixels"))
         check_get_pixels(uint8_t, 1);
-- 
2.45.2

From bf34e5c5f90b3e01d94b00e8a122c0b064862b54 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
Date: Tue, 27 May 2025 21:21:25 +0200
Subject: [PATCH 5/7] avcodec/pixblockdsp: Fix get_pixels alignment
 documentation

Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
---
 libavcodec/pixblockdsp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index 487a39244d..8d767b2fd3 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -24,7 +24,7 @@
 
 typedef struct PixblockDSPContext {
     void (*get_pixels)(int16_t *restrict block /* align 16 */,
-                       const uint8_t *pixels /* align 8 */,
+                       const uint8_t *pixels /* align 8 for <= 8 bit, 16 otherwise */,
                        ptrdiff_t stride);
     void (*get_pixels_unaligned)(int16_t *restrict block /* align 16 */,
                        const uint8_t *pixels,
-- 
2.45.2

From 5f5ea7a21858e7194bc0e455fd5bc45897bfbc83 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
Date: Tue, 27 May 2025 21:46:06 +0200
Subject: [PATCH 6/7] avcodec/asvenc,dvenc: Optimize unaligned checks away if
 possible

For certain arches (AARCH64, x86, generic) get_pixels and
get_pixels_unaligned always coincide for 8 bit input.
In these cases it is possible to avoid checks for unaligned
input in asvenc, dvenc.

Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
---
 libavcodec/asvenc.c      | 7 ++++---
 libavcodec/dvenc.c       | 7 ++++---
 libavcodec/pixblockdsp.h | 7 ++++++-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index 159b070821..883edd0468 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -301,9 +301,10 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     if (ret < 0)
         return ret;
 
-    if ((uintptr_t)pict->data[0] & 7 || pict->linesize[0] & 7 ||
-        (uintptr_t)pict->data[1] & 7 || pict->linesize[1] & 7 ||
-        (uintptr_t)pict->data[2] & 7 || pict->linesize[2] & 7)
+    if (!PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED &&
+        ((uintptr_t)pict->data[0] & 7 || pict->linesize[0] & 7 ||
+         (uintptr_t)pict->data[1] & 7 || pict->linesize[1] & 7 ||
+         (uintptr_t)pict->data[2] & 7 || pict->linesize[2] & 7))
         a->get_pixels = a->pdsp.get_pixels_unaligned;
     else
         a->get_pixels = a->pdsp.get_pixels;
diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c
index b6764e9c2c..a477b84261 100644
--- a/libavcodec/dvenc.c
+++ b/libavcodec/dvenc.c
@@ -1200,9 +1200,10 @@ static int dvvideo_encode_frame(AVCodecContext *c, AVPacket *pkt,
     DVEncContext *s = c->priv_data;
     int ret;
 
-    if ((uintptr_t)frame->data[0] & 7 || frame->linesize[0] & 7 ||
-        (uintptr_t)frame->data[1] & 7 || frame->linesize[1] & 7 ||
-        (uintptr_t)frame->data[2] & 7 || frame->linesize[2] & 7)
+    if (!PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED &&
+        ((uintptr_t)frame->data[0] & 7 || frame->linesize[0] & 7 ||
+         (uintptr_t)frame->data[1] & 7 || frame->linesize[1] & 7 ||
+         (uintptr_t)frame->data[2] & 7 || frame->linesize[2] & 7))
         s->get_pixels = s->pdsp.get_pixels_unaligned;
     else
         s->get_pixels = s->pdsp.get_pixels;
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index 8d767b2fd3..d493d0e22b 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -22,9 +22,14 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#define PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED \
+    !(ARCH_ARM || ARCH_MIPS || ARCH_PPC || ARCH_RISCV)
+
 typedef struct PixblockDSPContext {
     void (*get_pixels)(int16_t *restrict block /* align 16 */,
-                       const uint8_t *pixels /* align 8 for <= 8 bit, 16 otherwise */,
+                       /* align 16 for > 8 bits; align 8 for <= 8 bits
+                        * (or 1 if PIXBLOCKDSP_8BPP_GET_PIXELS_SUPPORTS_UNALIGNED is set) */
+                       const uint8_t *pixels,
                        ptrdiff_t stride);
     void (*get_pixels_unaligned)(int16_t *restrict block /* align 16 */,
                        const uint8_t *pixels,
-- 
2.45.2

From d7cc7eea3d14154ac066c47577da86bbc36f3e9d Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
Date: Tue, 27 May 2025 19:34:29 +0200
Subject: [PATCH 7/7] avfilter/x86/vf_spp: Remove permutation-specific code

The MMX requantize functions have the MMX permutation
(i.e. FF_IDCT_PERM_SIMPLE) hardcoded and therefore
check for the used permutation (namely via a CRC).
Yet this is very ugly and could even lead to misdetection;
furthermore, the permutation used here has been de facto impossible
on x64 since d7246ea9f229db64ed909d7446196128d6f53de0 and definitely
impossible since bfb28b5ce89f3e950214b67ea95b45e3355c2caf,
making this code dead on x64.
So remove it.

Signed-off-by: Andreas Rheinhardt <andreas.rheinha...@outlook.com>
---
 libavfilter/x86/vf_spp.c | 163 ---------------------------------------
 1 file changed, 163 deletions(-)

diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c
index 498660d7d0..f8e5727bfc 100644
--- a/libavfilter/x86/vf_spp.c
+++ b/libavfilter/x86/vf_spp.c
@@ -21,159 +21,9 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
-#include "libavutil/crc.h"
-#include "libavutil/x86/asm.h"
 #include "libavfilter/vf_spp.h"
 
 #if HAVE_MMX_INLINE
-static void hardthresh_mmx(int16_t dst[64], const int16_t src[64],
-                           int qp, const uint8_t *permutation)
-{
-    int bias = 0; //FIXME
-    unsigned int threshold1;
-
-    threshold1 = qp * ((1<<4) - bias) - 1;
-
-#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
-    "movq " #src0 ", %%mm0      \n"                                     \
-    "movq " #src1 ", %%mm1      \n"                                     \
-    "movq " #src2 ", %%mm2      \n"                                     \
-    "movq " #src3 ", %%mm3      \n"                                     \
-    "psubw %%mm4, %%mm0         \n"                                     \
-    "psubw %%mm4, %%mm1         \n"                                     \
-    "psubw %%mm4, %%mm2         \n"                                     \
-    "psubw %%mm4, %%mm3         \n"                                     \
-    "paddusw %%mm5, %%mm0       \n"                                     \
-    "paddusw %%mm5, %%mm1       \n"                                     \
-    "paddusw %%mm5, %%mm2       \n"                                     \
-    "paddusw %%mm5, %%mm3       \n"                                     \
-    "paddw %%mm6, %%mm0         \n"                                     \
-    "paddw %%mm6, %%mm1         \n"                                     \
-    "paddw %%mm6, %%mm2         \n"                                     \
-    "paddw %%mm6, %%mm3         \n"                                     \
-    "psubusw %%mm6, %%mm0       \n"                                     \
-    "psubusw %%mm6, %%mm1       \n"                                     \
-    "psubusw %%mm6, %%mm2       \n"                                     \
-    "psubusw %%mm6, %%mm3       \n"                                     \
-    "psraw $3, %%mm0            \n"                                     \
-    "psraw $3, %%mm1            \n"                                     \
-    "psraw $3, %%mm2            \n"                                     \
-    "psraw $3, %%mm3            \n"                                     \
-                                                                        \
-    "movq %%mm0, %%mm7          \n"                                     \
-    "punpcklwd %%mm2, %%mm0     \n" /*A*/                               \
-    "punpckhwd %%mm2, %%mm7     \n" /*C*/                               \
-    "movq %%mm1, %%mm2          \n"                                     \
-    "punpcklwd %%mm3, %%mm1     \n" /*B*/                               \
-    "punpckhwd %%mm3, %%mm2     \n" /*D*/                               \
-    "movq %%mm0, %%mm3          \n"                                     \
-    "punpcklwd %%mm1, %%mm0     \n" /*A*/                               \
-    "punpckhwd %%mm7, %%mm3     \n" /*C*/                               \
-    "punpcklwd %%mm2, %%mm7     \n" /*B*/                               \
-    "punpckhwd %%mm2, %%mm1     \n" /*D*/                               \
-                                                                        \
-    "movq %%mm0, " #dst0 "      \n"                                     \
-    "movq %%mm7, " #dst1 "      \n"                                     \
-    "movq %%mm3, " #dst2 "      \n"                                     \
-    "movq %%mm1, " #dst3 "      \n"
-
-    __asm__ volatile(
-        "movd %2, %%mm4             \n"
-        "movd %3, %%mm5             \n"
-        "movd %4, %%mm6             \n"
-        "packssdw %%mm4, %%mm4      \n"
-        "packssdw %%mm5, %%mm5      \n"
-        "packssdw %%mm6, %%mm6      \n"
-        "packssdw %%mm4, %%mm4      \n"
-        "packssdw %%mm5, %%mm5      \n"
-        "packssdw %%mm6, %%mm6      \n"
-        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
-        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
-        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
-        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
-        : : "r" (src), "r" (dst), "g" (threshold1+1), "g" (threshold1+5), "g" (threshold1-4) //FIXME maybe more accurate then needed?
-    );
-    dst[0] = (src[0] + 4) >> 3;
-}
-
-static void softthresh_mmx(int16_t dst[64], const int16_t src[64],
-                           int qp, const uint8_t *permutation)
-{
-    int bias = 0; //FIXME
-    unsigned int threshold1;
-
-    threshold1 = qp*((1<<4) - bias) - 1;
-
-#undef REQUANT_CORE
-#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
-    "movq " #src0 ", %%mm0      \n"                                     \
-    "movq " #src1 ", %%mm1      \n"                                     \
-    "pxor %%mm6, %%mm6          \n"                                     \
-    "pxor %%mm7, %%mm7          \n"                                     \
-    "pcmpgtw %%mm0, %%mm6       \n"                                     \
-    "pcmpgtw %%mm1, %%mm7       \n"                                     \
-    "pxor %%mm6, %%mm0          \n"                                     \
-    "pxor %%mm7, %%mm1          \n"                                     \
-    "psubusw %%mm4, %%mm0       \n"                                     \
-    "psubusw %%mm4, %%mm1       \n"                                     \
-    "pxor %%mm6, %%mm0          \n"                                     \
-    "pxor %%mm7, %%mm1          \n"                                     \
-    "movq " #src2 ", %%mm2      \n"                                     \
-    "movq " #src3 ", %%mm3      \n"                                     \
-    "pxor %%mm6, %%mm6          \n"                                     \
-    "pxor %%mm7, %%mm7          \n"                                     \
-    "pcmpgtw %%mm2, %%mm6       \n"                                     \
-    "pcmpgtw %%mm3, %%mm7       \n"                                     \
-    "pxor %%mm6, %%mm2          \n"                                     \
-    "pxor %%mm7, %%mm3          \n"                                     \
-    "psubusw %%mm4, %%mm2       \n"                                     \
-    "psubusw %%mm4, %%mm3       \n"                                     \
-    "pxor %%mm6, %%mm2          \n"                                     \
-    "pxor %%mm7, %%mm3          \n"                                     \
-                                                                        \
-    "paddsw %%mm5, %%mm0        \n"                                     \
-    "paddsw %%mm5, %%mm1        \n"                                     \
-    "paddsw %%mm5, %%mm2        \n"                                     \
-    "paddsw %%mm5, %%mm3        \n"                                     \
-    "psraw $3, %%mm0            \n"                                     \
-    "psraw $3, %%mm1            \n"                                     \
-    "psraw $3, %%mm2            \n"                                     \
-    "psraw $3, %%mm3            \n"                                     \
-                                                                        \
-    "movq %%mm0, %%mm7          \n"                                     \
-    "punpcklwd %%mm2, %%mm0     \n" /*A*/                               \
-    "punpckhwd %%mm2, %%mm7     \n" /*C*/                               \
-    "movq %%mm1, %%mm2          \n"                                     \
-    "punpcklwd %%mm3, %%mm1     \n" /*B*/                               \
-    "punpckhwd %%mm3, %%mm2     \n" /*D*/                               \
-    "movq %%mm0, %%mm3          \n"                                     \
-    "punpcklwd %%mm1, %%mm0     \n" /*A*/                               \
-    "punpckhwd %%mm7, %%mm3     \n" /*C*/                               \
-    "punpcklwd %%mm2, %%mm7     \n" /*B*/                               \
-    "punpckhwd %%mm2, %%mm1     \n" /*D*/                               \
-                                                                        \
-    "movq %%mm0, " #dst0 "      \n"                                     \
-    "movq %%mm7, " #dst1 "      \n"                                     \
-    "movq %%mm3, " #dst2 "      \n"                                     \
-    "movq %%mm1, " #dst3 "      \n"
-
-    __asm__ volatile(
-        "movd %2, %%mm4             \n"
-        "movd %3, %%mm5             \n"
-        "packssdw %%mm4, %%mm4      \n"
-        "packssdw %%mm5, %%mm5      \n"
-        "packssdw %%mm4, %%mm4      \n"
-        "packssdw %%mm5, %%mm5      \n"
-        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
-        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
-        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
-        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
-        : : "r" (src), "r" (dst), "g" (threshold1), "rm" (4) //FIXME maybe more accurate then needed?
-    );
-
-    dst[0] = (src[0] + 4) >> 3;
-}
-
 static void store_slice_mmx(uint8_t *dst, const int16_t *src,
                             int dst_stride, int src_stride,
                             int width, int height, int log2_scale,
@@ -223,20 +73,7 @@ av_cold void ff_spp_init_x86(SPPContext *s)
     int cpu_flags = av_get_cpu_flags();
 
     if (cpu_flags & AV_CPU_FLAG_MMX) {
-        static const uint32_t mmx_idct_perm_crc = 0xe5e8adc4;
-        uint32_t idct_perm_crc =
-            av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0,
-                   s->dct->idct_permutation,
-                   sizeof(s->dct->idct_permutation));
-        int64_t bps;
         s->store_slice = store_slice_mmx;
-        av_opt_get_int(s->dct, "bits_per_sample", 0, &bps);
-        if (bps <= 8 && idct_perm_crc == mmx_idct_perm_crc) {
-            switch (s->mode) {
-            case 0: s->requantize = hardthresh_mmx; break;
-            case 1: s->requantize = softthresh_mmx; break;
-            }
-        }
     }
 #endif
 }
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to