On 11/14/2024 11:30 AM, Kyosuke Kawakami wrote:
Signed-off-by: Kyosuke Kawakami <kawakami150...@gmail.com>
---
  tests/checkasm/Makefile   |  1 +
  tests/checkasm/checkasm.c |  3 ++
  tests/checkasm/checkasm.h |  1 +
  tests/checkasm/diracdsp.c | 86 +++++++++++++++++++++++++++++++++++++++
  tests/fate/checkasm.mak   |  1 +
  5 files changed, 92 insertions(+)
  create mode 100644 tests/checkasm/diracdsp.c

[...]

diff --git a/tests/checkasm/diracdsp.c b/tests/checkasm/diracdsp.c
new file mode 100644
index 0000000000..8833c2d223
--- /dev/null
+++ b/tests/checkasm/diracdsp.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2024 Kyosuke Kawakami
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "checkasm.h"
+
+#include "libavcodec/diracdsp.h"
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem_internal.h"
+
+#define RANDOMIZE_DESTS(name, size)             \
+    do {                                        \
+        int i;                                  \
+        for (i = 0; i < size; ++i) {            \
+            uint16_t r = rnd();                 \
+            AV_WN16A(name##0 + i, r);           \
+            AV_WN16A(name##1 + i, r);           \
+        }                                       \
+    } while (0)
+
+#define RANDOMIZE_BUFFER8(name, size)         \
+    do {                                      \
+        int i;                                \
+        for (i = 0; i < size; ++i) {          \
+            uint8_t r = rnd();                \
+            name[i] = r;                      \
+        }                                     \
+    } while (0)
+
+#define OBMC_STRIDE 32
+#define XBLEN_MAX 32
+#define YBLEN_MAX 64
+
+static void check_add_obmc(size_t func_index, int xblen)
+{
+    LOCAL_ALIGNED_8(uint8_t, src, [XBLEN_MAX * YBLEN_MAX]);
+    LOCAL_ALIGNED_16(uint16_t, dst0, [XBLEN_MAX * YBLEN_MAX]);
+    LOCAL_ALIGNED_16(uint16_t, dst1, [XBLEN_MAX * YBLEN_MAX]);

The loads in the asm functions use movdqu, so I assume the buffers in the decoder are not guaranteed to be 16-byte aligned. To ensure future implementations don't mistakenly use aligned loads, you could change this to:

LOCAL_ALIGNED_16(uint16_t, _dst0, [XBLEN_MAX * YBLEN_MAX + 4]);
LOCAL_ALIGNED_16(uint16_t, _dst1, [XBLEN_MAX * YBLEN_MAX + 4]);
uint16_t *dst0 = _dst0 + 4, *dst1 = _dst1 + 4;

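Using LOCAL_ALIGNED_8() could still happen to produce a 16-byte-aligned buffer, whereas offsetting a 16-byte-aligned buffer by 4 uint16_t elements (8 bytes), as above, guarantees the buffer is 8-byte aligned but never 16-byte aligned.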

+    LOCAL_ALIGNED_8(uint8_t, obmc_weight, [XBLEN_MAX * YBLEN_MAX]);
+
+    int yblen;
+    DiracDSPContext h;
+
+    ff_diracdsp_init(&h);
+
+    if (check_func(h.add_dirac_obmc[func_index], "diracdsp.add_dirac_obmc_%d", 
xblen)) {
+        declare_func(void, uint16_t*, const uint8_t*, int, const uint8_t *, 
int);
+
+        yblen = 1 + (rnd() % YBLEN_MAX);

Use YBLEN_MAX directly. There is no real gain in using a randomized height, and with a random height every --bench run will give wildly different results.

+        RANDOMIZE_BUFFER8(src, yblen * xblen);
+        RANDOMIZE_DESTS(dst, yblen * xblen);
+        RANDOMIZE_BUFFER8(obmc_weight, yblen * OBMC_STRIDE);
+
+        call_ref(dst0, src, xblen, obmc_weight, yblen);
+        call_new(dst1, src, xblen, obmc_weight, yblen);
+        if (memcmp(dst0, dst1, yblen * xblen))
+            fail();
+
+        bench_new(dst1, src, xblen, obmc_weight, yblen);
+    }
+}
+
+void checkasm_check_diracdsp(void)
+{
+    check_add_obmc(0, 8);
+    check_add_obmc(1, 16);
+    check_add_obmc(2, 32);
+    report("diracdsp");
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index d1396cb641..8a2c04e1cd 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -7,6 +7,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp                         
        \
                  fate-checkasm-av_tx                                     \
                  fate-checkasm-blockdsp                                  \
                  fate-checkasm-bswapdsp                                  \
+                fate-checkasm-diracdsp                                  \
                  fate-checkasm-exrdsp                                    \
                  fate-checkasm-fdctdsp                                   \
                  fate-checkasm-fixed_dsp                                 \

Attachment: OpenPGP_signature.asc
Description: OpenPGP digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to