On Fri, 2 May 2025, Nuo Mi wrote:

On Fri, May 2, 2025 at 3:49 PM Martin Storsjö <mar...@martin.st> wrote:
      On Fri, 2 May 2025, Nuo Mi wrote:

      > From: Shaun Loo <shaunlo...@gmail.com>
      >
      > This is a part of Google Summer of Code 2023
      >
      > AVX2:
      > - vvc_sao.sao_band [OK]
      > - vvc_sao.sao_edge [OK]
      >
      > Co-authored-by: Nuo Mi <nuomi2...@gmail.com>
      > ---
      > tests/checkasm/Makefile   |   2 +-
      > tests/checkasm/checkasm.c |   1 +
      > tests/checkasm/checkasm.h |   1 +
      > tests/checkasm/vvc_sao.c  | 161
      ++++++++++++++++++++++++++++++++++++++
      > 4 files changed, 164 insertions(+), 1 deletion(-)
      > create mode 100644 tests/checkasm/vvc_sao.c
      >
      > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
      > index 193c1e4633..fabbf595b4 100644
      > --- a/tests/checkasm/Makefile
      > +++ b/tests/checkasm/Makefile
      > @@ -47,7 +47,7 @@ AVCODECOBJS-$(CONFIG_V210_DECODER)      +=
      v210dec.o
      > AVCODECOBJS-$(CONFIG_V210_ENCODER)      += v210enc.o
      > AVCODECOBJS-$(CONFIG_VORBIS_DECODER)    += vorbisdsp.o
      > AVCODECOBJS-$(CONFIG_VP9_DECODER)       += vp9dsp.o
      > -AVCODECOBJS-$(CONFIG_VVC_DECODER)       += vvc_alf.o vvc_mc.o
      > +AVCODECOBJS-$(CONFIG_VVC_DECODER)       += vvc_alf.o vvc_mc.o
      vvc_sao.o
      >
      > CHECKASMOBJS-$(CONFIG_AVCODEC)          += $(AVCODECOBJS-yes)
      >
      > diff --git a/tests/checkasm/checkasm.c
      b/tests/checkasm/checkasm.c
      > index 3bb82ed0e5..0734cd26bf 100644
      > --- a/tests/checkasm/checkasm.c
      > +++ b/tests/checkasm/checkasm.c
      > @@ -256,6 +256,7 @@ static const struct {
      >     #if CONFIG_VVC_DECODER
      >         { "vvc_alf", checkasm_check_vvc_alf },
      >         { "vvc_mc",  checkasm_check_vvc_mc  },
      > +        { "vvc_sao", checkasm_check_vvc_sao },
      >     #endif
      > #endif
      > #if CONFIG_AVFILTER
      > diff --git a/tests/checkasm/checkasm.h
      b/tests/checkasm/checkasm.h
      > index a6b5965e02..146bfdec35 100644
      > --- a/tests/checkasm/checkasm.h
      > +++ b/tests/checkasm/checkasm.h
      > @@ -149,6 +149,7 @@ void checkasm_check_videodsp(void);
      > void checkasm_check_vorbisdsp(void);
      > void checkasm_check_vvc_alf(void);
      > void checkasm_check_vvc_mc(void);
      > +void checkasm_check_vvc_sao(void);
      >
      > struct CheckasmPerf;
      >
      > diff --git a/tests/checkasm/vvc_sao.c
      b/tests/checkasm/vvc_sao.c
      > new file mode 100644
      > index 0000000000..026078ff02
      > --- /dev/null
      > +++ b/tests/checkasm/vvc_sao.c
      > @@ -0,0 +1,161 @@
      > +/*
      > + * Copyright (c) 2018 Yingming Fan <yingming...@gmail.com>
      > + *
      > + * This file is part of FFmpeg.
      > + *
      > + * FFmpeg is free software; you can redistribute it and/or
      modify
      > + * it under the terms of the GNU General Public License as
      published by
      > + * the Free Software Foundation; either version 2 of the
      License, or
      > + * (at your option) any later version.
      > + *
      > + * FFmpeg is distributed in the hope that it will be useful,
      > + * but WITHOUT ANY WARRANTY; without even the implied
      warranty of
      > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
      the
      > + * GNU General Public License for more details.
      > + *
      > + * You should have received a copy of the GNU General Public
      License along
      > + * with FFmpeg; if not, write to the Free Software
      Foundation, Inc.,
      > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
      USA.
      > + */
      > +
      > +#include <string.h>
      > +
      > +#include "libavutil/intreadwrite.h"
      > +#include "libavutil/mem_internal.h"
      > +
      > +#include "libavcodec/vvc/dsp.h"
      > +#include "libavcodec/vvc/ctu.h"
      > +
      > +#include "checkasm.h"
      > +
      > +static const uint32_t pixel_mask[3] = { 0xffffffff,
      0x03ff03ff, 0x0fff0fff };
      > +static const uint32_t sao_size[] = {8, 16, 32, 48, 64, 80,
      96, 112, 128};
      > +
      > +#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
      > +#define PIXEL_STRIDE (2*MAX_PB_SIZE +
      AV_INPUT_BUFFER_PADDING_SIZE) //same with sao_edge src_stride
      > +#define BUF_SIZE (PIXEL_STRIDE * (MAX_PB_SIZE+2) * 2) //+2
      for top and bottom row, *2 for high bit depth
      > +#define OFFSET_THRESH (1 << (bit_depth - 5))
      > +#define OFFSET_LENGTH 5
      > +
      > +#define randomize_buffers(buf0, buf1, size)                 \
      > +    do {                                                    \
      > +        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];   \
      > +        int k;                                              \
      > +        for (k = 0; k < size; k += 4) {                     \
      > +            uint32_t r = rnd() & mask;                      \
      > +            AV_WN32A(buf0 + k, r);                          \
      > +            AV_WN32A(buf1 + k, r);                          \
      > +        }                                                   \
      > +    } while (0)
      > +
      > +#define randomize_buffers2(buf, size)                       \
      > +    do {                                                    \
      > +        uint32_t max_offset = OFFSET_THRESH;                \
      > +        int k;                                              \
      > +        if (bit_depth == 8) {                               \
      > +            for (k = 0; k < size; k++) {                    \
      > +                uint8_t r = rnd() % max_offset;             \
      > +                buf[k] = r;                                 \
      > +            }                                               \
      > +        } else {                                            \
      > +            for (k = 0; k < size; k++) {                    \
      > +                uint16_t r = rnd() % max_offset;            \
      > +                buf[k] = r;                                 \
      > +            }                                               \
      > +        }                                                   \
      > +    } while (0)
      > +
      > +static void check_sao_band(VVCDSPContext *h, int bit_depth)
      > +{
      > +    int i;
      > +    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
      > +    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
      > +    LOCAL_ALIGNED_32(uint8_t, src0, [BUF_SIZE]);
      > +    LOCAL_ALIGNED_32(uint8_t, src1, [BUF_SIZE]);
      > +    int16_t offset_val[OFFSET_LENGTH];
      > +    int left_class = rnd()%32;
      > +
      > +    for (i = 0; i < FF_ARRAY_ELEMS(sao_size); i++) {
      > +        int block_size = sao_size[i];
      > +        int prev_size = i > 0 ? sao_size[i - 1] : 0;
      > +        ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
      > +        declare_func(void, uint8_t *dst, const uint8_t *src,
      ptrdiff_t dst_stride, ptrdiff_t src_stride,
      > +                     const int16_t *sao_offset_val, int
      sao_left_class, int width, int height);
      > +
      > +        if (check_func(h->sao.band_filter[i],
      "vvc_sao_band_%d_%d", block_size, bit_depth)) {
      > +
      > +            for (int w = prev_size + 4; w <= block_size; w +=
      4) {
      > +                randomize_buffers(src0, src1, BUF_SIZE);
      > +                randomize_buffers2(offset_val,
      OFFSET_LENGTH);
      > +                memset(dst0, 0, BUF_SIZE);
      > +                memset(dst1, 0, BUF_SIZE);
      > +
      > +                call_ref(dst0, src0, stride, stride,
      offset_val, left_class, w, block_size);
      > +                call_new(dst1, src1, stride, stride,
      offset_val, left_class, w, block_size);
      > +                for (int j = 0; j < block_size; j++) {
      > +                    if (memcmp(dst0 + j*stride, dst1 +
      j*stride, w*SIZEOF_PIXEL))
      > +                        fail();
      > +                }
      > +            }

      For new checkasm tests, I would suggest trying to use the
      helpers for
      doing bounds checks automatically; see the recent commits
      4d4b301e4a269adfabceaeca1a20c653bde47554 and
      c1a2da72cc27cf9b78a0cbea2f60265909d8b253 for how to use them.
      This will
      make sure that any new SIMD implementation doesn't accidentally
      write
      outside of the designated area, which previously could happen
      without
      noticing.

Hi Martin,
Thank you for the review. 

SAO does perform some overwrites on each line and aligns the width to 8:
https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/hevc/filter.c

Sorry, on a quick browse through that I didn't see which of the 924 lines you're referring to that does that.

I'm not sure if the current bounds-checking API can handle this. 

It definitely can do that. 4d4b301e4a269adfabceaeca1a20c653bde47554 uses checkasm_check_pixel_padded(), which assumes that we do no overwriting outside of the output area. (The same goes for checkasm_check_padded() if not using the bit_depth variable for switching between 8 and 16 bits per pixel.) If we'd instead use checkasm_check_pixel_padded_align(), or checkasm_check_padded_align(), then you pass two extra parameters at the end, for how much to align the output size.

So e.g. if you have width=12 and call checkasm_check_pixel_padded_align(..., 8, 1), then it will allow the output width to be aligned to 8 - i.e. up to 16. So in that case, it checks that a 12 pixel wide area has the correct output pixels, ignore the following 4 pixels and check that pixels outside of that area haven't been touched.

If you really don't want checking outside of the bounds, you can still use checkasm_check_pixel() or checkasm_check(). Even if not checking the bounds, it automatically allows printing the mismatched data on failures, if you run checkasm with the "-v" option.

Could you help add support for it in the HEVC SAO checkasm? 

I can try to give it a shot in a few days maybe. Otherwise you can try following the examples of 4d4b301e4a269adfabceaeca1a20c653bde47554, while using checkasm_check_pixel_padded_align().

// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to