[FFmpeg-cvslog] checkasm/vvc_alf: only check the valid filter and classify sizes

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sat Jun 22 10:40:24 
2024 +0800| [b82ef7c0baeb9a7033dd702baac754755bd0a54a] | committer: Nuo Mi

checkasm/vvc_alf: only check the valid filter and classify sizes

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b82ef7c0baeb9a7033dd702baac754755bd0a54a
---

 tests/checkasm/vvc_alf.c | 61 +++-
 1 file changed, 34 insertions(+), 27 deletions(-)

diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
index 84b0f9da15..902757aff1 100644
--- a/tests/checkasm/vvc_alf.c
+++ b/tests/checkasm/vvc_alf.c
@@ -90,35 +90,41 @@ static void check_alf_filter(VVCDSPContext *c, const int 
bit_depth)
 randomize_buffers2(filter, LUMA_PARAMS_SIZE, 1);
 randomize_buffers2(clip, LUMA_PARAMS_SIZE, 0);
 
-for (int h = 4; h <= MAX_CTU_SIZE; h += 8) {
-for (int w = 4; w <= MAX_CTU_SIZE; w += 8) {
+for (int h = 4; h <= MAX_CTU_SIZE; h += 4) {
+for (int w = 4; w <= MAX_CTU_SIZE; w += 4) {
 const int ctu_size = MAX_CTU_SIZE;
-if (check_func(c->alf.filter[LUMA], 
"vvc_alf_filter_luma_%dx%d_%d", w, h, bit_depth)) {
-const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_LUMA;
-memset(dst0, 0, DST_BUF_SIZE);
-memset(dst1, 0, DST_BUF_SIZE);
-call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, 
filter, clip, vb_pos);
-call_new(dst1, dst_stride, src1 + offset, src_stride, w, h, 
filter, clip, vb_pos);
-for (int i = 0; i < h; i++) {
-if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w 
* SIZEOF_PIXEL))
-fail();
+//Both picture size and virtual boundaries are 8-aligned. For 
luma, we only need to check 8-aligned sizes.
+if (!(w % 8) && !(h % 8)) {
+if (check_func(c->alf.filter[LUMA], 
"vvc_alf_filter_luma_%dx%d_%d", w, h, bit_depth)) {
+const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_LUMA;
+memset(dst0, 0, DST_BUF_SIZE);
+memset(dst1, 0, DST_BUF_SIZE);
+call_ref(dst0, dst_stride, src0 + offset, src_stride, w, 
h, filter, clip, vb_pos);
+call_new(dst1, dst_stride, src1 + offset, src_stride, w, 
h, filter, clip, vb_pos);
+for (int i = 0; i < h; i++) {
+if (memcmp(dst0 + i * dst_stride, dst1 + i * 
dst_stride, w * SIZEOF_PIXEL))
+fail();
+}
+// Bench only square sizes, and ones with dimensions being 
a power of two.
+if (w == h && (w & (w - 1)) == 0)
+bench_new(dst1, dst_stride, src1 + offset, src_stride, 
w, h, filter, clip, vb_pos);
 }
-// Bench only square sizes, and ones with dimensions being a 
power of two.
-if (w == h && (w & (w - 1)) == 0)
-bench_new(dst1, dst_stride, src1 + offset, src_stride, w, 
h, filter, clip, vb_pos);
 }
-if (check_func(c->alf.filter[CHROMA], 
"vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
-const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
-memset(dst0, 0, DST_BUF_SIZE);
-memset(dst1, 0, DST_BUF_SIZE);
-call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, 
filter, clip, vb_pos);
-call_new(dst1, dst_stride, src1 + offset, src_stride, w, h, 
filter, clip, vb_pos);
-for (int i = 0; i < h; i++) {
-if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w 
* SIZEOF_PIXEL))
-fail();
+//For chroma, once it exceeds 64, it's not a 4:2:0 format, so we 
only need to check 8-aligned sizes as well.
+if ((w <= 64 || !(w % 8)) && (h <= 64 || !(h % 8))) {
+if (check_func(c->alf.filter[CHROMA], 
"vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
+const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
+memset(dst0, 0, DST_BUF_SIZE);
+memset(dst1, 0, DST_BUF_SIZE);
+call_ref(dst0, dst_stride, src0 + offset, src_stride, w, 
h, filter, clip, vb_pos);
+call_new(dst1, dst_stride, src1 + offset, src_stride, w, 
h, filter, clip, vb_pos);
+for (int i = 0; i < h; i++) {
+if (memcmp(dst0 + i * dst_stride, dst1 + i * 
dst_stride, w * SIZEOF_PIXEL))
+fail();
+}
+if (w == h && (w & (w - 1)) == 0)
+bench_new(dst1, dst_stride, src1 + offset, src_stride, 
w, h, filter, clip, vb_pos);
 }
-if (w == h && (w & (w - 1)) == 0)
-bench_new(dst1, dst_stride, src1 + offset, src_stride, w, 
h, filter, 

[FFmpeg-cvslog] checkasm/vvc_alf: random select alf virtual boundaries position

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sat Jun 22 11:22:40 
2024 +0800| [1fa9f5b17f0e17643cc1601f86924f859b5fccfa] | committer: Nuo Mi

checkasm/vvc_alf: random select alf virtual boundaries position

A picture's virtual boundaries will split a CTU into 4 ALF blocks.
The ALF virtual boundary may or may not cross an ALF block.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1fa9f5b17f0e17643cc1601f86924f859b5fccfa
---

 tests/checkasm/vvc_alf.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
index 902757aff1..be8b930810 100644
--- a/tests/checkasm/vvc_alf.c
+++ b/tests/checkasm/vvc_alf.c
@@ -66,6 +66,14 @@ static const uint32_t pixel_mask[3] = { 0xffffffff, 
0x03ff03ff, 0x0fff0fff };
 }   \
 } while (0)
 
+static int get_alf_vb_pos(const int h, const int vb_pos_above)
+{
+if (h == MAX_CTU_SIZE)
+return MAX_CTU_SIZE - vb_pos_above;
+// If h < MAX_CTU_SIZE and picture virtual boundaries are involved, ALF 
virtual boundaries can either be within or outside this ALF block.
+return ((rnd() & 1) ? h : MAX_CTU_SIZE) - vb_pos_above;
+}
+
 static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
 {
 LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
@@ -92,11 +100,10 @@ static void check_alf_filter(VVCDSPContext *c, const int 
bit_depth)
 
 for (int h = 4; h <= MAX_CTU_SIZE; h += 4) {
 for (int w = 4; w <= MAX_CTU_SIZE; w += 4) {
-const int ctu_size = MAX_CTU_SIZE;
 //Both picture size and virtual boundaries are 8-aligned. For 
luma, we only need to check 8-aligned sizes.
 if (!(w % 8) && !(h % 8)) {
 if (check_func(c->alf.filter[LUMA], 
"vvc_alf_filter_luma_%dx%d_%d", w, h, bit_depth)) {
-const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_LUMA;
+const int vb_pos = get_alf_vb_pos(h, 
ALF_VB_POS_ABOVE_LUMA);
 memset(dst0, 0, DST_BUF_SIZE);
 memset(dst1, 0, DST_BUF_SIZE);
 call_ref(dst0, dst_stride, src0 + offset, src_stride, w, 
h, filter, clip, vb_pos);
@@ -113,7 +120,7 @@ static void check_alf_filter(VVCDSPContext *c, const int 
bit_depth)
 //For chroma, once it exceeds 64, it's not a 4:2:0 format, so we 
only need to check 8-aligned sizes as well.
 if ((w <= 64 || !(w % 8)) && (h <= 64 || !(h % 8))) {
 if (check_func(c->alf.filter[CHROMA], 
"vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
-const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
+const int vb_pos = get_alf_vb_pos(h, 
ALF_VB_POS_ABOVE_CHROMA);
 memset(dst0, 0, DST_BUF_SIZE);
 memset(dst1, 0, DST_BUF_SIZE);
 call_ref(dst0, dst_stride, src0 + offset, src_stride, w, 
h, filter, clip, vb_pos);
@@ -152,7 +159,7 @@ static void check_alf_classify(VVCDSPContext *c, const int 
bit_depth)
 for (int h = 8; h <= MAX_CTU_SIZE; h += 8) {
 for (int w = 8; w <= MAX_CTU_SIZE; w += 8) {
 const int id_size = w * h / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE * 
sizeof(int);
-const int vb_pos  = MAX_CTU_SIZE - ALF_BLOCK_SIZE;
+const int vb_pos  = get_alf_vb_pos(h, ALF_VB_POS_ABOVE_LUMA);
 if (check_func(c->alf.classify, "vvc_alf_classify_%dx%d_%d", w, h, 
bit_depth)) {
 memset(class_idx0, 0, id_size);
 memset(class_idx1, 0, id_size);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] checkasm/vvc_alf: ensure right and bottom boundaries are not overwritten by asm

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sat Jun 22 11:34:11 
2024 +0800| [0333b97414d2c26a8236111d623fcafb7805d0a0] | committer: Nuo Mi

checkasm/vvc_alf: ensure right and bottom boundaries are not overwritten by asm

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0333b97414d2c26a8236111d623fcafb7805d0a0
---

 tests/checkasm/vvc_alf.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
index be8b930810..15e79289cd 100644
--- a/tests/checkasm/vvc_alf.c
+++ b/tests/checkasm/vvc_alf.c
@@ -108,8 +108,8 @@ static void check_alf_filter(VVCDSPContext *c, const int 
bit_depth)
 memset(dst1, 0, DST_BUF_SIZE);
 call_ref(dst0, dst_stride, src0 + offset, src_stride, w, 
h, filter, clip, vb_pos);
 call_new(dst1, dst_stride, src1 + offset, src_stride, w, 
h, filter, clip, vb_pos);
-for (int i = 0; i < h; i++) {
-if (memcmp(dst0 + i * dst_stride, dst1 + i * 
dst_stride, w * SIZEOF_PIXEL))
+for (int i = 0; i < (h + 1); i++) {
+if (memcmp(dst0 + i * dst_stride, dst1 + i * 
dst_stride, (w + 1) * SIZEOF_PIXEL))
 fail();
 }
 // Bench only square sizes, and ones with dimensions being 
a power of two.
@@ -125,8 +125,8 @@ static void check_alf_filter(VVCDSPContext *c, const int 
bit_depth)
 memset(dst1, 0, DST_BUF_SIZE);
 call_ref(dst0, dst_stride, src0 + offset, src_stride, w, 
h, filter, clip, vb_pos);
 call_new(dst1, dst_stride, src1 + offset, src_stride, w, 
h, filter, clip, vb_pos);
-for (int i = 0; i < h; i++) {
-if (memcmp(dst0 + i * dst_stride, dst1 + i * 
dst_stride, w * SIZEOF_PIXEL))
+for (int i = 0; i < (h + 1); i++) {
+if (memcmp(dst0 + i * dst_stride, dst1 + i * 
dst_stride, (w + 1) * SIZEOF_PIXEL))
 fail();
 }
 if (w == h && (w & (w - 1)) == 0)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/vvcdec: refact, unify vvc_deblock_subblock_bs_{horizontal, vertical}

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sun Jun  9 21:36:10 
2024 +0800| [cb6538e9a10f6e9b03b87087e98de70c384f6636] | committer: Nuo Mi

avcodec/vvcdec: refact, unify vvc_deblock_subblock_bs_{horizontal, vertical}

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cb6538e9a10f6e9b03b87087e98de70c384f6636
---

 libavcodec/vvc/filter.c | 93 +
 1 file changed, 32 insertions(+), 61 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 7844d34eac..d4c09b69f3 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -406,30 +406,43 @@ static void derive_max_filter_length_luma(const 
VVCFrameContext *fc, const int q
 *max_len_p = FFMIN(5, *max_len_p);
 }
 
-static void vvc_deblock_subblock_bs_vertical(const VVCLocalContext *lc,
-const int cb_x, const int cb_y, const int x0, const int y0, const int 
width, const int height)
+static void vvc_deblock_subblock_bs(const VVCLocalContext *lc,
+const int cb, int x0, int y0, int width, int height, const int vertical)
 {
 const VVCFrameContext  *fc = lc->fc;
 const MvField *tab_mvf = fc->tab.mvf;
 const RefPicList *rpl  = lc->sc->rpl;
-const int min_pu_width = fc->ps.pps->min_pu_width;
+int stridea= fc->ps.pps->min_pu_width;
+int strideb= 1;
+uint8_t *tab_bs= vertical ? fc->tab.vertical_bs[LUMA] : 
fc->tab.horizontal_bs[LUMA];
+uint8_t *tab_max_len_p = vertical ? fc->tab.vertical_p : 
fc->tab.horizontal_p;
+uint8_t *tab_max_len_q = vertical ? fc->tab.vertical_q : 
fc->tab.horizontal_q;
 const int log2_min_pu_size = MIN_PU_LOG2;
 
+if (!vertical) {
+FFSWAP(int, x0, y0);
+FFSWAP(int, width, height);
+FFSWAP(int, stridea, strideb);
+}
+
 // bs for TU internal vertical PU boundaries
-for (int j = 0; j < height; j += 4) {
-const int y_pu = (y0 + j) >> log2_min_pu_size;
-
-for (int i = 8 - ((x0 - cb_x) % 8); i < width; i += 8) {
-const int xp_pu = (x0 + i - 1) >> log2_min_pu_size;
-const int xq_pu = (x0 + i) >> log2_min_pu_size;
-const MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
-const MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
-const int x = x0 + i;
-const int y = y0 + j;
-const int bs = boundary_strength(lc, curr, left, rpl);
+for (int i = 8 - ((x0 - cb) % 8); i < width; i += 8) {
+const int xp_pu = (x0 + i - 1) >> log2_min_pu_size;
+const int xq_pu = (x0 + i) >> log2_min_pu_size;
+
+for (int j = 0; j < height; j += 4) {
+const int y_pu   = (y0 + j) >> log2_min_pu_size;
+const MvField *mvf_p = &tab_mvf[y_pu * stridea + xp_pu * strideb];
+const MvField *mvf_q = &tab_mvf[y_pu * stridea + xq_pu * strideb];
+const int bs = boundary_strength(lc, mvf_q, mvf_p, rpl);
+int x= x0 + i;
+int y= y0 + j;
 uint8_t max_len_p = 0, max_len_q = 0;
 
-TAB_BS(fc->tab.vertical_bs[LUMA], x, y) = bs;
+if (!vertical)
+FFSWAP(int, x, y);
+
+TAB_BS(tab_bs, x, y) = bs;
 
 if (i == 4 || i == width - 4)
 max_len_p = max_len_q = 1;
@@ -438,48 +451,8 @@ static void vvc_deblock_subblock_bs_vertical(const 
VVCLocalContext *lc,
 else
 max_len_p = max_len_q = 3;
 
-TAB_MAX_LEN(fc->tab.vertical_p, x, y) = max_len_p;
-TAB_MAX_LEN(fc->tab.vertical_q, x, y) = max_len_q;
-}
-}
-}
-
-static void vvc_deblock_subblock_bs_horizontal(const VVCLocalContext *lc,
-const int cb_x, const int cb_y, const int x0, const int y0, const int 
width, const int height)
-{
-const VVCFrameContext  *fc = lc->fc;
-const MvField* tab_mvf = fc->tab.mvf;
-const RefPicList* rpl  = lc->sc->rpl;
-const int min_pu_width = fc->ps.pps->min_pu_width;
-const int log2_min_pu_size = MIN_PU_LOG2;
-
-// bs for TU internal horizontal PU boundaries
-for (int j = 8 - ((y0 - cb_y) % 8); j < height; j += 8) {
-int yp_pu = (y0 + j - 1) >> log2_min_pu_size;
-int yq_pu = (y0 + j) >> log2_min_pu_size;
-
-for (int i = 0; i < width; i += 4) {
-const int x_pu = (x0 + i) >> log2_min_pu_size;
-const MvField *top  = &tab_mvf[yp_pu * min_pu_width + x_pu];
-const MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
-const int x = x0 + i;
-const int y = y0 + j;
-const int bs = boundary_strength(lc, curr, top, rpl);
-uint8_t max_len_p = 0, max_len_q = 0;
-
-TAB_BS(fc->tab.horizontal_bs[LUMA], x, y) = bs;
-
-//fixme:
-//edgeTbFlags[ x − sbW ][ y ] is equal to 1
-//edgeTbFlags[ x + sbW ][ y ] is equ

[FFmpeg-cvslog] x86/vvc_alf: avoid overwriting for non-16 aligned widths

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Thu Jun 20 22:02:55 
2024 +0800| [6b0e6a98b57a80c1165d7a3012e0440029088406] | committer: Nuo Mi

x86/vvc_alf: avoid overwriting for non-16 aligned widths

Previously, the code allowed overwriting on 16-aligned blocks, which was
suitable when there were no picture virtual boundaries, because both CTU
sizes and strides were 16-aligned. However, with picture virtual boundaries,
each CTU is divided into four ALF blocks, leading to potential issues with
overwriting later CTUs.

In cases involving picture virtual boundaries, each ALF block is 8-pixel 
aligned.
For luma, we consistently ensure an 8-aligned width. For chroma in 4:2:0 format,
we need to account for a 4-aligned width.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6b0e6a98b57a80c1165d7a3012e0440029088406
---

 libavcodec/x86/vvc/vvc_alf.asm | 85 +-
 1 file changed, 75 insertions(+), 10 deletions(-)

diff --git a/libavcodec/x86/vvc/vvc_alf.asm b/libavcodec/x86/vvc/vvc_alf.asm
index b35dd9b0e9..f69a69f05f 100644
--- a/libavcodec/x86/vvc/vvc_alf.asm
+++ b/libavcodec/x86/vvc/vvc_alf.asm
@@ -324,18 +324,69 @@ SECTION .text
 %endif
 %endmacro
 
-; STORE_PIXELS(dst, src)
-%macro STORE_PIXELS 2
+; STORE_PIXELS_W16(dst, src)
+%macro STORE_PIXELS_W16 2
 %if ps == 2
-movu %1, m%2
+movu   [%1],  m%2
 %else
+movu   [%1], xm%2
+%endif
+%endmacro
+
+%macro STORE_PIXELS_W8 2
+%if ps == 2
+movu   [%1], xm%2
+%else
+movq   [%1], xm%2
+%endif
+%endmacro
+
+; STORE_PIXELS_W4(dst, src, offset)
+%macro STORE_PIXELS_W4 3
+%if ps == 2
+movq   [%1 + %3 * ps], xm%2
+%else
+movd[%1 + %3], xm%2
+%endif
+%endmacro
+
+%macro STORE_PIXELS_W8LE 3
+cmp %3, 8
+jl .w4
+STORE_PIXELS_W8 %1, %2
+cmp %3, 12
+%if ps == 2
+vpermq  m%2,  m%2, q0302
+%else
+vpermq  m%2,  m%2, q0101
+%endif
+jl .end
+STORE_PIXELS_W4 %1, %2, 8
+jmp .end
+.w4:
+STORE_PIXELS_W4 %1, %2, 0
+.end:
+%endmacro
+
+; STORE_PIXELS(dst, src, width)
+%macro STORE_PIXELS 3
+%if ps == 1
 packuswbm%2, m%2
 vpermq  m%2, m%2, 0x8
-movu %1, xm%2
+%endif
+
+%ifidn %3, 16
+STORE_PIXELS_W16  %1, %2
+%else
+%if LUMA
+STORE_PIXELS_W8   %1, %2
+%else
+STORE_PIXELS_W8LE %1, %2, %3
+%endif
 %endif
 %endmacro
 
-%macro FILTER_16x4 0
+%macro FILTER_16x4 1
 %if LUMA
 push clipq
 push strideq
@@ -362,7 +413,7 @@ SECTION .text
 ; clip to pixel
 CLIPW m0, m14, m15
 
-STORE_PIXELS  [dstq], 0
+STORE_PIXELSdstq, 0, %1
 
 lea srcq, [srcq + src_strideq]
 lea dstq, [dstq + dst_strideq]
@@ -399,7 +450,7 @@ SECTION .text
 ;  const uint8_t *src, ptrdiff_t src_stride, const ptrdiff_t width, cosnt 
ptr_diff_t height,
 ;  const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t 
vb_pos, ptrdiff_t pixel_max);
 ; **
-cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, dst_stride, src, 
src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \
+cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x30, dst, dst_stride, src, 
src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \
 offset, x, s5, s6
 %define ps (%1 / 8) ; pixel size
 movdxm15, pixel_maxd
@@ -409,18 +460,32 @@ cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, 
dst_stride, src, src_s
 .loop:
 pushsrcq
 pushdstq
+push  widthq
 xor   xq, xq
 
 .loop_w:
+cmp   widthq, 16
+jl   .loop_w_end
+
 LOAD_PARAMS
-FILTER_16x4
+FILTER_16x4   16
 
 add srcq, 16 * ps
 add dstq, 16 * ps
 add   xq, 16
-cmp   xq, widthq
-jl   .loop_w
+sub   widthq, 16
+jmp  .loop_w
+
+.loop_w_end:
+cmp   widthq, 0
+je.w_end
+
+LOAD_PARAMS
+FILTER_16x4  widthq
+
+.w_end:
 
+pop   widthq
 pop dstq
 pop srcq
 lea srcq, [srcq + 4 * src_strideq]

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/vvcdec: refact, unify vvc_deblock_bs_luma_{horizontal, vertical}

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Mon Jun 10 16:52:09 
2024 +0800| [0a5bbcf0f37b3fdb837b406796ce3e08a88eb996] | committer: Nuo Mi

avcodec/vvcdec: refact, unify vvc_deblock_bs_luma_{horizontal, vertical}

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0a5bbcf0f37b3fdb837b406796ce3e08a88eb996
---

 libavcodec/vvc/filter.c | 108 
 1 file changed, 36 insertions(+), 72 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index d4c09b69f3..996e58dc3e 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -538,100 +538,64 @@ static int deblock_is_boundary(const VVCLocalContext 
*lc, const int boundary,
 return boundary;
 }
 
-static void vvc_deblock_bs_luma_vertical(const VVCLocalContext *lc,
-const int x0, const int y0, const int width, const int height, const int 
rs)
+static void vvc_deblock_bs_luma(const VVCLocalContext *lc,
+const int x0, const int y0, const int width, const int height, const int 
rs, const int vertical)
 {
 const VVCFrameContext *fc  = lc->fc;
 const MvField *tab_mvf = fc->tab.mvf;
+const int mask = LUMA_GRID - 1;
 const int log2_min_pu_size = MIN_PU_LOG2;
 const int min_pu_width = fc->ps.pps->min_pu_width;
 const int min_cb_log2  = fc->ps.sps->min_cb_log2_size_y;
 const int min_cb_width = fc->ps.pps->min_cb_width;
-const int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * 
min_pu_width +
-(x0 >> log2_min_pu_size)].pred_flag == PF_INTRA;
-int boundary_left;
-int has_vertical_sb = 0;
-
+const int pos  = vertical ? x0 : y0;
 const int off_q= (y0 >> min_cb_log2) * min_cb_width + (x0 >> 
min_cb_log2);
-const int cb_x = fc->tab.cb_pos_x[LUMA][off_q];
-const int cb_width = fc->tab.cb_width[LUMA][off_q];
-const int off_x= cb_x - x0;
-
-if (!is_intra) {
-if (fc->tab.msf[off_q] || fc->tab.iaf[off_q])
-has_vertical_sb = cb_width  > 8;
-}
-
-// bs for vertical TU boundaries
-boundary_left = deblock_is_boundary(lc, x0 > 0 && !(x0 & 3), x0, rs, 1);
-
-if (boundary_left) {
-const RefPicList *rpl_left =
-(lc->boundary_flags & BOUNDARY_LEFT_SLICE) ? 
ff_vvc_get_ref_list(fc, fc->ref, x0 - 1, y0) : lc->sc->rpl;
-for (int i = 0; i < height; i += 4) {
+const int cb   = (vertical ? fc->tab.cb_pos_x : 
fc->tab.cb_pos_y )[LUMA][off_q];
+const int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * 
min_pu_width +
+(x0 >> log2_min_pu_size)].pred_flag == PF_INTRA;
+
+if (deblock_is_boundary(lc, pos > 0 && !(pos & mask), pos, rs, vertical)) {
+const int size  = vertical ? height : width;
+const int off   = cb - pos;
+const int cb_size   = (vertical ? fc->tab.cb_width : 
fc->tab.cb_height)[LUMA][off_q];
+const int has_sb= !is_intra && (fc->tab.msf[off_q] || 
fc->tab.iaf[off_q]) && cb_size > 8;
+const int flag  = vertical ? BOUNDARY_LEFT_SLICE : 
BOUNDARY_UPPER_SLICE;
+const RefPicList *rpl_p =
+(lc->boundary_flags & flag) ? ff_vvc_get_ref_list(fc, fc->ref, x0 
- vertical, y0 - !vertical) : lc->sc->rpl;
+uint8_t *tab_bs = vertical ? fc->tab.vertical_bs[LUMA] : 
fc->tab.horizontal_bs[LUMA];
+uint8_t *tab_max_len_p  = vertical ? fc->tab.vertical_p : 
fc->tab.horizontal_p;
+uint8_t *tab_max_len_q  = vertical ? fc->tab.vertical_q : 
fc->tab.horizontal_q;
+
+for (int i = 0; i < size; i += 4) {
+const int x = x0 + i * !vertical;
+const int y = y0 + i * vertical;
 uint8_t max_len_p, max_len_q;
-const int bs = deblock_bs(lc, x0 - 1, y0 + i, x0, y0 + i, 
rpl_left, 0, off_x, has_vertical_sb);
+const int bs = deblock_bs(lc, x - vertical, y - !vertical, x, y, 
rpl_p, LUMA, off, has_sb);
 
-TAB_BS(fc->tab.vertical_bs[LUMA], x0, (y0 + i)) = bs;
+TAB_BS(tab_bs, x, y) = bs;
 
-derive_max_filter_length_luma(fc, x0, y0 + i, is_intra, 
has_vertical_sb, 1, &max_len_p, &max_len_q);
-TAB_MAX_LEN(fc->tab.vertical_p, x0, y0 + i) = max_len_p;
-TAB_MAX_LEN(fc->tab.vertical_q, x0, y0 + i) = max_len_q;
+derive_max_filter_length_luma(fc, x, y, is_intra, has_sb, 
vertical, &max_len_p, &max_len_q);
+TAB_MAX_LEN(tab_max_len_p, x, y) = max_len_p;
+TAB_MAX_LEN(tab_max_len_q, x, y) = max_len_q;
 }
 }
 
 if (!is_intra) {
 if (fc->tab.msf[off_q] || fc->tab.iaf[off_q])
-vvc_deblock_subblock_bs(lc, cb_x, x0, y0, width, height, 1);
+vvc_deblock_subblock_bs(lc, cb, x0, y0, width, height, vertical);
 }
 }
 
-static void vvc_deblock_bs_luma_horizontal(const VVCLocalContext *lc,
+static void vvc_deblock_bs_luma_vertical(const VVCLocalContext *lc,
 

[FFmpeg-cvslog] avcodec/vvcdec: refact out sao_get_edges

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sat Jun 15 14:24:33 
2024 +0800| [200862b14497f4b3ce62fade9d7c9243cf3f5e38] | committer: Nuo Mi

avcodec/vvcdec: refact out sao_get_edges

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=200862b14497f4b3ce62fade9d7c9243cf3f5e38
---

 libavcodec/vvc/filter.c | 119 ++--
 1 file changed, 65 insertions(+), 54 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 89b794195e..1326d2c82e 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -155,70 +155,81 @@ void ff_vvc_sao_copy_ctb_to_hv(VVCLocalContext *lc, const 
int rx, const int ry,
 sao_copy_ctb_to_hv(lc, rx, ry, 0);
 }
 
+static int sao_can_cross_slices(const VVCFrameContext *fc, const int rx, const 
int ry, const int dx, const int dy)
+{
+const uint8_t lfase = 
fc->ps.pps->r->pps_loop_filter_across_slices_enabled_flag;
+
+return lfase || CTB(fc->tab.slice_idx, rx, ry) == CTB(fc->tab.slice_idx, 
rx + dx, ry + dy);
+}
+
+static void sao_get_edges(uint8_t vert_edge[2], uint8_t horiz_edge[2], uint8_t 
diag_edge[4], int *restore,
+const VVCLocalContext *lc, const int edges[4], const int rx, const int ry)
+{
+const VVCFrameContext *fc  = lc->fc;
+const VVCSPS *sps  = fc->ps.sps;
+const H266RawSPS *rsps = sps->r;
+const VVCPPS *pps  = fc->ps.pps;
+const int subpic_idx   = lc->sc->sh.r->curr_subpic_idx;
+const uint8_t lfase= 
fc->ps.pps->r->pps_loop_filter_across_slices_enabled_flag;
+const uint8_t no_tile_filter   = pps->r->num_tiles_in_pic > 1 && 
!pps->r->pps_loop_filter_across_tiles_enabled_flag;
+const uint8_t no_subpic_filter = rsps->sps_num_subpics_minus1 && 
!rsps->sps_loop_filter_across_subpic_enabled_flag[subpic_idx];
+uint8_t lf_edge[] = { 0, 0, 0, 0 };
+
+*restore = no_subpic_filter || no_tile_filter || !lfase;
+
+if (!*restore)
+return;
+
+if (!edges[LEFT]) {
+lf_edge[LEFT]  = no_tile_filter && pps->ctb_to_col_bd[rx] == rx;
+lf_edge[LEFT] |= no_subpic_filter && 
rsps->sps_subpic_ctu_top_left_x[subpic_idx] == rx;
+vert_edge[0]   = !sao_can_cross_slices(fc, rx, ry, -1, 0) || 
lf_edge[LEFT];
+}
+if (!edges[RIGHT]) {
+lf_edge[RIGHT]  = no_tile_filter && pps->ctb_to_col_bd[rx] != 
pps->ctb_to_col_bd[rx + 1];
+lf_edge[RIGHT] |= no_subpic_filter && 
rsps->sps_subpic_ctu_top_left_x[subpic_idx] + 
rsps->sps_subpic_width_minus1[subpic_idx] == rx;
+vert_edge[1]= !sao_can_cross_slices(fc, rx, ry, 1, 0) || 
lf_edge[RIGHT];
+}
+if (!edges[TOP]) {
+lf_edge[TOP]   = no_tile_filter && pps->ctb_to_row_bd[ry] == ry;
+lf_edge[TOP]  |= no_subpic_filter && 
rsps->sps_subpic_ctu_top_left_y[subpic_idx] == ry;
+horiz_edge[0]  = !sao_can_cross_slices(fc, rx, ry, 0, -1) || 
lf_edge[TOP];
+}
+if (!edges[BOTTOM]) {
+lf_edge[BOTTOM]  = no_tile_filter && pps->ctb_to_row_bd[ry] != 
pps->ctb_to_row_bd[ry + 1];
+lf_edge[BOTTOM] |= no_subpic_filter && 
rsps->sps_subpic_ctu_top_left_y[subpic_idx] + 
rsps->sps_subpic_height_minus1[subpic_idx] == ry;
+horiz_edge[1]= !sao_can_cross_slices(fc, rx, ry, 0, 1) || 
lf_edge[BOTTOM];
+}
+
+if (!edges[LEFT] && !edges[TOP])
+diag_edge[0] = !sao_can_cross_slices(fc, rx, ry, -1, -1) || 
lf_edge[LEFT] || lf_edge[TOP];
+
+if (!edges[TOP] && !edges[RIGHT])
+diag_edge[1] = !sao_can_cross_slices(fc, rx, ry,  1, -1) || 
lf_edge[RIGHT] || lf_edge[TOP];
+
+if (!edges[RIGHT] && !edges[BOTTOM])
+diag_edge[2] = !sao_can_cross_slices(fc, rx, ry,  1,  1) || 
lf_edge[RIGHT] || lf_edge[BOTTOM];
+
+if (!edges[LEFT] && !edges[BOTTOM])
+diag_edge[3] = !sao_can_cross_slices(fc, rx, ry, -1,  1) || 
lf_edge[LEFT] || lf_edge[BOTTOM];
+}
+
 void ff_vvc_sao_filter(VVCLocalContext *lc, int x, int y)
 {
 VVCFrameContext *fc  = lc->fc;
 const int ctb_size_y = fc->ps.sps->ctb_size_y;
 static const uint8_t sao_tab[16] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 
7, 7, 8, 8 };
-int c_idx;
+int c_idx, restore;
 const int rx = x >> fc->ps.sps->ctb_log2_size_y;
 const int ry = y >> fc->ps.sps->ctb_log2_size_y;
 int edges[4] = { !rx, !ry, rx == fc->ps.pps->ctb_width - 1, ry == 
fc->ps.pps->ctb_height - 1 };
 const SAOParams *sao = &CTB(fc->tab.sao, rx, ry);
 // flags indicating unfilterable edges
-uint8_t vert_edge[]  = { 0, 0 };
-uint8_t horiz_edge[] = { 0, 0 };
-uint8_t diag_edge[]  = { 0, 0, 0, 0 };
-uint8_t tile_edge[]  = { 0, 0, 0, 0 };
-uint8_t subpic_edge[]= { 0, 0, 0, 0 };
-const int subpic_idx = lc->sc->sh.r->curr_subpic_idx;
-const uint8_t lfase  = 
fc->ps.pps->r->pps_loop_filter_across_slices_enabled_flag;
-const uint8_t no_tile_filter = fc->ps.pps->r->num_tiles_in_pic > 1 &&
-

[FFmpeg-cvslog] avcodec/vvcdec: misc, use POS to simplify filter code

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Thu Jun 13 20:55:10 
2024 +0800| [16e410aaac83410ed0e701434f9db866cc78c5ec] | committer: Nuo Mi

avcodec/vvcdec: misc, use POS to simplify filter code

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=16e410aaac83410ed0e701434f9db866cc78c5ec
---

 libavcodec/vvc/filter.c | 23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 7ae36b2344..82a58a7ea8 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -34,6 +34,10 @@
 
 #define DEFAULT_INTRA_TC_OFFSET 2
 
+#define POS(c_idx, x, y)   
 \
+&fc->frame->data[c_idx][((y) >> fc->ps.sps->vshift[c_idx]) * 
fc->frame->linesize[c_idx] +   \
+(((x) >> fc->ps.sps->hshift[c_idx]) << fc->ps.sps->pixel_shift)]
+
 //Table 43 Derivation of threshold variables beta' and tc' from input Q
 static const uint16_t tctable[66] = {
   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 
  0,
@@ -135,7 +139,7 @@ static void sao_copy_ctb_to_hv(VVCLocalContext *lc, const 
int rx, const int ry,
 const int ctb_size_v   = ctb_size_y >> fc->ps.sps->vshift[c_idx];
 const int width= FFMIN(ctb_size_h, (fc->ps.pps->width  >> 
fc->ps.sps->hshift[c_idx]) - x);
 const int height   = FFMIN(ctb_size_v, (fc->ps.pps->height >> 
fc->ps.sps->vshift[c_idx]) - y);
-const uint8_t *src  = &fc->frame->data[c_idx][y * src_stride + 
(x << fc->ps.sps->pixel_shift)];
+const uint8_t *src = POS(c_idx, x0, y0);
 copy_ctb_to_hv(fc, src, src_stride, x, y, width, height, c_idx, rx, 
ry, top);
 }
 }
@@ -225,7 +229,7 @@ void ff_vvc_sao_filter(VVCLocalContext *lc, int x, int y)
 int width= FFMIN(ctb_size_h, (fc->ps.pps->width  >> 
fc->ps.sps->hshift[c_idx]) - x0);
 int height   = FFMIN(ctb_size_v, (fc->ps.pps->height >> 
fc->ps.sps->vshift[c_idx]) - y0);
 int tab  = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
-uint8_t *src = &fc->frame->data[c_idx][y0 * src_stride + (x0 << 
fc->ps.sps->pixel_shift)];
+uint8_t *src = POS(c_idx, x, y);
 ptrdiff_t dst_stride;
 uint8_t *dst;
 
@@ -750,7 +754,7 @@ void ff_vvc_deblock_vertical(const VVCLocalContext *lc, 
const int x0, const int
 const int dy = i << 2;
 bs[i] = (y + dy < y_end) ? TAB_BS(fc->tab.bs[1][c_idx], x, 
y + dy) : 0;
 if (bs[i]) {
-src = &fc->frame->data[c_idx][((y + dy) >> vs) * 
fc->frame->linesize[c_idx] + ((x >> hs) << fc->ps.sps->pixel_shift)];
+src = POS(c_idx, x, y + dy);
 qp = get_qp(fc, src, x, y + dy, c_idx, 1);
 
 beta[i] = betatable[av_clip(qp + beta_offset, 0, 
MAX_QP)];
@@ -762,7 +766,7 @@ void ff_vvc_deblock_vertical(const VVCLocalContext *lc, 
const int x0, const int
 }
 
 if (!all_zero_bs) {
-src = &fc->frame->data[c_idx][(y >> vs) * 
fc->frame->linesize[c_idx] + ((x >> hs) << fc->ps.sps->pixel_shift)];
+src = POS(c_idx, x, y);
 if (!c_idx) {
 fc->vvcdsp.lf.filter_luma[1](src, 
fc->frame->linesize[c_idx],
 beta, tc, no_p, no_q, max_len_p, max_len_q, 0);
@@ -823,7 +827,7 @@ void ff_vvc_deblock_horizontal(const VVCLocalContext *lc, 
const int x0, const in
 
 bs[i] = (x + dx < x_end) ? TAB_BS(fc->tab.bs[0][c_idx], x 
+ dx, y) : 0;
 if (bs[i]) {
-src = &fc->frame->data[c_idx][(y >> vs) * 
fc->frame->linesize[c_idx] + (((x + dx)>> hs) << fc->ps.sps->pixel_shift)];
+src = POS(c_idx, x + dx, y);
 qp = get_qp(fc, src, x + dx, y, c_idx, 0);
 
 beta[i] = betatable[av_clip(qp + beta_offset, 0, 
MAX_QP)];
@@ -834,7 +838,7 @@ void ff_vvc_deblock_horizontal(const VVCLocalContext *lc, 
const int x0, const in
 tc[i] = bs[i] ? TC_CALC(qp, bs[i]) : 0;
 }
 if (!all_zero_bs) {
-src = &fc->frame->data[c_idx][(y >> vs) * 
fc->frame->linesize[c_idx] + ((x >> hs) << fc->ps.sps->pixel_shift)];
+src = POS(c_idx, x, y);
 if (!c_idx) {
 fc->vvcdsp.lf.filter_luma[0](src, 
fc->frame->linesize[c_idx],
 beta, tc, no_p, no_q, max_len_p, max_len_q, 
horizontal_ctu_edge);
@@ -1079,7 +1083,6 @@ void ff_vvc_alf_copy_ctu_to_hv(VVCLocalContext* lc, const 
int x0, const int y0)
 const int rx = x0 >> fc->ps.sps->ctb_log2_size_y;
 const int ry = y0 >> fc->ps.sps->ctb_log2_size_y;
 const int ctb_size_y = fc->ps.sps->ctb_size_y;
-const int ps = fc->ps.sps->pixe

[FFmpeg-cvslog] avcodec/vvcdec: refact, unify vvc_deblock_bs_chroma_{horizontal, vertical}

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Tue Jun 11 21:03:05 
2024 +0800| [cc89832d56cc070337cfaccbf0aadc0531a7fc03] | committer: Nuo Mi

avcodec/vvcdec: refact, unify vvc_deblock_bs_chroma_{horizontal, vertical}

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cc89832d56cc070337cfaccbf0aadc0531a7fc03
---

 libavcodec/vvc/filter.c | 62 +++--
 1 file changed, 19 insertions(+), 43 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 996e58dc3e..06e1717b13 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -586,56 +586,33 @@ static void vvc_deblock_bs_luma(const VVCLocalContext *lc,
 }
 }
 
-static void vvc_deblock_bs_luma_vertical(const VVCLocalContext *lc,
-const int x0, const int y0, const int width, const int height, const int 
rs)
-{
-vvc_deblock_bs_luma(lc, x0, y0, width, height, rs, 1);
-}
-
-static void vvc_deblock_bs_luma_horizontal(const VVCLocalContext *lc,
-const int x0, const int y0, const int width, const int height, const int 
rs)
-{
-vvc_deblock_bs_luma(lc, x0, y0, width, height, rs, 0);
-}
-
-static void vvc_deblock_bs_chroma_vertical(const VVCLocalContext *lc,
-const int x0, const int y0, const int width, const int height, const int 
rs)
+static void vvc_deblock_bs_chroma(const VVCLocalContext *lc,
+const int x0, const int y0, const int width, const int height, const int 
rs, const int vertical)
 {
 const VVCFrameContext *fc = lc->fc;
-const int boundary_left = deblock_is_boundary(lc,
- x0 > 0 && !(x0 & ((CHROMA_GRID << fc->ps.sps->hshift[CHROMA]) - 1)), 
x0, rs, 1);
-
-if (boundary_left) {
-for (int i = 0; i < height; i += 2) {
-for (int c_idx = CB; c_idx <= CR; c_idx++) {
-const int bs = deblock_bs(lc, x0 - 1, y0 + i, x0, y0 + i, 
NULL, c_idx, 0, 0);
+const int shift   = (vertical ? fc->ps.sps->hshift : 
fc->ps.sps->vshift)[CHROMA];
+const int mask= (CHROMA_GRID << shift) - 1;
+const int pos = vertical ? x0 : y0;
 
-TAB_BS(fc->tab.vertical_bs[c_idx], x0, (y0 + i)) = bs;
-}
-}
-}
-}
+if (deblock_is_boundary(lc, pos > 0 && !(pos & mask), pos, rs, vertical)) {
+const int size = vertical ? height : width;
 
-static void vvc_deblock_bs_chroma_horizontal(const VVCLocalContext *lc,
-const int x0, const int y0, const int width, const int height, const int 
rs)
-{
-const VVCFrameContext *fc = lc->fc;
-const int boundary_upper  = deblock_is_boundary(lc,
-y0 > 0 && !(y0 & ((CHROMA_GRID << fc->ps.sps->vshift[CHROMA]) - 1)), 
y0, rs, 0);
+for (int c_idx = CB; c_idx <= CR; c_idx++) {
+uint8_t *tab_bs = (vertical ? fc->tab.vertical_bs : 
fc->tab.horizontal_bs)[c_idx];
 
-if (boundary_upper) {
-for (int i = 0; i < width; i += 2) {
-for (int c_idx = CB; c_idx <= CR; c_idx++) {
-const int bs = deblock_bs(lc, x0 + i, y0 - 1, x0 + i, y0, 
NULL, c_idx, 0, 0);
+for (int i = 0; i < size; i += 2) {
+const int x  = x0 + i * !vertical;
+const int y  = y0 + i * vertical;
+const int bs = deblock_bs(lc, x - vertical, y - !vertical, x, 
y, NULL, c_idx, 0, 0);
 
-TAB_BS(fc->tab.horizontal_bs[c_idx], x0 + i, y0) = bs;
+TAB_BS(tab_bs, x, y) = bs;
 }
 }
 }
 }
 
 typedef void (*deblock_bs_fn)(const VVCLocalContext *lc, const int x0, const 
int y0,
-const int width, const int height, const int rs);
+const int width, const int height, const int rs, const int vertical);
 
 static void vvc_deblock_bs(const VVCLocalContext *lc, const int x0, const int 
y0, const int rs, const int vertical)
 {
@@ -645,9 +622,8 @@ static void vvc_deblock_bs(const VVCLocalContext *lc, const 
int x0, const int y0
 const int ctb_size = sps->ctb_size_y;
 const int x_end= FFMIN(x0 + ctb_size, pps->width) >> MIN_TU_LOG2;
 const int y_end= FFMIN(y0 + ctb_size, pps->height) >> MIN_TU_LOG2;
-deblock_bs_fn deblock_bs[2][2] = {
-{ vvc_deblock_bs_luma_horizontal, vvc_deblock_bs_chroma_horizontal },
-{ vvc_deblock_bs_luma_vertical,   vvc_deblock_bs_chroma_vertical   }
+deblock_bs_fn deblock_bs[] = {
+vvc_deblock_bs_luma, vvc_deblock_bs_chroma
 };
 
 for (int is_chroma = 0; is_chroma <= 1; is_chroma++) {
@@ -657,8 +633,8 @@ static void vvc_deblock_bs(const VVCLocalContext *lc, const 
int x0, const int y0
 for (int x = x0 >> MIN_TU_LOG2; x < x_end; x++) {
 const int off = y * fc->ps.pps->min_tu_width + x;
 if ((fc->tab.tb_pos_x0[is_chroma][off] >> MIN_TU_LOG2) == x && 
(fc->tab.tb_pos_y0[is_chroma][off] >> MIN_TU_LOG2) == y) {
-deblock_bs[vertical][is_chroma](lc, x << MIN_TU_LOG2, y << 
MIN_TU_LOG2,
-fc->tab.tb_width[is_chroma][off] << hs, 
f

[FFmpeg-cvslog] avcodec/vvcdec: refact out sao_extends_edges

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sun Jun 16 10:35:06 
2024 +0800| [ccf1d4172cb8d01c709871a2d68deff918e9a0e7] | committer: Nuo Mi

avcodec/vvcdec: refact out sao_extends_edges

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ccf1d4172cb8d01c709871a2d68deff918e9a0e7
---

 libavcodec/vvc/filter.c | 108 ++--
 1 file changed, 50 insertions(+), 58 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 1326d2c82e..534ba57205 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -214,6 +214,52 @@ static void sao_get_edges(uint8_t vert_edge[2], uint8_t 
horiz_edge[2], uint8_t d
 diag_edge[3] = !sao_can_cross_slices(fc, rx, ry, -1,  1) || 
lf_edge[LEFT] || lf_edge[BOTTOM];
 }
 
+static void sao_copy_hor(uint8_t *dst, const ptrdiff_t dst_stride,
+const uint8_t *src, const ptrdiff_t src_stride, const int width, const int 
edges[4], const int ps)
+{
+const int left  = 1 - edges[LEFT];
+const int right = 1 - edges[RIGHT];
+int pos = 0;
+
+src -= left << ps;
+dst -= left << ps;
+
+if (left) {
+copy_pixel(dst, src, ps);
+pos += (1 << ps);
+}
+memcpy(dst + pos, src + pos, width << ps);
+if (right) {
+pos += width << ps;
+copy_pixel(dst + pos, src + pos, ps);
+}
+}
+
+static void sao_extends_edges(uint8_t *dst, const ptrdiff_t dst_stride,
+const uint8_t *src, const ptrdiff_t src_stride, const int width, const int 
height,
+const VVCFrameContext *fc, const int x, const int y, const int rx, const 
int ry, const int edges[4], const int c_idx)
+{
+const uint8_t *sao_h = fc->tab.sao_pixel_buffer_h[c_idx];
+const uint8_t *sao_v = fc->tab.sao_pixel_buffer_v[c_idx];
+const int w  = fc->ps.pps->width >> fc->ps.sps->hshift[c_idx];
+const int h  = fc->ps.pps->height >> fc->ps.sps->vshift[c_idx];
+const int ps = fc->ps.sps->pixel_shift;
+
+if (!edges[TOP])
+sao_copy_hor(dst - dst_stride, dst_stride, sao_h + (((2 * ry - 1) * w 
+ x) << ps), src_stride, width, edges, ps);
+
+if (!edges[BOTTOM])
+sao_copy_hor(dst + height * dst_stride, dst_stride, sao_h + (((2 * ry 
+ 2) * w + x) << ps), src_stride, width, edges, ps);
+
+if (!edges[LEFT])
+copy_vert(dst - (1 << ps), sao_v + (((2 * rx - 1) * h + y) << ps), ps, 
height, dst_stride, 1 << ps);
+
+if (!edges[RIGHT])
+copy_vert(dst + (width << ps), sao_v + (((2 * rx + 2) * h + y) << ps), 
 ps, height, dst_stride, 1 << ps);
+
+copy_ctb(dst, src, width << ps, height, dst_stride, src_stride);
+}
+
 void ff_vvc_sao_filter(VVCLocalContext *lc, int x, int y)
 {
 VVCFrameContext *fc  = lc->fc;
@@ -241,8 +287,6 @@ void ff_vvc_sao_filter(VVCLocalContext *lc, int x, int y)
 int height   = FFMIN(ctb_size_v, (fc->ps.pps->height >> 
fc->ps.sps->vshift[c_idx]) - y0);
 int tab  = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
 uint8_t *src = POS(c_idx, x, y);
-ptrdiff_t dst_stride;
-uint8_t *dst;
 
 switch (sao->type_idx[c_idx]) {
 case SAO_BAND:
@@ -251,63 +295,11 @@ void ff_vvc_sao_filter(VVCLocalContext *lc, int x, int y)
 break;
 case SAO_EDGE:
 {
-const int w = fc->ps.pps->width >> fc->ps.sps->hshift[c_idx];
-const int h = fc->ps.pps->height >> fc->ps.sps->vshift[c_idx];
-const int sh = fc->ps.sps->pixel_shift;
-
-dst_stride = 2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE;
-dst = lc->sao_buffer + dst_stride + AV_INPUT_BUFFER_PADDING_SIZE;
-
-if (!edges[TOP]) {
-const int left = 1 - edges[LEFT];
-const int right = 1 - edges[RIGHT];
-const uint8_t *src1;
-uint8_t *dst1;
-int pos = 0;
-
-dst1 = dst - dst_stride - (left << sh);
-src1 = fc->tab.sao_pixel_buffer_h[c_idx] + (((2 * ry - 1) * w 
+ x0 - left) << sh);
-if (left) {
-copy_pixel(dst1, src1, sh);
-pos += (1 << sh);
-}
-memcpy(dst1 + pos, src1 + pos, width << sh);
-if (right) {
-pos += width << sh;
-copy_pixel(dst1 + pos, src1 + pos, sh);
-}
-}
-if (!edges[BOTTOM]) {
-const int left = 1 - edges[LEFT];
-const int right = 1 - edges[RIGHT];
-const uint8_t *src1;
-uint8_t *dst1;
-int pos = 0;
-
-dst1 = dst + height * dst_stride - (left << sh);
-src1 = fc->tab.sao_pixel_buffer_h[c_idx] + (((2 * ry + 2) * w 
+ x0 - left) << sh);
-if (left) {
-copy_pixel(dst1, src1, sh);
-pos += (1 << sh);
-}
-memcpy(dst1 + pos, src1

[FFmpeg-cvslog] avcodec/vvcdec: refact, unify ff_vvc_deblock_{horizontal, vertical}

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Thu Jun 13 22:04:10 
2024 +0800| [5b9320b209c727ab2df3e76f77aad676f986c8e4] | committer: Nuo Mi

avcodec/vvcdec: refact, unify ff_vvc_deblock_{horizontal, vertical}

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5b9320b209c727ab2df3e76f77aad676f986c8e4
---

 libavcodec/vvc/filter.c | 153 ++--
 1 file changed, 44 insertions(+), 109 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 82a58a7ea8..89b794195e 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -712,144 +712,79 @@ static int get_qp(const VVCFrameContext *fc, const 
uint8_t *src, const int x, co
 return get_qp_c(fc, x, y, c_idx, vertical);
 }
 
-void ff_vvc_deblock_vertical(const VVCLocalContext *lc, const int x0, const 
int y0, const int rs)
+static void vvc_deblock(const VVCLocalContext *lc, int x0, int y0, const int 
rs, const int vertical)
 {
-VVCFrameContext *fc = lc->fc;
-const VVCSPS *sps   = fc->ps.sps;
-const int c_end = sps->r->sps_chroma_format_idc ? 
VVC_MAX_SAMPLE_ARRAYS : 1;
-uint8_t *src;
-int x, y, qp;
+VVCFrameContext *fc= lc->fc;
+const VVCSPS *sps  = fc->ps.sps;
+const int c_end= sps->r->sps_chroma_format_idc ? 
VVC_MAX_SAMPLE_ARRAYS : 1;
+const int ctb_size = fc->ps.sps->ctb_size_y;
+const DBParams *params = fc->tab.deblock + rs;
+int x_end  = FFMIN(x0 + ctb_size, fc->ps.pps->width);
+int y_end  = FFMIN(y0 + ctb_size, fc->ps.pps->height);
 
 //not use this yet, may needed by plt.
-const uint8_t no_p[4] = { 0 };
-const uint8_t no_q[4] = { 0 } ;
-
-const int ctb_log2_size_y = fc->ps.sps->ctb_log2_size_y;
-int x_end, y_end;
-const int ctb_size = 1 << ctb_log2_size_y;
-const DBParams *params = fc->tab.deblock + rs;
+const uint8_t no_p[4]  = { 0 };
+const uint8_t no_q[4]  = { 0 } ;
 
-vvc_deblock_bs(lc, x0, y0, rs, 1);
+vvc_deblock_bs(lc, x0, y0, rs, vertical);
 
-x_end = x0 + ctb_size;
-if (x_end > fc->ps.pps->width)
-x_end = fc->ps.pps->width;
-y_end = y0 + ctb_size;
-if (y_end > fc->ps.pps->height)
-y_end = fc->ps.pps->height;
+if (!vertical) {
+FFSWAP(int, x_end, y_end);
+FFSWAP(int, x0, y0);
+}
 
 for (int c_idx = 0; c_idx < c_end; c_idx++) {
-const int hs  = sps->hshift[c_idx];
-const int vs  = sps->vshift[c_idx];
+const int hs  = (vertical ? sps->hshift : sps->vshift)[c_idx];
+const int vs  = (vertical ? sps->vshift : sps->hshift)[c_idx];
 const int grid= c_idx ? (CHROMA_GRID << hs) : LUMA_GRID;
 const int tc_offset   = params->tc_offset[c_idx];
 const int beta_offset = params->beta_offset[c_idx];
+const int src_stride  = fc->frame->linesize[c_idx];
 
-for (y = y0; y < y_end; y += (DEBLOCK_STEP << vs)) {
-for (x = x0 ? x0 : grid; x < x_end; x += grid) {
-int32_t bs[4], beta[4], tc[4], all_zero_bs = 1;
+for (int y = y0; y < y_end; y += (DEBLOCK_STEP << vs)) {
+for (int x = x0 ? x0 : grid; x < x_end; x += grid) {
+const uint8_t horizontal_ctu_edge = !vertical && !(x % 
ctb_size);
+int32_t bs[4], beta[4], tc[4] = { }, all_zero_bs = 1;
 uint8_t max_len_p[4], max_len_q[4];
 
 for (int i = 0; i < DEBLOCK_STEP >> (2 - vs); i++) {
-const int dy = i << 2;
-bs[i] = (y + dy < y_end) ? TAB_BS(fc->tab.bs[1][c_idx], x, 
y + dy) : 0;
-if (bs[i]) {
-src = POS(c_idx, x, y + dy);
-qp = get_qp(fc, src, x, y + dy, c_idx, 1);
+int tx = x;
+int ty = y + (i << 2);
+const int end  = ty >= y_end;
 
-beta[i] = betatable[av_clip(qp + beta_offset, 0, 
MAX_QP)];
+if (!vertical)
+FFSWAP(int, tx, ty);
 
-max_filter_length(fc, x, y + dy, c_idx, 1, 0, bs[i], 
&max_len_p[i], &max_len_q[i]);
+bs[i] = end ? 0 : TAB_BS(fc->tab.bs[vertical][c_idx], tx, 
ty);
+if (bs[i]) {
+const int qp = get_qp(fc, POS(c_idx, tx, ty), tx, ty, 
c_idx, vertical);
+beta[i] = betatable[av_clip(qp + beta_offset, 0, 
MAX_QP)];
+tc[i] = TC_CALC(qp, bs[i]) ;
+max_filter_length(fc, tx, ty, c_idx, vertical, 
horizontal_ctu_edge, bs[i], &max_len_p[i], &max_len_q[i]);
 all_zero_bs = 0;
 }
-tc[i] = bs[i] ? TC_CALC(qp, bs[i]) : 0;
 }
 
 if (!all_zero_bs) {
-src = POS(c_idx, x, y);
-if (!c_idx) {

[FFmpeg-cvslog] avcodec/vvcdec: refact, unify {horizontal, vertical}_bs, {horizontal, vertical}_p, {horizontal, vertical}_q

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Tue Jun 11 21:42:00 
2024 +0800| [a6ea542a34b449d9d59771e1b5d108617df51f34] | committer: Nuo Mi

avcodec/vvcdec: refact, unify {horizontal, vertical}_bs, {horizontal, 
vertical}_p, {horizontal, vertical}_q

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a6ea542a34b449d9d59771e1b5d108617df51f34
---

 libavcodec/vvc/dec.c| 12 +---
 libavcodec/vvc/dec.h|  9 +++--
 libavcodec/vvc/filter.c | 32 +++-
 3 files changed, 19 insertions(+), 34 deletions(-)

diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index f5603306f3..a8a9a7c99d 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -191,14 +191,12 @@ static void bs_tl_init(TabList *l, VVCFrameContext *fc)
 
 tl_init(l, 1, changed);
 
-for (int i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++) {
-TL_ADD(horizontal_bs[i], bs_count);
-TL_ADD(vertical_bs[i],   bs_count);
+for (int i = 0; i < 2; i++) {
+for (int j = 0; j < VVC_MAX_SAMPLE_ARRAYS; j++)
+TL_ADD(bs[i][j], bs_count);
+TL_ADD(max_len_p[i], bs_count);
+TL_ADD(max_len_q[i], bs_count);
 }
-TL_ADD(horizontal_q, bs_count);
-TL_ADD(horizontal_p, bs_count);
-TL_ADD(vertical_p,   bs_count);
-TL_ADD(vertical_q,   bs_count);
 }
 
 static void pixel_buffer_nz_tl_init(TabList *l, VVCFrameContext *fc)
diff --git a/libavcodec/vvc/dec.h b/libavcodec/vvc/dec.h
index 1e0b76f283..a8492f1398 100644
--- a/libavcodec/vvc/dec.h
+++ b/libavcodec/vvc/dec.h
@@ -178,12 +178,9 @@ typedef struct VVCFrameContext {
 uint8_t *tb_height[2];
 uint8_t *pcmf[2];
 
-uint8_t *horizontal_bs[VVC_MAX_SAMPLE_ARRAYS];
-uint8_t *vertical_bs[VVC_MAX_SAMPLE_ARRAYS];
-uint8_t *horizontal_p;  ///< horizontal 
maxFilterLengthPs for luma
-uint8_t *horizontal_q;  ///< horizontal 
maxFilterLengthQs for luma
-uint8_t *vertical_p;///< vertical   
maxFilterLengthPs for luma
-uint8_t *vertical_q;///< vertical   
maxFilterLengthQs for luma
+uint8_t *bs[2][VVC_MAX_SAMPLE_ARRAYS];  ///< horizontal, 
vertical boundary filtering strength
+uint8_t *max_len_p[2];  ///< horizontal, 
vertical maxFilterLengthPs for luma
+uint8_t *max_len_q[2];  ///< horizontal, 
vertical maxFilterLengthQs for luma
 
 uint8_t *sao_pixel_buffer_h[VVC_MAX_SAMPLE_ARRAYS];
 uint8_t *sao_pixel_buffer_v[VVC_MAX_SAMPLE_ARRAYS];
diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 06e1717b13..7ae36b2344 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -414,9 +414,6 @@ static void vvc_deblock_subblock_bs(const VVCLocalContext 
*lc,
 const RefPicList *rpl  = lc->sc->rpl;
 int stridea= fc->ps.pps->min_pu_width;
 int strideb= 1;
-uint8_t *tab_bs= vertical ? fc->tab.vertical_bs[LUMA] : 
fc->tab.horizontal_bs[LUMA];
-uint8_t *tab_max_len_p = vertical ? fc->tab.vertical_p : 
fc->tab.horizontal_p;
-uint8_t *tab_max_len_q = vertical ? fc->tab.vertical_q : 
fc->tab.horizontal_q;
 const int log2_min_pu_size = MIN_PU_LOG2;
 
 if (!vertical) {
@@ -442,7 +439,7 @@ static void vvc_deblock_subblock_bs(const VVCLocalContext 
*lc,
 if (!vertical)
 FFSWAP(int, x, y);
 
-TAB_BS(tab_bs, x, y) = bs;
+TAB_BS(fc->tab.bs[vertical][LUMA], x, y) = bs;
 
 if (i == 4 || i == width - 4)
 max_len_p = max_len_q = 1;
@@ -451,8 +448,8 @@ static void vvc_deblock_subblock_bs(const VVCLocalContext 
*lc,
 else
 max_len_p = max_len_q = 3;
 
-TAB_MAX_LEN(tab_max_len_p, x, y) = max_len_p;
-TAB_MAX_LEN(tab_max_len_q, x, y) = max_len_q;
+TAB_MAX_LEN(fc->tab.max_len_p[vertical], x, y) = max_len_p;
+TAB_MAX_LEN(fc->tab.max_len_q[vertical], x, y) = max_len_q;
 }
 }
 }
@@ -562,9 +559,6 @@ static void vvc_deblock_bs_luma(const VVCLocalContext *lc,
 const int flag  = vertical ? BOUNDARY_LEFT_SLICE : 
BOUNDARY_UPPER_SLICE;
 const RefPicList *rpl_p =
 (lc->boundary_flags & flag) ? ff_vvc_get_ref_list(fc, fc->ref, x0 
- vertical, y0 - !vertical) : lc->sc->rpl;
-uint8_t *tab_bs = vertical ? fc->tab.vertical_bs[LUMA] : 
fc->tab.horizontal_bs[LUMA];
-uint8_t *tab_max_len_p  = vertical ? fc->tab.vertical_p : 
fc->tab.horizontal_p;
-uint8_t *tab_max_len_q  = vertical ? fc->tab.vertical_q : 
fc->tab.horizontal_q;
 
 for (int i = 0; i < size; i += 4) {
 const int x = x0 + i * !vertical;
@@ -572,11 +566,11 @@ static void vvc_deblock_bs_luma(const VVCLocalContext *lc,
 uint8_t max_len_p, max_len_q;
 const int bs = 

[FFmpeg-cvslog] avcodec/vvcdec: refact, fix naming convention of x0, y0 for sao

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sun Jun 16 14:37:13 
2024 +0800| [a69b07dc798d21565c367b14a528299878e1fec1] | committer: Nuo Mi

avcodec/vvcdec: refact, fix naming convention of x0, y0 for sao

The x, y parameter naming was mismatched with the ff_vvc_sao_filter function declaration

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a69b07dc798d21565c367b14a528299878e1fec1
---

 libavcodec/vvc/filter.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 534ba57205..10d11ce31f 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -237,10 +237,12 @@ static void sao_copy_hor(uint8_t *dst, const ptrdiff_t 
dst_stride,
 
 static void sao_extends_edges(uint8_t *dst, const ptrdiff_t dst_stride,
 const uint8_t *src, const ptrdiff_t src_stride, const int width, const int 
height,
-const VVCFrameContext *fc, const int x, const int y, const int rx, const 
int ry, const int edges[4], const int c_idx)
+const VVCFrameContext *fc, const int x0, const int y0, const int rx, const 
int ry, const int edges[4], const int c_idx)
 {
 const uint8_t *sao_h = fc->tab.sao_pixel_buffer_h[c_idx];
 const uint8_t *sao_v = fc->tab.sao_pixel_buffer_v[c_idx];
+const int x  = x0 >> fc->ps.sps->hshift[c_idx];
+const int y  = y0 >> fc->ps.sps->vshift[c_idx];
 const int w  = fc->ps.pps->width >> fc->ps.sps->hshift[c_idx];
 const int h  = fc->ps.pps->height >> fc->ps.sps->vshift[c_idx];
 const int ps = fc->ps.sps->pixel_shift;
@@ -260,14 +262,14 @@ static void sao_extends_edges(uint8_t *dst, const 
ptrdiff_t dst_stride,
 copy_ctb(dst, src, width << ps, height, dst_stride, src_stride);
 }
 
-void ff_vvc_sao_filter(VVCLocalContext *lc, int x, int y)
+void ff_vvc_sao_filter(VVCLocalContext *lc, int x0, int y0)
 {
 VVCFrameContext *fc  = lc->fc;
-const int ctb_size_y = fc->ps.sps->ctb_size_y;
+const VVCSPS *sps= fc->ps.sps;
 static const uint8_t sao_tab[16] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 
7, 7, 8, 8 };
 int c_idx, restore;
-const int rx = x >> fc->ps.sps->ctb_log2_size_y;
-const int ry = y >> fc->ps.sps->ctb_log2_size_y;
+const int rx = x0 >> sps->ctb_log2_size_y;
+const int ry = y0 >> sps->ctb_log2_size_y;
 int edges[4] = { !rx, !ry, rx == fc->ps.pps->ctb_width - 1, ry == 
fc->ps.pps->ctb_height - 1 };
 const SAOParams *sao = &CTB(fc->tab.sao, rx, ry);
 // flags indicating unfilterable edges
@@ -277,16 +279,12 @@ void ff_vvc_sao_filter(VVCLocalContext *lc, int x, int y)
 
 sao_get_edges(vert_edge, horiz_edge, diag_edge, &restore, lc, edges, rx, 
ry);
 
-for (c_idx = 0; c_idx < (fc->ps.sps->r->sps_chroma_format_idc ? 3 : 1); 
c_idx++) {
-int x0   = x >> fc->ps.sps->hshift[c_idx];
-int y0   = y >> fc->ps.sps->vshift[c_idx];
+for (c_idx = 0; c_idx < (sps->r->sps_chroma_format_idc ? 3 : 1); c_idx++) {
 ptrdiff_t src_stride = fc->frame->linesize[c_idx];
-int ctb_size_h = ctb_size_y >> fc->ps.sps->hshift[c_idx];
-int ctb_size_v = ctb_size_y >> fc->ps.sps->vshift[c_idx];
-int width= FFMIN(ctb_size_h, (fc->ps.pps->width  >> 
fc->ps.sps->hshift[c_idx]) - x0);
-int height   = FFMIN(ctb_size_v, (fc->ps.pps->height >> 
fc->ps.sps->vshift[c_idx]) - y0);
+const int width  = FFMIN(sps->ctb_size_y, fc->ps.pps->width - x0) >> 
sps->hshift[c_idx];
+const int height = FFMIN(sps->ctb_size_y, fc->ps.pps->height - y0) >> 
sps->vshift[c_idx];
 int tab  = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
-uint8_t *src = POS(c_idx, x, y);
+uint8_t *src = POS(c_idx, x0, y0);
 
 switch (sao->type_idx[c_idx]) {
 case SAO_BAND:

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/vvcdec: misc, reformat ff_vvc_sao_filter

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sun Jun 16 14:40:24 
2024 +0800| [45d0ba8313368b8ce7dbe3ba991edeb2b93d895a] | committer: Nuo Mi

avcodec/vvcdec: misc, reformat ff_vvc_sao_filter

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=45d0ba8313368b8ce7dbe3ba991edeb2b93d895a
---

 libavcodec/vvc/filter.c | 52 -
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 10d11ce31f..3aa241ad90 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -266,44 +266,44 @@ void ff_vvc_sao_filter(VVCLocalContext *lc, int x0, int 
y0)
 {
 VVCFrameContext *fc  = lc->fc;
 const VVCSPS *sps= fc->ps.sps;
-static const uint8_t sao_tab[16] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 
7, 7, 8, 8 };
-int c_idx, restore;
 const int rx = x0 >> sps->ctb_log2_size_y;
 const int ry = y0 >> sps->ctb_log2_size_y;
-int edges[4] = { !rx, !ry, rx == fc->ps.pps->ctb_width - 1, ry == 
fc->ps.pps->ctb_height - 1 };
+const int edges[4]   = { !rx, !ry, rx == fc->ps.pps->ctb_width - 1, ry == 
fc->ps.pps->ctb_height - 1 };
 const SAOParams *sao = &CTB(fc->tab.sao, rx, ry);
 // flags indicating unfilterable edges
 uint8_t vert_edge[]  = { 0, 0 };
 uint8_t horiz_edge[] = { 0, 0 };
 uint8_t diag_edge[]  = { 0, 0, 0, 0 };
+int restore;
 
 sao_get_edges(vert_edge, horiz_edge, diag_edge, &restore, lc, edges, rx, 
ry);
 
-for (c_idx = 0; c_idx < (sps->r->sps_chroma_format_idc ? 3 : 1); c_idx++) {
-ptrdiff_t src_stride = fc->frame->linesize[c_idx];
-const int width  = FFMIN(sps->ctb_size_y, fc->ps.pps->width - x0) >> 
sps->hshift[c_idx];
-const int height = FFMIN(sps->ctb_size_y, fc->ps.pps->height - y0) >> 
sps->vshift[c_idx];
-int tab  = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
-uint8_t *src = POS(c_idx, x0, y0);
+for (int c_idx = 0; c_idx < (sps->r->sps_chroma_format_idc ? 3 : 1); 
c_idx++) {
+static const uint8_t sao_tab[16] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 
6, 7, 7, 8, 8 };
+const ptrdiff_t src_stride   = fc->frame->linesize[c_idx];
+uint8_t *src = POS(c_idx, x0, y0);
+const int width  = FFMIN(sps->ctb_size_y, 
fc->ps.pps->width - x0) >> sps->hshift[c_idx];
+const int height = FFMIN(sps->ctb_size_y, 
fc->ps.pps->height - y0) >> sps->vshift[c_idx];
+const int tab= sao_tab[(FFALIGN(width, 8) >> 3) - 
1];
 
 switch (sao->type_idx[c_idx]) {
-case SAO_BAND:
-fc->vvcdsp.sao.band_filter[tab](src, src, src_stride, src_stride,
-sao->offset_val[c_idx], sao->band_position[c_idx], width, 
height);
-break;
-case SAO_EDGE:
-{
-const ptrdiff_t dst_stride = 2 * MAX_PB_SIZE + 
AV_INPUT_BUFFER_PADDING_SIZE;
-uint8_t *dst   = lc->sao_buffer + dst_stride + 
AV_INPUT_BUFFER_PADDING_SIZE;
-
-sao_extends_edges(dst, dst_stride, src, src_stride, width, height, 
fc, x0, y0, rx, ry, edges, c_idx);
-
-fc->vvcdsp.sao.edge_filter[tab](src, dst, src_stride, 
sao->offset_val[c_idx],
-sao->eo_class[c_idx], width, height);
-fc->vvcdsp.sao.edge_restore[restore](src, dst, src_stride, 
dst_stride,
-sao, edges, width, height, c_idx, vert_edge, horiz_edge, 
diag_edge);
-break;
-}
+case SAO_BAND:
+fc->vvcdsp.sao.band_filter[tab](src, src, src_stride, 
src_stride,
+sao->offset_val[c_idx], sao->band_position[c_idx], width, 
height);
+break;
+case SAO_EDGE:
+{
+const ptrdiff_t dst_stride = 2 * MAX_PB_SIZE + 
AV_INPUT_BUFFER_PADDING_SIZE;
+uint8_t *dst   = lc->sao_buffer + dst_stride + 
AV_INPUT_BUFFER_PADDING_SIZE;
+
+sao_extends_edges(dst, dst_stride, src, src_stride, width, 
height, fc, x0, y0, rx, ry, edges, c_idx);
+
+fc->vvcdsp.sao.edge_filter[tab](src, dst, src_stride, 
sao->offset_val[c_idx],
+sao->eo_class[c_idx], width, height);
+fc->vvcdsp.sao.edge_restore[restore](src, dst, src_stride, 
dst_stride,
+sao, edges, width, height, c_idx, vert_edge, horiz_edge, 
diag_edge);
+break;
+}
 }
 }
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/vvcdec: misc, constify ALFParams

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Mon Jun 17 05:26:23 
2024 -0400| [fc731d8b39dd6d17431260c66cc15f49a25181f1] | committer: Nuo Mi

avcodec/vvcdec: misc, constify ALFParams

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fc731d8b39dd6d17431260c66cc15f49a25181f1
---

 libavcodec/vvc/filter.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index e6c0e89989..26974eae2b 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -926,7 +926,7 @@ static void alf_prepare_buffer(VVCFrameContext *fc, uint8_t 
*_dst, const uint8_t
 #define ALF_MAX_FILTER_SIZE (ALF_MAX_BLOCKS_IN_CTU * ALF_NUM_COEFF_LUMA)
 
 static void alf_get_coeff_and_clip(VVCLocalContext *lc, int16_t *coeff, 
int16_t *clip,
-const uint8_t *src, ptrdiff_t src_stride, int width, int height, int 
vb_pos, ALFParams *alf)
+const uint8_t *src, ptrdiff_t src_stride, int width, int height, int 
vb_pos, const ALFParams *alf)
 {
 const VVCFrameContext *fc = lc->fc;
 const H266RawSliceHeader *rsh = lc->sc->sh.r;
@@ -957,7 +957,7 @@ static void alf_get_coeff_and_clip(VVCLocalContext *lc, 
int16_t *coeff, int16_t
 
 static void alf_filter_luma(VVCLocalContext *lc, uint8_t *dst, const uint8_t 
*src,
 const ptrdiff_t dst_stride, const ptrdiff_t src_stride, const int x0, 
const int y0,
-const int width, const int height, const int _vb_pos, ALFParams *alf)
+const int width, const int height, const int _vb_pos, const ALFParams *alf)
 {
 const VVCFrameContext *fc = lc->fc;
 int vb_pos= _vb_pos - y0;
@@ -981,7 +981,7 @@ static int alf_clip_from_idx(const VVCFrameContext *fc, 
const int idx)
 
 static void alf_filter_chroma(VVCLocalContext *lc, uint8_t *dst, const uint8_t 
*src,
 const ptrdiff_t dst_stride, const ptrdiff_t src_stride, const int c_idx,
-const int width, const int height, const int vb_pos, ALFParams *alf)
+const int width, const int height, const int vb_pos, const ALFParams *alf)
 {
 VVCFrameContext *fc   = lc->fc;
 const H266RawSliceHeader *rsh = lc->sc->sh.r;
@@ -998,7 +998,7 @@ static void alf_filter_chroma(VVCLocalContext *lc, uint8_t 
*dst, const uint8_t *
 
 static void alf_filter_cc(VVCLocalContext *lc, uint8_t *dst, const uint8_t 
*luma,
 const ptrdiff_t dst_stride, const ptrdiff_t luma_stride, const int c_idx,
-const int width, const int height, const int hs, const int vs, const int 
vb_pos, ALFParams *alf)
+const int width, const int height, const int hs, const int vs, const int 
vb_pos, const ALFParams *alf)
 {
 const VVCFrameContext *fc = lc->fc;
 const H266RawSliceHeader *rsh = lc->sc->sh.r;
@@ -1079,7 +1079,7 @@ void ff_vvc_alf_filter(VVCLocalContext *lc, const int x0, 
const int y0)
 const int padded_stride = EDGE_EMU_BUFFER_STRIDE << ps;
 const int padded_offset = padded_stride * ALF_PADDING_SIZE + 
(ALF_PADDING_SIZE << ps);
 const int c_end = sps->r->sps_chroma_format_idc ? 
VVC_MAX_SAMPLE_ARRAYS : 1;
-ALFParams *alf  = &CTB(fc->tab.alf, rx, ry);
+const ALFParams *alf= &CTB(fc->tab.alf, rx, ry);
 int edges[MAX_EDGES]= { rx == 0, ry == 0, rx == pps->ctb_width - 1, ry 
== pps->ctb_height - 1 };
 
 alf_get_edges(lc, edges, rx, ry);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/vvcdec: refact out alf_get_edges

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sun Jun 16 21:06:54 
2024 -0400| [98b6b90601ceb5e99918f89c48bc2b7e787f9b37] | committer: Nuo Mi

avcodec/vvcdec: refact out alf_get_edges

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=98b6b90601ceb5e99918f89c48bc2b7e787f9b37
---

 libavcodec/vvc/filter.c | 55 +
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 3aa241ad90..546ec75c65 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -1036,6 +1036,37 @@ void ff_vvc_alf_copy_ctu_to_hv(VVCLocalContext* lc, 
const int x0, const int y0)
 }
 }
 
+static void alf_get_edges(const VVCLocalContext *lc, int edges[MAX_EDGES], 
const int rx, const int ry)
+{
+VVCFrameContext *fc  = lc->fc;
+const VVCSPS *sps= fc->ps.sps;
+const VVCPPS *pps= fc->ps.pps;
+const int subpic_idx = lc->sc->sh.r->curr_subpic_idx;
+
+// we can't use |= instead of || in this function; |= is not a short-circuit 
operator
+
+if (!pps->r->pps_loop_filter_across_tiles_enabled_flag) {
+edges[LEFT]   = edges[LEFT]   || (lc->boundary_flags & 
BOUNDARY_LEFT_TILE);
+edges[TOP]= edges[TOP]|| (lc->boundary_flags & 
BOUNDARY_UPPER_TILE);
+edges[RIGHT]  = edges[RIGHT]  || pps->ctb_to_col_bd[rx] != 
pps->ctb_to_col_bd[rx + 1];
+edges[BOTTOM] = edges[BOTTOM] || pps->ctb_to_row_bd[ry] != 
pps->ctb_to_row_bd[ry + 1];
+}
+
+if (!pps->r->pps_loop_filter_across_slices_enabled_flag) {
+edges[LEFT]   = edges[LEFT]   || (lc->boundary_flags & 
BOUNDARY_LEFT_SLICE);
+edges[TOP]= edges[TOP]|| (lc->boundary_flags & 
BOUNDARY_UPPER_SLICE);
+edges[RIGHT]  = edges[RIGHT]  || CTB(fc->tab.slice_idx, rx, ry) != 
CTB(fc->tab.slice_idx, rx + 1, ry);
+edges[BOTTOM] = edges[BOTTOM] || CTB(fc->tab.slice_idx, rx, ry) != 
CTB(fc->tab.slice_idx, rx, ry + 1);
+}
+
+if (!sps->r->sps_loop_filter_across_subpic_enabled_flag[subpic_idx]) {
+edges[LEFT]   = edges[LEFT]   || (lc->boundary_flags & 
BOUNDARY_LEFT_SUBPIC);
+edges[TOP]= edges[TOP]|| (lc->boundary_flags & 
BOUNDARY_UPPER_SUBPIC);
+edges[RIGHT]  = edges[RIGHT]  || 
fc->ps.sps->r->sps_subpic_ctu_top_left_x[subpic_idx] + 
fc->ps.sps->r->sps_subpic_width_minus1[subpic_idx] == rx;
+edges[BOTTOM] = edges[BOTTOM] || 
fc->ps.sps->r->sps_subpic_ctu_top_left_y[subpic_idx] + 
fc->ps.sps->r->sps_subpic_height_minus1[subpic_idx] == ry;
+}
+}
+
 void ff_vvc_alf_filter(VVCLocalContext *lc, const int x0, const int y0)
 {
 VVCFrameContext *fc = lc->fc;
@@ -1047,31 +1078,11 @@ void ff_vvc_alf_filter(VVCLocalContext *lc, const int 
x0, const int y0)
 const int ps= fc->ps.sps->pixel_shift;
 const int padded_stride = EDGE_EMU_BUFFER_STRIDE << ps;
 const int padded_offset = padded_stride * ALF_PADDING_SIZE + 
(ALF_PADDING_SIZE << ps);
-const int c_end = fc->ps.sps->r->sps_chroma_format_idc ? 
VVC_MAX_SAMPLE_ARRAYS : 1;
-const int subpic_idx= lc->sc->sh.r->curr_subpic_idx;
+const int c_end = sps->r->sps_chroma_format_idc ? 
VVC_MAX_SAMPLE_ARRAYS : 1;
 ALFParams *alf  = &CTB(fc->tab.alf, rx, ry);
 int edges[MAX_EDGES]= { rx == 0, ry == 0, rx == pps->ctb_width - 1, ry 
== pps->ctb_height - 1 };
 
-if (!pps->r->pps_loop_filter_across_tiles_enabled_flag) {
-edges[LEFT]   = edges[LEFT] || (lc->boundary_flags & 
BOUNDARY_LEFT_TILE);
-edges[TOP]= edges[TOP] || (lc->boundary_flags & 
BOUNDARY_UPPER_TILE);
-edges[RIGHT]  = edges[RIGHT] || pps->ctb_to_col_bd[rx] != 
pps->ctb_to_col_bd[rx + 1];
-edges[BOTTOM] = edges[BOTTOM] || pps->ctb_to_row_bd[ry] != 
pps->ctb_to_row_bd[ry + 1];
-}
-
-if (!pps->r->pps_loop_filter_across_slices_enabled_flag) {
-edges[LEFT]   = edges[LEFT] || (lc->boundary_flags & 
BOUNDARY_LEFT_SLICE);
-edges[TOP]= edges[TOP] || (lc->boundary_flags & 
BOUNDARY_UPPER_SLICE);
-edges[RIGHT]  = edges[RIGHT] || CTB(fc->tab.slice_idx, rx, ry) != 
CTB(fc->tab.slice_idx, rx + 1, ry);
-edges[BOTTOM] = edges[BOTTOM] || CTB(fc->tab.slice_idx, rx, ry) != 
CTB(fc->tab.slice_idx, rx, ry + 1);
-}
-
-if (!sps->r->sps_loop_filter_across_subpic_enabled_flag[subpic_idx]) {
-edges[LEFT]   = edges[LEFT] || (lc->boundary_flags & 
BOUNDARY_LEFT_SUBPIC);
-edges[TOP]= edges[TOP] || (lc->boundary_flags & 
BOUNDARY_UPPER_SUBPIC);
-edges[RIGHT]  = edges[RIGHT] || 
fc->ps.sps->r->sps_subpic_ctu_top_left_x[subpic_idx] + 
fc->ps.sps->r->sps_subpic_width_minus1[subpic_idx] == rx;
-edges[BOTTOM] = edges[BOTTOM] || 
fc->ps.sps->r->sps_subpic_ctu_top_left_y[subpic_idx] + 
fc->ps.sps->r->sps_subpic_height_minus1[subpic_idx] == ry;
-}
+alf_get_edges(lc, edges, rx, ry);
 
 for (int c_idx = 0; c_idx < c_end; c_idx++) {
 const int hs = fc->ps.sps->hshift

[FFmpeg-cvslog] avcodec/vvcdec: misc, remove unused ALFParams.applied

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sun Jun 16 23:44:23 
2024 -0400| [9b532ace0292711c837a801d535ddd029a08e876] | committer: Nuo Mi

avcodec/vvcdec: misc, remove unused ALFParams.applied

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9b532ace0292711c837a801d535ddd029a08e876
---

 libavcodec/vvc/ctu.h| 2 --
 libavcodec/vvc/filter.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h
index a987328d81..432dbc5ade 100644
--- a/libavcodec/vvc/ctu.h
+++ b/libavcodec/vvc/ctu.h
@@ -461,8 +461,6 @@ typedef struct ALFParams {
 uint8_t ctb_filt_set_idx_y; ///< AlfCtbFiltSetIdxY
 uint8_t alf_ctb_filter_alt_idx[2];  ///< alf_ctb_filter_alt_idx[]
 uint8_t ctb_cc_idc[2];  ///< alf_ctb_cc_cb_idc, 
alf_ctb_cc_cr_idc
-
-uint8_t applied[3];
 } ALFParams;
 
 /**
diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 546ec75c65..e6c0e89989 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -1118,8 +1118,6 @@ void ff_vvc_alf_filter(VVCLocalContext *lc, const int x0, 
const int y0)
 alf_filter_cc(lc, src, padded, src_stride, padded_stride, c_idx,
 width, height, hs, vs, (ctb_size_v << vs) - 
ALF_VB_POS_ABOVE_LUMA, alf);
 }
-
-alf->applied[c_idx] = 1;
 }
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/vvcdec: ps, derive virtual boundaries

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sun Jun  9 14:35:08 
2024 +0800| [80ded9cfe70c103b94a5d384c7e7fd59a0c0d4fb] | committer: Nuo Mi

avcodec/vvcdec: ps, derive virtual boundaries

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=80ded9cfe70c103b94a5d384c7e7fd59a0c0d4fb
---

 libavcodec/vvc/ps.c | 45 +
 libavcodec/vvc/ps.h |  6 ++
 2 files changed, 51 insertions(+)

diff --git a/libavcodec/vvc/ps.c b/libavcodec/vvc/ps.c
index 92368eafc2..58496c9fba 100644
--- a/libavcodec/vvc/ps.c
+++ b/libavcodec/vvc/ps.c
@@ -796,8 +796,49 @@ static int ph_max_num_subblock_merge_cand(const H266RawSPS 
*sps, const H266RawPi
 return sps->sps_sbtmvp_enabled_flag && ph->ph_temporal_mvp_enabled_flag;
 }
 
+static int ph_vb_pos(uint16_t *vbs, uint8_t *num_vbs, const uint16_t 
*pos_minus_1, const uint8_t num_pos, uint16_t max, const int ctb_size_y)
+{
+max = FF_CEIL_RSHIFT(max, 3) - 2;
+for (int i = 0; i < num_pos; i++) {
+if (pos_minus_1[i] > max)
+return AVERROR_INVALIDDATA;
+
+vbs[i] = (pos_minus_1[i] + 1) << 3;
+
+// The distance between any two virtual boundaries of the same orientation shall be 
greater than or equal to CtbSizeY luma samples
+if (i && vbs[i] < vbs[i - 1] + ctb_size_y)
+return AVERROR_INVALIDDATA;
+}
+*num_vbs = num_pos;
+
+return 0;
+}
+
+#define VBF(f) (sps->sps_virtual_boundaries_present_flag ? sps->sps_##f : 
ph->r->ph_##f)
+#define VBFS(c, d) VBF(virtual_boundary_pos_##c##_minus1), 
VBF(num_##d##_virtual_boundaries)
+
+static int ph_vb(VVCPH *ph, const H266RawSPS *sps, const H266RawPPS *pps)
+{
+const int ctb_size_y = 1 << (sps->sps_log2_ctu_size_minus5 + 5);
+int ret;
+
+if (!sps->sps_virtual_boundaries_enabled_flag)
+return 0;
+
+ret = ph_vb_pos(ph->vb_pos_x, &ph->num_ver_vbs, VBFS(x, ver), 
pps->pps_pic_width_in_luma_samples, ctb_size_y);
+if (ret < 0)
+return ret;
+
+ret = ph_vb_pos(ph->vb_pos_y, &ph->num_hor_vbs, VBFS(y, hor), 
pps->pps_pic_height_in_luma_samples, ctb_size_y);
+if (ret < 0)
+return ret;
+
+return 0;
+}
+
 static int ph_derive(VVCPH *ph, const H266RawSPS *sps, const H266RawPPS *pps, 
const int poc_tid0, const int is_clvss)
 {
+int ret;
 ph->max_num_subblock_merge_cand = ph_max_num_subblock_merge_cand(sps, 
ph->r);
 
 ph->poc = ph_compute_poc(ph->r, sps, poc_tid0, is_clvss);
@@ -805,6 +846,10 @@ static int ph_derive(VVCPH *ph, const H266RawSPS *sps, 
const H266RawPPS *pps, co
 if (pps->pps_wp_info_in_ph_flag)
 pred_weight_table(&ph->pwt, &ph->r->ph_pred_weight_table);
 
+ret = ph_vb(ph, sps, pps);
+if (ret < 0)
+return ret;
+
 return 0;
 }
 
diff --git a/libavcodec/vvc/ps.h b/libavcodec/vvc/ps.h
index 6656a06320..9203e2c57f 100644
--- a/libavcodec/vvc/ps.h
+++ b/libavcodec/vvc/ps.h
@@ -151,6 +151,12 @@ typedef struct VVCPH {
 //derived values
 uint32_t max_num_subblock_merge_cand;   ///< 
MaxNumSubblockMergeCand
 int32_t  poc;   ///< PicOrderCntVal
+
+uint8_t  num_ver_vbs;   ///< 
NumVerVirtualBoundaries
+uint16_t vb_pos_x[VVC_MAX_VBS]; ///< VirtualBoundaryPosX
+uint8_t  num_hor_vbs;   ///< 
NumHorVirtualBoundaries
+uint16_t vb_pos_y[VVC_MAX_VBS]; ///< VirtualBoundaryPosY
+
 PredWeightTable pwt;
 } VVCPH;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] cbs_h266: add VVC_MAX_VBS for max num of virtual boundaries

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sun Jun  9 12:28:16 
2024 +0800| [8b3d1f1fb3791bfb240e54a07a66c7f9dc172304] | committer: Nuo Mi

cbs_h266: add VVC_MAX_VBS for max num of virtual boundaries

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8b3d1f1fb3791bfb240e54a07a66c7f9dc172304
---

 libavcodec/cbs_h266.h | 8 
 libavcodec/cbs_h266_syntax_template.c | 8 
 libavcodec/vvc.h  | 3 +++
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/libavcodec/cbs_h266.h b/libavcodec/cbs_h266.h
index 21b9a4196c..5f12915b65 100644
--- a/libavcodec/cbs_h266.h
+++ b/libavcodec/cbs_h266.h
@@ -464,9 +464,9 @@ typedef struct H266RawSPS {
 uint8_t  sps_virtual_boundaries_enabled_flag;
 uint8_t  sps_virtual_boundaries_present_flag;
 uint8_t  sps_num_ver_virtual_boundaries;
-uint16_t sps_virtual_boundary_pos_x_minus1[3];
+uint16_t sps_virtual_boundary_pos_x_minus1[VVC_MAX_VBS];
 uint8_t  sps_num_hor_virtual_boundaries;
-uint16_t sps_virtual_boundary_pos_y_minus1[3];
+uint16_t sps_virtual_boundary_pos_y_minus1[VVC_MAX_VBS];
 
 uint8_t  sps_timing_hrd_params_present_flag;
 uint8_t  sps_sublayer_cpb_params_present_flag;
@@ -703,9 +703,9 @@ typedef struct  H266RawPictureHeader {
 
 uint8_t  ph_virtual_boundaries_present_flag;
 uint8_t  ph_num_ver_virtual_boundaries;
-uint16_t ph_virtual_boundary_pos_x_minus1[3];
+uint16_t ph_virtual_boundary_pos_x_minus1[VVC_MAX_VBS];
 uint8_t  ph_num_hor_virtual_boundaries;
-uint16_t ph_virtual_boundary_pos_y_minus1[3];
+uint16_t ph_virtual_boundary_pos_y_minus1[VVC_MAX_VBS];
 
 uint8_t  ph_pic_output_flag;
 H266RefPicLists ph_ref_pic_lists;
diff --git a/libavcodec/cbs_h266_syntax_template.c 
b/libavcodec/cbs_h266_syntax_template.c
index 53c4b60b0d..34b766c7af 100644
--- a/libavcodec/cbs_h266_syntax_template.c
+++ b/libavcodec/cbs_h266_syntax_template.c
@@ -1562,13 +1562,13 @@ static int FUNC(sps)(CodedBitstreamContext *ctx, 
RWContext *rw,
 flag(sps_virtual_boundaries_present_flag);
 if (current->sps_virtual_boundaries_present_flag) {
 ue(sps_num_ver_virtual_boundaries,
-   0, current->sps_pic_width_max_in_luma_samples <= 8 ? 0 : 3);
+   0, current->sps_pic_width_max_in_luma_samples <= 8 ? 0 : 
VVC_MAX_VBS);
 for (i = 0; i < current->sps_num_ver_virtual_boundaries; i++)
 ues(sps_virtual_boundary_pos_x_minus1[i],
 0, (current->sps_pic_width_max_in_luma_samples + 7) / 8 - 
2,
 1, i);
 ue(sps_num_hor_virtual_boundaries,
-   0, current->sps_pic_height_max_in_luma_samples <= 8 ? 0 : 3);
+   0, current->sps_pic_height_max_in_luma_samples <= 8 ? 0 : 
VVC_MAX_VBS);
 for (i = 0; i < current->sps_num_hor_virtual_boundaries; i++)
 ues(sps_virtual_boundary_pos_y_minus1[i],
 0, (current->sps_pic_height_max_in_luma_samples + 7) /
@@ -2714,13 +2714,13 @@ static int FUNC(picture_header) (CodedBitstreamContext 
*ctx, RWContext *rw,
 flag(ph_virtual_boundaries_present_flag);
 if (current->ph_virtual_boundaries_present_flag) {
 ue(ph_num_ver_virtual_boundaries,
-   0, pps->pps_pic_width_in_luma_samples <= 8 ? 0 : 3);
+   0, pps->pps_pic_width_in_luma_samples <= 8 ? 0 : VVC_MAX_VBS);
 for (i = 0; i < current->ph_num_ver_virtual_boundaries; i++) {
 ues(ph_virtual_boundary_pos_x_minus1[i],
 0, (pps->pps_pic_width_in_luma_samples + 7) / 8 - 2, 1, i);
 }
 ue(ph_num_hor_virtual_boundaries,
-   0, pps->pps_pic_height_in_luma_samples <= 8 ? 0 : 3);
+   0, pps->pps_pic_height_in_luma_samples <= 8 ? 0 : VVC_MAX_VBS);
 for (i = 0; i < current->ph_num_hor_virtual_boundaries; i++) {
 ues(ph_virtual_boundary_pos_y_minus1[i],
 0, (pps->pps_pic_height_in_luma_samples + 7) / 8 - 2, 1, 
i);
diff --git a/libavcodec/vvc.h b/libavcodec/vvc.h
index c4cec1eb8f..92639779c1 100644
--- a/libavcodec/vvc.h
+++ b/libavcodec/vvc.h
@@ -151,6 +151,9 @@ enum {
 // get near that, though, so set a lower limit here with the maximum
 // possible value for 8K video (at most 135 32x32 Ctb rows).
 VVC_MAX_ENTRY_POINTS = VVC_MAX_TILE_COLUMNS * 135,
+
+// {sps, ph}_num_{ver, hor}_virtual_boundaries shall be in the range [0, 3]
+VVC_MAX_VBS = 3,
 };
 
 #endif /* AVCODEC_VVC_H */

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/vvcdec: deblock, support virtual boundaries

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sat Jun 15 10:44:30 
2024 +0800| [17c8b637819e9c82f832931f50cc5a2b45a1428b] | committer: Nuo Mi

avcodec/vvcdec: deblock, support virtual boundaries

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=17c8b637819e9c82f832931f50cc5a2b45a1428b
---

 libavcodec/vvc/filter.c | 34 ++
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 26974eae2b..ee87b8c37b 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -55,6 +55,29 @@ static const uint8_t betatable[64] = {
  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,  78,  80,  82,  84,  86, 
 88,
 };
 
+static int get_virtual_boundary(const VVCFrameContext *fc, const int ctu_pos, 
const int vertical)
+{
+const VVCSPS *sps= fc->ps.sps;
+const VVCPH *ph  = &fc->ps.ph;
+const uint16_t *vbs  = vertical ? ph->vb_pos_x: ph->vb_pos_y;
+const uint8_t nb_vbs = vertical ? ph->num_ver_vbs : ph->num_hor_vbs;
+const int pos= ctu_pos << sps->ctb_log2_size_y;
+
+if (sps->r->sps_virtual_boundaries_enabled_flag) {
+for (int i = 0; i < nb_vbs; i++) {
+const int o = vbs[i] - pos;
+if (o >= 0 && o < sps->ctb_size_y)
+return vbs[i];
+}
+}
+return 0;
+}
+
+static int is_virtual_boundary(const VVCFrameContext *fc, const int pos, const 
int vertical)
+{
+return get_virtual_boundary(fc, pos >> fc->ps.sps->ctb_log2_size_y, 
vertical) == pos;
+}
+
 static int get_qPc(const VVCFrameContext *fc, const int x0, const int y0, 
const int chroma)
 {
 const int x= x0 >> MIN_TU_LOG2;
@@ -429,6 +452,7 @@ static void vvc_deblock_subblock_bs(const VVCLocalContext 
*lc,
 
 // bs for TU internal vertical PU boundaries
 for (int i = 8 - ((x0 - cb) % 8); i < width; i += 8) {
+const int is_vb = is_virtual_boundary(fc, x0 + i, vertical);
 const int xp_pu = (x0 + i - 1) >> log2_min_pu_size;
 const int xq_pu = (x0 + i) >> log2_min_pu_size;
 
@@ -436,7 +460,7 @@ static void vvc_deblock_subblock_bs(const VVCLocalContext 
*lc,
 const int y_pu   = (y0 + j) >> log2_min_pu_size;
 const MvField *mvf_p = &tab_mvf[y_pu * stridea + xp_pu * strideb];
 const MvField *mvf_q = &tab_mvf[y_pu * stridea + xq_pu * strideb];
-const int bs = boundary_strength(lc, mvf_q, mvf_p, rpl);
+const int bs = is_vb ? 0 : boundary_strength(lc, mvf_q, 
mvf_p, rpl);
 int x= x0 + i;
 int y= y0 + j;
 uint8_t max_len_p = 0, max_len_q = 0;
@@ -557,6 +581,7 @@ static void vvc_deblock_bs_luma(const VVCLocalContext *lc,
 (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA;
 
 if (deblock_is_boundary(lc, pos > 0 && !(pos & mask), pos, rs, vertical)) {
+const int is_vb = is_virtual_boundary(fc, pos, vertical);
 const int size  = vertical ? height : width;
 const int off   = cb - pos;
 const int cb_size   = (vertical ? fc->tab.cb_width : 
fc->tab.cb_height)[LUMA][off_q];
@@ -569,7 +594,7 @@ static void vvc_deblock_bs_luma(const VVCLocalContext *lc,
 const int x = x0 + i * !vertical;
 const int y = y0 + i * vertical;
 uint8_t max_len_p, max_len_q;
-const int bs = deblock_bs(lc, x - vertical, y - !vertical, x, y, 
rpl_p, LUMA, off, has_sb);
+const int bs = is_vb ? 0 : deblock_bs(lc, x - vertical, y - 
!vertical, x, y, rpl_p, LUMA, off, has_sb);
 
 TAB_BS(fc->tab.bs[vertical][LUMA], x, y) = bs;
 
@@ -594,13 +619,14 @@ static void vvc_deblock_bs_chroma(const VVCLocalContext 
*lc,
 const int pos = vertical ? x0 : y0;
 
 if (deblock_is_boundary(lc, pos > 0 && !(pos & mask), pos, rs, vertical)) {
-const int size = vertical ? height : width;
+const int is_vb = is_virtual_boundary(fc, pos, vertical);
+const int size  = vertical ? height : width;
 
 for (int c_idx = CB; c_idx <= CR; c_idx++) {
 for (int i = 0; i < size; i += 2) {
 const int x  = x0 + i * !vertical;
 const int y  = y0 + i * vertical;
-const int bs = deblock_bs(lc, x - vertical, y - !vertical, x, 
y, NULL, c_idx, 0, 0);
+const int bs = is_vb ? 0 : deblock_bs(lc, x - vertical, y - 
!vertical, x, y, NULL, c_idx, 0, 0);
 
 TAB_BS(fc->tab.bs[vertical][c_idx], x, y) = bs;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avcodec/vvcdec: sao, support virtual boundaries

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Sun Jun 16 18:18:56 
2024 +0800| [3bac5ba8610e672c77090ff919e5136a6291936e] | committer: Nuo Mi

avcodec/vvcdec: sao, support virtual boundaries

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3bac5ba8610e672c77090ff919e5136a6291936e
---

 libavcodec/vvc/filter.c | 46 ++
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index ee87b8c37b..88a8c03985 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -20,6 +20,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "libavutil/frame.h"
+#include "libavutil/imgutils.h"
 
 #include "ctu.h"
 #include "data.h"
@@ -198,7 +199,7 @@ static void sao_get_edges(uint8_t vert_edge[2], uint8_t 
horiz_edge[2], uint8_t d
 const uint8_t no_subpic_filter = rsps->sps_num_subpics_minus1 && 
!rsps->sps_loop_filter_across_subpic_enabled_flag[subpic_idx];
 uint8_t lf_edge[] = { 0, 0, 0, 0 };
 
-*restore = no_subpic_filter || no_tile_filter || !lfase;
+*restore = no_subpic_filter || no_tile_filter || !lfase || 
rsps->sps_virtual_boundaries_enabled_flag;
 
 if (!*restore)
 return;
@@ -206,21 +207,25 @@ static void sao_get_edges(uint8_t vert_edge[2], uint8_t 
horiz_edge[2], uint8_t d
 if (!edges[LEFT]) {
 lf_edge[LEFT]  = no_tile_filter && pps->ctb_to_col_bd[rx] == rx;
 lf_edge[LEFT] |= no_subpic_filter && 
rsps->sps_subpic_ctu_top_left_x[subpic_idx] == rx;
+lf_edge[LEFT] |= is_virtual_boundary(fc, rx << sps->ctb_log2_size_y, 
1);
 vert_edge[0]   = !sao_can_cross_slices(fc, rx, ry, -1, 0) || 
lf_edge[LEFT];
 }
 if (!edges[RIGHT]) {
 lf_edge[RIGHT]  = no_tile_filter && pps->ctb_to_col_bd[rx] != 
pps->ctb_to_col_bd[rx + 1];
 lf_edge[RIGHT] |= no_subpic_filter && 
rsps->sps_subpic_ctu_top_left_x[subpic_idx] + 
rsps->sps_subpic_width_minus1[subpic_idx] == rx;
+lf_edge[RIGHT] |= is_virtual_boundary(fc, (rx + 1) << 
sps->ctb_log2_size_y, 1);
 vert_edge[1]= !sao_can_cross_slices(fc, rx, ry, 1, 0) || 
lf_edge[RIGHT];
 }
 if (!edges[TOP]) {
 lf_edge[TOP]   = no_tile_filter && pps->ctb_to_row_bd[ry] == ry;
 lf_edge[TOP]  |= no_subpic_filter && 
rsps->sps_subpic_ctu_top_left_y[subpic_idx] == ry;
+lf_edge[TOP]  |= is_virtual_boundary(fc, ry << sps->ctb_log2_size_y, 
0);
 horiz_edge[0]  = !sao_can_cross_slices(fc, rx, ry, 0, -1) || 
lf_edge[TOP];
 }
 if (!edges[BOTTOM]) {
 lf_edge[BOTTOM]  = no_tile_filter && pps->ctb_to_row_bd[ry] != 
pps->ctb_to_row_bd[ry + 1];
 lf_edge[BOTTOM] |= no_subpic_filter && 
rsps->sps_subpic_ctu_top_left_y[subpic_idx] + 
rsps->sps_subpic_height_minus1[subpic_idx] == ry;
+lf_edge[BOTTOM] |= is_virtual_boundary(fc, (ry + 1) << 
sps->ctb_log2_size_y, 0);
 horiz_edge[1]= !sao_can_cross_slices(fc, rx, ry, 0, 1) || 
lf_edge[BOTTOM];
 }
 
@@ -285,6 +290,24 @@ static void sao_extends_edges(uint8_t *dst, const 
ptrdiff_t dst_stride,
 copy_ctb(dst, src, width << ps, height, dst_stride, src_stride);
 }
 
+static void sao_restore_vb(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t 
*src, ptrdiff_t src_stride,
+const int width, const int height, const int vb_pos, const int ps, const 
int vertical)
+{
+int w = 2;
+int h = (vertical ? height : width);
+int dx = vb_pos - 1;
+int dy = 0;
+
+if (!vertical) {
+FFSWAP(int, w, h);
+FFSWAP(int, dx, dy);
+}
+dst += dy * dst_stride +(dx << ps);
+src += dy * src_stride +(dx << ps);
+
+av_image_copy_plane(dst, dst_stride, src, src_stride, w << ps, h);
+}
+
 void ff_vvc_sao_filter(VVCLocalContext *lc, int x0, int y0)
 {
 VVCFrameContext *fc  = lc->fc;
@@ -297,7 +320,12 @@ void ff_vvc_sao_filter(VVCLocalContext *lc, int x0, int y0)
 uint8_t vert_edge[]  = { 0, 0 };
 uint8_t horiz_edge[] = { 0, 0 };
 uint8_t diag_edge[]  = { 0, 0, 0, 0 };
-int restore;
+int restore, vb_x = 0, vb_y = 0;;
+
+if (sps->r->sps_virtual_boundaries_enabled_flag) {
+vb_x = get_virtual_boundary(fc, rx, 1);
+vb_y = get_virtual_boundary(fc, ry, 0);
+}
 
 sao_get_edges(vert_edge, horiz_edge, diag_edge, &restore, lc, edges, rx, 
ry);
 
@@ -305,9 +333,13 @@ void ff_vvc_sao_filter(VVCLocalContext *lc, int x0, int y0)
 static const uint8_t sao_tab[16] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 
6, 7, 7, 8, 8 };
 const ptrdiff_t src_stride   = fc->frame->linesize[c_idx];
 uint8_t *src = POS(c_idx, x0, y0);
-const int width  = FFMIN(sps->ctb_size_y, 
fc->ps.pps->width - x0) >> sps->hshift[c_idx];
-const int height = FFMIN(sps->ctb_size_y, 
fc->ps.pps->height - y0) >> sps->vshift[c_idx];
+const int hs = sps->hshift[c_idx];
+const int vs   

[FFmpeg-cvslog] avcodec/vvcdec: alf, support virtual boundaries

2024-06-25 Thread Nuo Mi
ffmpeg | branch: master | Nuo Mi  | Mon Jun 17 00:40:14 
2024 -0400| [23531c97768f79d55d369e3dd1e4b52acd72f6ec] | committer: Nuo Mi

avcodec/vvcdec: alf, support virtual boundaries

see https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9503377

passed files:
GDR_A_ERICSSON_2.bit
GDR_B_NOKIA_2.bit
GDR_C_NOKIA_2.bit
VIRTUAL_A_MediaTek_3.bit
VIRTUAL_B_MediaTek_3.bit

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=23531c97768f79d55d369e3dd1e4b52acd72f6ec
---

 libavcodec/vvc/ctu.h|   7 +++
 libavcodec/vvc/filter.c | 134 +---
 libavcodec/vvc/inter.c  |   7 ---
 3 files changed, 100 insertions(+), 48 deletions(-)

diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h
index 432dbc5ade..d5c3e8d96f 100644
--- a/libavcodec/vvc/ctu.h
+++ b/libavcodec/vvc/ctu.h
@@ -463,6 +463,13 @@ typedef struct ALFParams {
 uint8_t ctb_cc_idc[2];  ///< alf_ctb_cc_cb_idc, 
alf_ctb_cc_cr_idc
 } ALFParams;
 
+typedef struct VVCRect {
+int l;  // left
+int t;  // top
+int r;  // right
+int b;  // bottom
+} VVCRect;
+
 /**
  * parse a CTU
  * @param lc local context for CTU
diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 88a8c03985..2cadaaaf22 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -56,6 +56,9 @@ static const uint8_t betatable[64] = {
  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,  78,  80,  82,  84,  86, 
 88,
 };
 
+// A CTU contains at most one vertical and one horizontal virtual boundary, so it 
is divided into at most 4 subblocks.
+#define MAX_VBBS 4
+
 static int get_virtual_boundary(const VVCFrameContext *fc, const int ctu_pos, 
const int vertical)
 {
 const VVCSPS *sps= fc->ps.sps;
@@ -1129,58 +1132,107 @@ static void alf_get_edges(const VVCLocalContext *lc, 
int edges[MAX_EDGES], const
 edges[RIGHT]  = edges[RIGHT]  || 
fc->ps.sps->r->sps_subpic_ctu_top_left_x[subpic_idx] + 
fc->ps.sps->r->sps_subpic_width_minus1[subpic_idx] == rx;
 edges[BOTTOM] = edges[BOTTOM] || 
fc->ps.sps->r->sps_subpic_ctu_top_left_y[subpic_idx] + 
fc->ps.sps->r->sps_subpic_height_minus1[subpic_idx] == ry;
 }
+
+if (sps->r->sps_virtual_boundaries_enabled_flag) {
+edges[LEFT]   = edges[LEFT]   || is_virtual_boundary(fc, rx << 
sps->ctb_log2_size_y, 1);
+edges[TOP]= edges[TOP]|| is_virtual_boundary(fc, ry << 
sps->ctb_log2_size_y, 0);
+edges[RIGHT]  = edges[RIGHT]  || is_virtual_boundary(fc, (rx + 1) << 
sps->ctb_log2_size_y, 1);
+edges[BOTTOM] = edges[BOTTOM] || is_virtual_boundary(fc, (ry + 1) << 
sps->ctb_log2_size_y, 0);
+}
+}
+
+static void alf_init_subblock(VVCRect *sb, int sb_edges[MAX_EDGES], const 
VVCRect *b, const int edges[MAX_EDGES])
+{
+*sb = *b;
+memcpy(sb_edges, edges, sizeof(int) * MAX_EDGES);
+}
+
+static void alf_get_subblock(VVCRect *sb, int edges[MAX_EDGES], const int bx, 
const int by, const int vb_pos[2], const int has_vb[2])
+{
+int *pos[] = { &sb->l, &sb->t, &sb->r, &sb->b };
+
+for (int vertical = 0; vertical <= 1; vertical++) {
+if (has_vb[vertical]) {
+const int c = vertical ? (bx ? LEFT : RIGHT) : (by ? TOP : BOTTOM);
+*pos[c] = vb_pos[vertical];
+edges[c]  = 1;
+}
+}
+}
+
+static void alf_get_subblocks(const VVCLocalContext *lc, VVCRect 
sbs[MAX_VBBS], int sb_edges[MAX_VBBS][MAX_EDGES], int *nb_sbs,
+const int x0, const int y0, const int rx, const int ry)
+{
+VVCFrameContext *fc  = lc->fc;
+const VVCSPS *sps= fc->ps.sps;
+const VVCPPS *pps= fc->ps.pps;
+const int ctu_size_y = sps->ctb_size_y;
+const int vb_pos[]   = { get_virtual_boundary(fc, ry, 0),  
get_virtual_boundary(fc, rx, 1) };
+const int has_vb[]   = { vb_pos[0] > y0, vb_pos[1] > x0 };
+const VVCRect b  = { x0, y0, FFMIN(x0 + ctu_size_y, pps->width), 
FFMIN(y0 + ctu_size_y, pps->height) };
+int edges[MAX_EDGES] = { !rx, !ry, rx == pps->ctb_width - 1, ry == 
pps->ctb_height - 1 };
+int i= 0;
+
+alf_get_edges(lc, edges, rx, ry);
+
+for (int by = 0; by <= has_vb[0]; by++) {
+for (int bx = 0; bx <= has_vb[1]; bx++, i++) {
+alf_init_subblock(sbs + i, sb_edges[i], &b, edges);
+alf_get_subblock(sbs + i, sb_edges[i], bx, by, vb_pos, has_vb);
+}
+}
+*nb_sbs = i;
 }
 
 void ff_vvc_alf_filter(VVCLocalContext *lc, const int x0, const int y0)
 {
 VVCFrameContext *fc = lc->fc;
 const VVCSPS *sps   = fc->ps.sps;
-const VVCPPS *pps   = fc->ps.pps;
-const int rx= x0 >> fc->ps.sps->ctb_log2_size_y;
-const int ry= y0 >> fc->ps.sps->ctb_log2_size_y;
-const int ctb_size_y= fc->ps.sps->ctb_size_y;
-const int ps= fc->ps.sps->pixel_shift;
+const int rx= x0 >> sps->ctb_log2_size_y;
+const int 

[FFmpeg-cvslog] aarch64: Add OpenBSD runtime detection of dotprod and i8mm using sysctl

2024-06-25 Thread Brad Smith
ffmpeg | branch: master | Brad Smith  | Sat Jun 22 22:49:14 
2024 -0400| [41190da9e11f536cb590df45ce9839974e5e6777] | committer: Brad Smith

aarch64: Add OpenBSD runtime detection of dotprod and i8mm using sysctl

Signed-off-by: Brad Smith 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=41190da9e11f536cb590df45ce9839974e5e6777
---

 libavutil/aarch64/cpu.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index 196bdaf6b0..cfa9306663 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -65,6 +65,44 @@ static int detect_flags(void)
 return flags;
 }
 
+#elif defined(__OpenBSD__)
+#include 
+#include 
+#include 
+#include 
+
+static int detect_flags(void)
+{
+int flags = 0;
+
+#ifdef CPU_ID_AA64ISAR0
+int mib[2];
+uint64_t isar0;
+uint64_t isar1;
+size_t len;
+
+mib[0] = CTL_MACHDEP;
+mib[1] = CPU_ID_AA64ISAR0;
+len = sizeof(isar0);
+if (sysctl(mib, 2, &isar0, &len, NULL, 0) != -1) {
+if (ID_AA64ISAR0_DP(isar0) >= ID_AA64ISAR0_DP_IMPL)
+flags |= AV_CPU_FLAG_DOTPROD;
+}
+
+mib[0] = CTL_MACHDEP;
+mib[1] = CPU_ID_AA64ISAR1;
+len = sizeof(isar1);
+if (sysctl(mib, 2, &isar1, &len, NULL, 0) != -1) {
+#ifdef ID_AA64ISAR1_I8MM_IMPL
+if (ID_AA64ISAR1_I8MM(isar1) >= ID_AA64ISAR1_I8MM_IMPL)
+flags |= AV_CPU_FLAG_I8MM;
+#endif
+}
+#endif
+
+return flags;
+}
+
 #elif defined(_WIN32)
 #include 
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] aarch64: Add OpenBSD runtime detection of dotprod and i8mm using sysctl

2024-06-25 Thread Brad Smith
ffmpeg | branch: release/7.0 | Brad Smith  | Sat Jun 22 
22:49:14 2024 -0400| [887e6f404da5f2b0270212164b03a85e223d0f44] | committer: 
Brad Smith

aarch64: Add OpenBSD runtime detection of dotprod and i8mm using sysctl

Signed-off-by: Brad Smith 
(cherry picked from commit 41190da9e11f536cb590df45ce9839974e5e6777)
Signed-off-by: Brad Smith 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=887e6f404da5f2b0270212164b03a85e223d0f44
---

 libavutil/aarch64/cpu.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index 7a05391343..5ddc7ca888 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -64,6 +64,44 @@ static int detect_flags(void)
 return flags;
 }
 
+#elif defined(__OpenBSD__)
+#include 
+#include 
+#include 
+#include 
+
+static int detect_flags(void)
+{
+int flags = 0;
+
+#ifdef CPU_ID_AA64ISAR0
+int mib[2];
+uint64_t isar0;
+uint64_t isar1;
+size_t len;
+
+mib[0] = CTL_MACHDEP;
+mib[1] = CPU_ID_AA64ISAR0;
+len = sizeof(isar0);
+if (sysctl(mib, 2, &isar0, &len, NULL, 0) != -1) {
+if (ID_AA64ISAR0_DP(isar0) >= ID_AA64ISAR0_DP_IMPL)
+flags |= AV_CPU_FLAG_DOTPROD;
+}
+
+mib[0] = CTL_MACHDEP;
+mib[1] = CPU_ID_AA64ISAR1;
+len = sizeof(isar1);
+if (sysctl(mib, 2, &isar1, &len, NULL, 0) != -1) {
+#ifdef ID_AA64ISAR1_I8MM_IMPL
+if (ID_AA64ISAR1_I8MM(isar1) >= ID_AA64ISAR1_I8MM_IMPL)
+flags |= AV_CPU_FLAG_I8MM;
+#endif
+}
+#endif
+
+return flags;
+}
+
 #elif defined(_WIN32)
 #include 
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] aarch64: Add OpenBSD runtime detection of dotprod and i8mm using sysctl

2024-06-25 Thread Brad Smith
ffmpeg | branch: release/6.1 | Brad Smith  | Sat Jun 22 
22:49:14 2024 -0400| [07fac146530e1d10660e8e138360b0e3673aac9e] | committer: 
Brad Smith

aarch64: Add OpenBSD runtime detection of dotprod and i8mm using sysctl

Signed-off-by: Brad Smith 
(cherry picked from commit 41190da9e11f536cb590df45ce9839974e5e6777)
Signed-off-by: Brad Smith 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=07fac146530e1d10660e8e138360b0e3673aac9e
---

 libavutil/aarch64/cpu.c | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index bd780e8591..d97d77fe60 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -82,6 +82,44 @@ static int detect_flags(void)
 return flags;
 }
 
+#elif defined(__OpenBSD__)
+#include 
+#include 
+#include 
+#include 
+
+static int detect_flags(void)
+{
+int flags = 0;
+
+#ifdef CPU_ID_AA64ISAR0
+int mib[2];
+uint64_t isar0;
+uint64_t isar1;
+size_t len;
+
+mib[0] = CTL_MACHDEP;
+mib[1] = CPU_ID_AA64ISAR0;
+len = sizeof(isar0);
+if (sysctl(mib, 2, &isar0, &len, NULL, 0) != -1) {
+if (ID_AA64ISAR0_DP(isar0) >= ID_AA64ISAR0_DP_IMPL)
+flags |= AV_CPU_FLAG_DOTPROD;
+}
+
+mib[0] = CTL_MACHDEP;
+mib[1] = CPU_ID_AA64ISAR1;
+len = sizeof(isar1);
+if (sysctl(mib, 2, &isar1, &len, NULL, 0) != -1) {
+#ifdef ID_AA64ISAR1_I8MM_IMPL
+if (ID_AA64ISAR1_I8MM(isar1) >= ID_AA64ISAR1_I8MM_IMPL)
+flags |= AV_CPU_FLAG_I8MM;
+#endif
+}
+#endif
+
+return flags;
+}
+
 #elif defined(_WIN32)
 #include 
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".