From: Shaun Loo <shaunlo...@gmail.com>

This is a part of Google Summer of Code 2023

Co-authored-by: Nuo Mi <nuomi2...@gmail.com>
---
 libavcodec/x86/h26x/h2656_sao.asm |   8 +--
 libavcodec/x86/vvc/Makefile       |   2 +
 libavcodec/x86/vvc/dsp_init.c     |  41 +++++++++++
 libavcodec/x86/vvc/sao.asm        |  73 +++++++++++++++++++
 libavcodec/x86/vvc/sao_10bit.asm  | 113 ++++++++++++++++++++++++++++++
 5 files changed, 233 insertions(+), 4 deletions(-)
 create mode 100644 libavcodec/x86/vvc/sao.asm
 create mode 100644 libavcodec/x86/vvc/sao_10bit.asm

diff --git a/libavcodec/x86/h26x/h2656_sao.asm b/libavcodec/x86/h26x/h2656_sao.asm
index 504fcb388b..a80ee26178 100644
--- a/libavcodec/x86/h26x/h2656_sao.asm
+++ b/libavcodec/x86/h26x/h2656_sao.asm
@@ -147,7 +147,7 @@ align 16
 %assign i i+mmsize
 %endrep
 
-%if %2 == 48
+%if %2 == 48 || %2 == 80 || %2 == 112
 INIT_XMM cpuname
 
     mova             m13, [srcq + i]
@@ -160,7 +160,7 @@ INIT_XMM cpuname
 %if cpuflag(avx2)
 INIT_YMM cpuname
 %endif
-%endif ; %2 == 48
+%endif ; %2 == 48 || %2 == 80 || %2 == 112
 
     add             dstq, dststrideq             ; dst += dststride
     add             srcq, srcstrideq             ; src += srcstride
@@ -280,7 +280,7 @@ align 16
 %assign i i+mmsize
 %endrep
 
-%if %2 == 48
+%if %2 == 48 || %2 == 80 || %2 == 112
 INIT_XMM cpuname
 
     mova              m1, [srcq + i]
@@ -291,7 +291,7 @@ INIT_XMM cpuname
 %if cpuflag(avx2)
 INIT_YMM cpuname
 %endif
-%endif
+%endif ; %2 == 48 || %2 == 80 || %2 == 112
 
     add             dstq, dststrideq
     add             srcq, EDGE_SRCSTRIDE
diff --git a/libavcodec/x86/vvc/Makefile b/libavcodec/x86/vvc/Makefile
index 86a6c8ba7c..c426b156c1 100644
--- a/libavcodec/x86/vvc/Makefile
+++ b/libavcodec/x86/vvc/Makefile
@@ -8,4 +8,6 @@ X86ASM-OBJS-$(CONFIG_VVC_DECODER)      += x86/vvc/alf.o             \
                                           x86/vvc/mc.o              \
                                           x86/vvc/of.o              \
                                           x86/vvc/sad.o             \
+                                          x86/vvc/sao.o             \
+                                          x86/vvc/sao_10bit.o       \
                                           x86/h26x/h2656_inter.o
diff --git a/libavcodec/x86/vvc/dsp_init.c b/libavcodec/x86/vvc/dsp_init.c
index bb68ba0b1e..cbcfa40a66 100644
--- a/libavcodec/x86/vvc/dsp_init.c
+++ b/libavcodec/x86/vvc/dsp_init.c
@@ -215,6 +215,44 @@ ALF_FUNCS(16, 12, avx2)
 
 #endif
 
+#define SAO_FILTER_FUNC(wd, bitd, opt) /* prototypes of the band and edge SAO filters for one width / bit depth / CPU flavour */     \
+void ff_vvc_sao_band_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,  \
+    const int16_t *sao_offset_val, int sao_left_class, int width, int height);                                                       \
+void ff_vvc_sao_edge_filter_##wd##_##bitd##_##opt(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,                          \
+        const int16_t *sao_offset_val, int eo, int width, int height); /* last line: no continuation, so the macro ends here */
+
+#define SAO_FILTER_FUNCS(bitd, opt)     /* declare both filters for each of the nine widths 8..128 */ \
+    SAO_FILTER_FUNC(8,   bitd, opt)     \
+    SAO_FILTER_FUNC(16,  bitd, opt)     \
+    SAO_FILTER_FUNC(32,  bitd, opt)     \
+    SAO_FILTER_FUNC(48,  bitd, opt)     \
+    SAO_FILTER_FUNC(64,  bitd, opt)     \
+    SAO_FILTER_FUNC(80,  bitd, opt)     \
+    SAO_FILTER_FUNC(96,  bitd, opt)     \
+    SAO_FILTER_FUNC(112, bitd, opt)     \
+    SAO_FILTER_FUNC(128, bitd, opt)     /* no trailing continuation: the macro must not depend on the blank line below */
+
+SAO_FILTER_FUNCS(8,  avx2)  /* only avx2 variants are declared here, one set per bit depth */
+SAO_FILTER_FUNCS(10, avx2)
+SAO_FILTER_FUNCS(12, avx2)
+
+#define SAO_FILTER_INIT(type, bitd, opt) do { /* fill slots 0..8 of c->sao.<type>_filter; needs a context pointer named c in scope */ \
+    c->sao.type##_filter[0] = ff_vvc_sao_##type##_filter_8_##bitd##_##opt;    \
+    c->sao.type##_filter[1] = ff_vvc_sao_##type##_filter_16_##bitd##_##opt;   \
+    c->sao.type##_filter[2] = ff_vvc_sao_##type##_filter_32_##bitd##_##opt;   \
+    c->sao.type##_filter[3] = ff_vvc_sao_##type##_filter_48_##bitd##_##opt;   \
+    c->sao.type##_filter[4] = ff_vvc_sao_##type##_filter_64_##bitd##_##opt;   \
+    c->sao.type##_filter[5] = ff_vvc_sao_##type##_filter_80_##bitd##_##opt;   \
+    c->sao.type##_filter[6] = ff_vvc_sao_##type##_filter_96_##bitd##_##opt;   \
+    c->sao.type##_filter[7] = ff_vvc_sao_##type##_filter_112_##bitd##_##opt;  \
+    c->sao.type##_filter[8] = ff_vvc_sao_##type##_filter_128_##bitd##_##opt;  \
+} while (0)
+
+#define SAO_INIT(bitd, opt) do {                                     /* install the SAO filters for one bit depth / CPU flavour */ \
+    SAO_FILTER_INIT(band, bitd, opt);                                /* all nine band filter slots */ \
+    SAO_FILTER_INIT(edge, bitd, opt);                                /* all nine edge filter slots */ \
+} while (0)
+
 #define AVG_INIT(bd, opt) do {                                       \
     c->inter.avg    = bf(vvc_avg, bd, opt);                          \
     c->inter.w_avg  = bf(vvc_w_avg, bd, opt);                        \
@@ -329,6 +367,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
 
             // filter
             ALF_INIT(8);
+            SAO_INIT(8, avx2);
         }
 #endif
         break;
@@ -350,6 +389,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
 
             // filter
             ALF_INIT(10);
+            SAO_INIT(10, avx2);
         }
 #endif
         break;
@@ -371,6 +411,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
 
             // filter
             ALF_INIT(12);
+            SAO_INIT(12, avx2);
         }
 #endif
         break;
diff --git a/libavcodec/x86/vvc/sao.asm b/libavcodec/x86/vvc/sao.asm
new file mode 100644
index 0000000000..5f7d7e5358
--- /dev/null
+++ b/libavcodec/x86/vvc/sao.asm
@@ -0,0 +1,73 @@
+;******************************************************************************
+;* SIMD optimized SAO functions for VVC 8bit decoding
+;*
+;* Copyright (c) 2024 Shaun Loo
+;* Copyright (c) 2024 Nuo Mi
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%define MAX_PB_SIZE  128
+%include "libavcodec/x86/h26x/h2656_sao.asm"
+
+%macro VVC_SAO_BAND_FILTER 2 ; args as used below: block width, then a small per-width number (meaning set by the shared macro, TODO confirm)
+    H2656_SAO_BAND_FILTER vvc, %1, %2 ; forward both arguments behind a "vvc" tag; macro presumably comes from the h2656_sao.asm include
+%endmacro
+
+%macro VVC_SAO_BAND_FILTER_FUNCS 0 ; NOTE(review): never invoked anywhere in this file, consider dropping it
+VVC_SAO_BAND_FILTER   8, 0
+VVC_SAO_BAND_FILTER  16, 1
+VVC_SAO_BAND_FILTER  32, 2 ; from here on the second arguments differ from the AVX2 list further down
+VVC_SAO_BAND_FILTER  48, 2
+VVC_SAO_BAND_FILTER  64, 4
+VVC_SAO_BAND_FILTER  80, 4
+VVC_SAO_BAND_FILTER  96, 6
+VVC_SAO_BAND_FILTER 112, 6
+VVC_SAO_BAND_FILTER 128, 8
+%endmacro
+
+%if HAVE_AVX2_EXTERNAL ; skipped entirely unless HAVE_AVX2_EXTERNAL is non-zero
+INIT_XMM avx2 ; widths 8 and 16 are instantiated in XMM mode
+VVC_SAO_BAND_FILTER   8, 0
+VVC_SAO_BAND_FILTER  16, 1
+INIT_YMM avx2 ; widths 32..128 are instantiated in YMM mode
+VVC_SAO_BAND_FILTER  32, 1
+VVC_SAO_BAND_FILTER  48, 1 ; 48, 80 and 112 are the widths this patch adds to the XMM tail condition in h2656_sao.asm
+VVC_SAO_BAND_FILTER  64, 2
+VVC_SAO_BAND_FILTER  80, 2 ; same second argument as the width 16 below it; presumably the tail step covers the rest (TODO confirm)
+VVC_SAO_BAND_FILTER  96, 3
+VVC_SAO_BAND_FILTER 112, 3
+VVC_SAO_BAND_FILTER 128, 4
+%endif
+
+%macro VVC_SAO_EDGE_FILTER 2-3 ; accepts two or three arguments
+    H2656_SAO_EDGE_FILTER vvc, %{1:-1} ; forward every received argument, first to last, behind a "vvc" tag
+%endmacro
+
+%if HAVE_AVX2_EXTERNAL ; skipped entirely unless HAVE_AVX2_EXTERNAL is non-zero
+INIT_XMM avx2 ; widths 8 and 16 are instantiated in XMM mode
+VVC_SAO_EDGE_FILTER  8, 0 ; the only instantiation without a third argument
+VVC_SAO_EDGE_FILTER 16, 1, a ; third argument a/u presumably selects aligned vs unaligned access (TODO confirm in H2656_SAO_EDGE_FILTER)
+INIT_YMM avx2 ; widths 32..128 are instantiated in YMM mode
+VVC_SAO_EDGE_FILTER  32, 1, a
+VVC_SAO_EDGE_FILTER  48, 1, u ; the "u" widths 48, 80, 112 are exactly those added to the XMM tail condition in h2656_sao.asm
+VVC_SAO_EDGE_FILTER  64, 2, a
+VVC_SAO_EDGE_FILTER  80, 2, u
+VVC_SAO_EDGE_FILTER  96, 3, a
+VVC_SAO_EDGE_FILTER 112, 3, u
+VVC_SAO_EDGE_FILTER 128, 4, a
+%endif
diff --git a/libavcodec/x86/vvc/sao_10bit.asm b/libavcodec/x86/vvc/sao_10bit.asm
new file mode 100644
index 0000000000..b7d3d08008
--- /dev/null
+++ b/libavcodec/x86/vvc/sao_10bit.asm
@@ -0,0 +1,113 @@
+;******************************************************************************
+;* SIMD optimized SAO functions for VVC 10/12bit decoding
+;*
+;* Copyright (c) 2024 Shaun Loo
+;* Copyright (c) 2024 Nuo Mi
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%define MAX_PB_SIZE  128
+%include "libavcodec/x86/h26x/h2656_sao_10bit.asm"
+
+%macro VVC_SAO_BAND_FILTER 3 ; args as used below: bit depth (10 or 12), block width, per-width number
+    H2656_SAO_BAND_FILTER vvc, %1, %2, %3 ; forward all three behind a "vvc" tag; macro presumably comes from the h2656_sao_10bit.asm include
+%endmacro
+
+%macro VVC_SAO_BAND_FILTER_FUNCS 1 ; one argument: the bit depth passed through to every instantiation
+    VVC_SAO_BAND_FILTER %1,   8,  1 ; in this list the last argument is always width / 8
+    VVC_SAO_BAND_FILTER %1,  16,  2
+    VVC_SAO_BAND_FILTER %1,  32,  4
+    VVC_SAO_BAND_FILTER %1,  48,  6
+    VVC_SAO_BAND_FILTER %1,  64,  8
+    VVC_SAO_BAND_FILTER %1,  80, 10
+    VVC_SAO_BAND_FILTER %1,  96, 12
+    VVC_SAO_BAND_FILTER %1, 112, 14
+    VVC_SAO_BAND_FILTER %1, 128, 16
+%endmacro
+
+%macro VVC_SAO_BAND_FILTER_FUNCS 0 ; zero-argument overload of the one-argument macro above (same name, different parameter count)
+    VVC_SAO_BAND_FILTER_FUNCS 10 ; all widths for bit depth 10
+    VVC_SAO_BAND_FILTER_FUNCS 12 ; all widths for bit depth 12
+%endmacro
+
+INIT_XMM sse2 ; NOTE(review): the dsp_init.c hunks in this patch declare and install only avx2 variants, so these look unreferenced
+VVC_SAO_BAND_FILTER_FUNCS
+INIT_XMM avx ; same remark as for sse2 above
+VVC_SAO_BAND_FILTER_FUNCS
+
+%if HAVE_AVX2_EXTERNAL ; skipped entirely unless HAVE_AVX2_EXTERNAL is non-zero
+
+%macro VVC_SAO_BAND_FILTER_FUNCS_AVX2 1 ; one argument: the bit depth
+    INIT_XMM avx2 ; only width 8 is instantiated in XMM mode
+    VVC_SAO_BAND_FILTER %1,   8, 1
+    INIT_YMM avx2 ; widths 16..128 are instantiated in YMM mode
+    VVC_SAO_BAND_FILTER %1,  16, 1 ; for the YMM widths the last argument is always width / 16
+    VVC_SAO_BAND_FILTER %1,  32, 2
+    VVC_SAO_BAND_FILTER %1,  48, 3
+    VVC_SAO_BAND_FILTER %1,  64, 4
+    VVC_SAO_BAND_FILTER %1,  80, 5
+    VVC_SAO_BAND_FILTER %1,  96, 6
+    VVC_SAO_BAND_FILTER %1, 112, 7
+    VVC_SAO_BAND_FILTER %1, 128, 8
+%endmacro
+
+VVC_SAO_BAND_FILTER_FUNCS_AVX2 10 ; bit depths match the SAO_FILTER_FUNCS(10/12, avx2) declarations in dsp_init.c
+VVC_SAO_BAND_FILTER_FUNCS_AVX2 12
+
+%endif ; HAVE_AVX2_EXTERNAL
+
+%macro VVC_SAO_EDGE_FILTER 3 ; args as used below: bit depth (10 or 12), block width, per-width number
+    H2656_SAO_EDGE_FILTER vvc, %1, %2, %3 ; forward all three behind a "vvc" tag; macro presumably comes from the h2656_sao_10bit.asm include
+%endmacro
+
+%macro VVC_SAO_EDGE_FILTER_FUNCS 1 ; one argument: the bit depth passed through to every instantiation
+    VVC_SAO_EDGE_FILTER %1,   8,  1 ; in this list the last argument is always width / 8
+    VVC_SAO_EDGE_FILTER %1,  16,  2
+    VVC_SAO_EDGE_FILTER %1,  32,  4
+    VVC_SAO_EDGE_FILTER %1,  48,  6
+    VVC_SAO_EDGE_FILTER %1,  64,  8
+    VVC_SAO_EDGE_FILTER %1,  80, 10
+    VVC_SAO_EDGE_FILTER %1,  96, 12
+    VVC_SAO_EDGE_FILTER %1, 112, 14
+    VVC_SAO_EDGE_FILTER %1, 128, 16
+%endmacro
+
+INIT_XMM sse2 ; NOTE(review): the dsp_init.c hunks in this patch declare and install only avx2 variants, so these look unreferenced
+VVC_SAO_EDGE_FILTER_FUNCS 10 ; all widths for bit depth 10
+VVC_SAO_EDGE_FILTER_FUNCS 12 ; all widths for bit depth 12
+
+%if HAVE_AVX2_EXTERNAL ; skipped entirely unless HAVE_AVX2_EXTERNAL is non-zero
+
+%macro VVC_SAO_EDGE_FILTER_FUNCS_AVX2 1 ; one argument: the bit depth
+    INIT_XMM avx2 ; only width 8 is instantiated in XMM mode
+    VVC_SAO_EDGE_FILTER %1,   8, 1
+    INIT_YMM avx2 ; widths 16..128 are instantiated in YMM mode
+    VVC_SAO_EDGE_FILTER %1,  16, 1 ; for the YMM widths the last argument is always width / 16
+    VVC_SAO_EDGE_FILTER %1,  32, 2
+    VVC_SAO_EDGE_FILTER %1,  48, 3
+    VVC_SAO_EDGE_FILTER %1,  64, 4
+    VVC_SAO_EDGE_FILTER %1,  80, 5
+    VVC_SAO_EDGE_FILTER %1,  96, 6
+    VVC_SAO_EDGE_FILTER %1, 112, 7
+    VVC_SAO_EDGE_FILTER %1, 128, 8
+%endmacro
+
+VVC_SAO_EDGE_FILTER_FUNCS_AVX2 10 ; bit depths match the SAO_FILTER_FUNCS(10/12, avx2) declarations in dsp_init.c
+VVC_SAO_EDGE_FILTER_FUNCS_AVX2 12
+
+%endif ; HAVE_AVX2_EXTERNAL
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to