From: Zhao Zhili <zhiliz...@tencent.com>
---
 libavcodec/aarch64/h26x/dsp.h             |  6 +++-
 libavcodec/aarch64/h26x/sao_neon.S        | 44 +++++++++++++++++------
 libavcodec/aarch64/hevcdsp_init_aarch64.c |  2 +-
 libavcodec/aarch64/vvc/Makefile           |  5 +--
 libavcodec/aarch64/vvc/dsp_init.c         |  6 ++++
 5 files changed, 48 insertions(+), 15 deletions(-)
diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h index 4dcaf0e6bb..d3f7a4dfe3 100644 --- a/libavcodec/aarch64/h26x/dsp.h +++ b/libavcodec/aarch64/h26x/dsp.h @@ -24,7 +24,7 @@ #include <stddef.h> #include <stdint.h> -void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src, +void ff_h26x_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height); @@ -33,4 +33,8 @@ void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrd void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height); +void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, + const int16_t *sao_offset_val, int eo, int width, int height); +void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, + const int16_t *sao_offset_val, int eo, int width, int height); #endif diff --git a/libavcodec/aarch64/h26x/sao_neon.S b/libavcodec/aarch64/h26x/sao_neon.S index dc407484de..c43820135e 100644 --- a/libavcodec/aarch64/h26x/sao_neon.S +++ b/libavcodec/aarch64/h26x/sao_neon.S @@ -24,15 +24,17 @@ #include "libavutil/aarch64/asm.S" -#define MAX_PB_SIZE 64 +#define HEVC_MAX_PB_SIZE 64 +#define VVC_MAX_PB_SIZE 128 #define AV_INPUT_BUFFER_PADDING_SIZE 64 -#define SAO_STRIDE (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) +#define HEVC_SAO_STRIDE (2 * HEVC_MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) +#define VVC_SAO_STRIDE (2 * VVC_MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) // void sao_band_filter(uint8_t *_dst, uint8_t *_src, // ptrdiff_t stride_dst, ptrdiff_t stride_src, // int16_t *sao_offset_val, int sao_left_class, // int width, int height) -function ff_hevc_sao_band_filter_8x8_8_neon, export=1 +function ff_h26x_sao_band_filter_8x8_8_neon, export=1 stp xzr, 
xzr, [sp, #-64]! stp xzr, xzr, [sp, #16] stp xzr, xzr, [sp, #32] @@ -79,16 +81,30 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1 ret endfunc -.Lsao_edge_pos: +.Lhevc_sao_edge_pos: .word 1 // horizontal -.word SAO_STRIDE // vertical -.word SAO_STRIDE + 1 // 45 degree -.word SAO_STRIDE - 1 // 135 degree +.word HEVC_SAO_STRIDE // vertical +.word HEVC_SAO_STRIDE + 1 // 45 degree +.word HEVC_SAO_STRIDE - 1 // 135 degree + +.Lvvc_sao_edge_pos: +.word 1 // horizontal +.word VVC_SAO_STRIDE // vertical +.word VVC_SAO_STRIDE + 1 // 45 degree +.word VVC_SAO_STRIDE - 1 // 135 degree + +function ff_vvc_sao_edge_filter_16x16_8_neon, export=1 + adr x7, .Lvvc_sao_edge_pos + mov x15, #VVC_SAO_STRIDE + b 1f +endfunc // ff_hevc_sao_edge_filter_16x16_8_neon(char *dst, char *src, ptrdiff stride_dst, // int16 *sao_offset_val, int eo, int width, int height) function ff_hevc_sao_edge_filter_16x16_8_neon, export=1 - adr x7, .Lsao_edge_pos + adr x7, .Lhevc_sao_edge_pos + mov x15, #HEVC_SAO_STRIDE +1: ld1 {v3.8h}, [x3] // load sao_offset_val add w5, w5, #0xF bic w5, w5, #0xF @@ -101,7 +117,6 @@ function ff_hevc_sao_edge_filter_16x16_8_neon, export=1 uzp2 v1.16b, v3.16b, v3.16b // sao_offset_val -> upper uzp1 v0.16b, v3.16b, v3.16b // sao_offset_val -> lower movi v2.16b, #2 - mov x15, #SAO_STRIDE // strides between end of line and next src/dst sub x15, x15, x5 // stride_src - width sub x16, x2, x5 // stride_dst - width @@ -145,10 +160,18 @@ function ff_hevc_sao_edge_filter_16x16_8_neon, export=1 ret endfunc +function ff_vvc_sao_edge_filter_8x8_8_neon, export=1 + adr x7, .Lvvc_sao_edge_pos + mov x15, #VVC_SAO_STRIDE + b 1f +endfunc + // ff_hevc_sao_edge_filter_8x8_8_neon(char *dst, char *src, ptrdiff stride_dst, // int16 *sao_offset_val, int eo, int width, int height) function ff_hevc_sao_edge_filter_8x8_8_neon, export=1 - adr x7, .Lsao_edge_pos + adr x7, .Lhevc_sao_edge_pos + mov x15, #HEVC_SAO_STRIDE +1: ldr w4, [x7, w4, uxtw #2] ld1 {v3.8h}, [x3] mov v3.h[7], v3.h[0] @@ -160,7 
+183,6 @@ function ff_hevc_sao_edge_filter_8x8_8_neon, export=1 movi v2.16b, #2 add x16, x0, x2 lsl x2, x2, #1 - mov x15, #SAO_STRIDE mov x8, x1 sub x9, x1, x4 add x10, x1, x4 diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c index 7efae0f740..a90da0246e 100644 --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c @@ -384,7 +384,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) c->sao_band_filter[1] = c->sao_band_filter[2] = c->sao_band_filter[3] = - c->sao_band_filter[4] = ff_hevc_sao_band_filter_8x8_8_neon; + c->sao_band_filter[4] = ff_h26x_sao_band_filter_8x8_8_neon; c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8x8_8_neon; c->sao_edge_filter[1] = c->sao_edge_filter[2] = diff --git a/libavcodec/aarch64/vvc/Makefile b/libavcodec/aarch64/vvc/Makefile index 58398d6e3d..54c49fea92 100644 --- a/libavcodec/aarch64/vvc/Makefile +++ b/libavcodec/aarch64/vvc/Makefile @@ -1,5 +1,6 @@ clean:: $(RM) $(CLEANSUFFIXES:%=libavcodec/aarch64/vvc/%) -OBJS-$(CONFIG_VVC_DECODER) += aarch64/vvc/dsp_init.o -NEON-OBJS-$(CONFIG_VVC_DECODER) += aarch64/vvc/alf.o +OBJS-$(CONFIG_VVC_DECODER) += aarch64/vvc/dsp_init.o +NEON-OBJS-$(CONFIG_VVC_DECODER) += aarch64/vvc/alf.o \ + aarch64/h26x/sao_neon.o diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c index 2a9f25911f..0aac140a8f 100644 --- a/libavcodec/aarch64/vvc/dsp_init.c +++ b/libavcodec/aarch64/vvc/dsp_init.c @@ -22,6 +22,7 @@ #include "libavutil/cpu.h" #include "libavutil/aarch64/cpu.h" +#include "libavcodec/aarch64/h26x/dsp.h" #include "libavcodec/vvc/dsp.h" #include "libavcodec/vvc/dec.h" #include "libavcodec/vvc/ctu.h" @@ -45,6 +46,11 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd) return; if (bd == 8) { + for (int i = 0; i < FF_ARRAY_ELEMS(c->sao.band_filter); i++) + c->sao.band_filter[i] = ff_h26x_sao_band_filter_8x8_8_neon; + c->sao.edge_filter[0] = 
ff_vvc_sao_edge_filter_8x8_8_neon; + for (int i = 1; i < FF_ARRAY_ELEMS(c->sao.edge_filter); i++) + c->sao.edge_filter[i] = ff_vvc_sao_edge_filter_16x16_8_neon; c->alf.filter[LUMA] = alf_filter_luma_8_neon; c->alf.filter[CHROMA] = alf_filter_chroma_8_neon; } else if (bd == 10) { -- 2.42.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".