From: Zhao Zhili <zhiliz...@tencent.com>

hevc_sao_band_8_8_c: 63.0 ( 1.00x)
hevc_sao_band_8_8_simd128: 10.4 ( 6.06x)
hevc_sao_band_16_8_c: 230.4 ( 1.00x)
hevc_sao_band_16_8_simd128: 22.9 (10.07x)
hevc_sao_band_32_8_c: 900.4 ( 1.00x)
hevc_sao_band_32_8_simd128: 81.5 (11.05x)
hevc_sao_band_48_8_c: 2009.1 ( 1.00x)
hevc_sao_band_48_8_simd128: 170.2 (11.80x)
hevc_sao_band_64_8_c: 3535.0 ( 1.00x)
hevc_sao_band_64_8_simd128: 297.5 (11.88x)
Signed-off-by: Zhao Zhili <zhiliz...@tencent.com> --- libavcodec/wasm/hevc/Makefile | 3 +- libavcodec/wasm/hevc/dsp_init.c | 7 ++ libavcodec/wasm/hevc/sao.c | 113 ++++++++++++++++++++++++++++++++ libavcodec/wasm/hevc/sao.h | 41 ++++++++++++ 4 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 libavcodec/wasm/hevc/sao.c create mode 100644 libavcodec/wasm/hevc/sao.h diff --git a/libavcodec/wasm/hevc/Makefile b/libavcodec/wasm/hevc/Makefile index 132daa3106..7e8ab3776e 100644 --- a/libavcodec/wasm/hevc/Makefile +++ b/libavcodec/wasm/hevc/Makefile @@ -1,3 +1,4 @@ OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/dsp_init.o -SIMD128-OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/idct.o +SIMD128-OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/idct.o \ + wasm/hevc/sao.o diff --git a/libavcodec/wasm/hevc/dsp_init.c b/libavcodec/wasm/hevc/dsp_init.c index e5c8a2ebb6..76a1031ff4 100644 --- a/libavcodec/wasm/hevc/dsp_init.c +++ b/libavcodec/wasm/hevc/dsp_init.c @@ -21,6 +21,7 @@ #include "libavutil/cpu_internal.h" #include "libavcodec/hevc/dsp.h" #include "libavcodec/wasm/hevc/idct.h" +#include "libavcodec/wasm/hevc/sao.h" av_cold void ff_hevc_dsp_init_wasm(HEVCDSPContext *c, const int bit_depth) { @@ -35,6 +36,12 @@ av_cold void ff_hevc_dsp_init_wasm(HEVCDSPContext *c, const int bit_depth) c->idct[1] = ff_hevc_idct_8x8_8_simd128; c->idct[2] = ff_hevc_idct_16x16_8_simd128; c->idct[3] = ff_hevc_idct_32x32_8_simd128; + + c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_simd128; + c->sao_band_filter[1] = + c->sao_band_filter[2] = + c->sao_band_filter[3] = + c->sao_band_filter[4] = ff_hevc_sao_band_filter_16x16_8_simd128; } else if (bit_depth == 10) { c->idct[0] = ff_hevc_idct_4x4_10_simd128; c->idct[1] = ff_hevc_idct_8x8_10_simd128; diff --git a/libavcodec/wasm/hevc/sao.c b/libavcodec/wasm/hevc/sao.c new file mode 100644 index 0000000000..82134af7f3 --- /dev/null +++ b/libavcodec/wasm/hevc/sao.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2025 Zhao Zhili + * + * This file is part of 
FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "sao.h" + +#include <wasm_simd128.h> + +void ff_hevc_sao_band_filter_8x8_8_simd128(uint8_t *dst, const uint8_t *src, + ptrdiff_t stride_dst, + ptrdiff_t stride_src, + const int16_t *sao_offset_val, + int sao_left_class, int width, + int height) +{ + int8_t offset_table[32] = {0}; + v128_t offset_low, offset_high; + + for (int k = 0; k < 4; k++) + offset_table[(k + sao_left_class) & 31] = (int8_t)sao_offset_val[k + 1]; + + offset_low = wasm_v128_load(offset_table); + offset_high = wasm_v128_load(&offset_table[16]); + + for (int y = height; y > 0; y -= 2) { + v128_t src_v, src_high; + v128_t v0, v1; + + src_v = wasm_v128_load64_zero(src); + src += stride_src; + src_v = wasm_v128_load64_lane(src, src_v, 1); + src += stride_src; + + v0 = wasm_u8x16_shr(src_v, 3); + v1 = wasm_i8x16_sub(v0, wasm_i8x16_const_splat(16)); + v0 = wasm_i8x16_swizzle(offset_low, v0); + v1 = wasm_i8x16_swizzle(offset_high, v1); + v0 = wasm_v128_or(v0, v1); + src_high = wasm_u16x8_extend_high_u8x16(src_v); + v1 = wasm_i16x8_extend_high_i8x16(v0); + src_v = wasm_u16x8_extend_low_u8x16(src_v); + v0 = wasm_i16x8_extend_low_i8x16(v0); + + v0 = wasm_i16x8_add_sat(src_v, v0); + v1 = wasm_i16x8_add_sat(src_high, v1); + v0 = wasm_u8x16_narrow_i16x8(v0, 
v1); + + wasm_v128_store64_lane(dst, v0, 0); + dst += stride_dst; + wasm_v128_store64_lane(dst, v0, 1); + dst += stride_dst; + } +} + +void ff_hevc_sao_band_filter_16x16_8_simd128(uint8_t *dst, const uint8_t *src, + ptrdiff_t stride_dst, + ptrdiff_t stride_src, + const int16_t *sao_offset_val, + int sao_left_class, int width, + int height) +{ + int8_t offset_table[32] = {0}; + v128_t offset_low, offset_high; + + for (int k = 0; k < 4; k++) + offset_table[(k + sao_left_class) & 31] = (int8_t)sao_offset_val[k + 1]; + + offset_low = wasm_v128_load(offset_table); + offset_high = wasm_v128_load(&offset_table[16]); + + for (int y = height; y > 0; y--) { + for (int x = 0; x < width; x += 16) { + v128_t src_v, src_high; + v128_t v0, v1; + + src_v = wasm_v128_load(&src[x]); + + v0 = wasm_u8x16_shr(src_v, 3); + v1 = wasm_i8x16_sub(v0, wasm_i8x16_const_splat(16)); + v0 = wasm_i8x16_swizzle(offset_low, v0); + v1 = wasm_i8x16_swizzle(offset_high, v1); + v0 = wasm_v128_or(v0, v1); + src_high = wasm_u16x8_extend_high_u8x16(src_v); + v1 = wasm_i16x8_extend_high_i8x16(v0); + src_v = wasm_u16x8_extend_low_u8x16(src_v); + v0 = wasm_i16x8_extend_low_i8x16(v0); + + v0 = wasm_i16x8_add_sat(src_v, v0); + v1 = wasm_i16x8_add_sat(src_high, v1); + v0 = wasm_u8x16_narrow_i16x8(v0, v1); + wasm_v128_store(&dst[x], v0); + } + + dst += stride_dst; + src += stride_src; + } +} diff --git a/libavcodec/wasm/hevc/sao.h b/libavcodec/wasm/hevc/sao.h new file mode 100644 index 0000000000..6119ec90f1 --- /dev/null +++ b/libavcodec/wasm/hevc/sao.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2025 Zhao Zhili + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_WASM_HEVC_SAO_H +#define AVCODEC_WASM_HEVC_SAO_H + +#include <stddef.h> +#include <stdint.h> + +void ff_hevc_sao_band_filter_8x8_8_simd128(uint8_t *_dst, const uint8_t *_src, + ptrdiff_t _stride_dst, + ptrdiff_t _stride_src, + const int16_t *sao_offset_val, + int sao_left_class, int width, + int height); + +void ff_hevc_sao_band_filter_16x16_8_simd128(uint8_t *_dst, const uint8_t *_src, + ptrdiff_t _stride_dst, + ptrdiff_t _stride_src, + const int16_t *sao_offset_val, + int sao_left_class, int width, + int height); + +#endif \ No newline at end of file -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".