On Thu, 7 Oct 2021, J. Dekker wrote:

--bench on AWS Graviton:

hevc_sao_edge_16x16_8_c: 1857.0
hevc_sao_edge_16x16_8_neon: 211.0
hevc_sao_edge_32x32_8_c: 7802.2
hevc_sao_edge_32x32_8_neon: 808.2
hevc_sao_edge_48x48_8_c: 16764.2
hevc_sao_edge_48x48_8_neon: 1796.5
hevc_sao_edge_64x64_8_c: 32647.5
hevc_sao_edge_64x64_8_neon: 3118.5

Signed-off-by: J. Dekker <j...@itanimul.li>
---
libavcodec/aarch64/hevcdsp_init_aarch64.c |  8 ++-
libavcodec/aarch64/hevcdsp_sao_neon.S     | 66 +++++++++++++++++++++++
2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c 
b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index c785e46f79..747ff0412d 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -57,8 +57,8 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, 
uint8_t *_src,
                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
                                  int16_t *sao_offset_val, int sao_left_class,
                                  int width, int height);
-
-
+void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, 
ptrdiff_t stride_dst,
+                                          int16_t *sao_offset_val, int eo, int 
width, int height);

av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
{
@@ -76,6 +76,10 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, 
const int bit_depth)
        c->idct_dc[2]                  = ff_hevc_idct_16x16_dc_8_neon;
        c->idct_dc[3]                  = ff_hevc_idct_32x32_dc_8_neon;
        c->sao_band_filter[0]          = ff_hevc_sao_band_filter_8x8_8_neon;
+        c->sao_edge_filter[1]          =
+        c->sao_edge_filter[2]          =
+        c->sao_edge_filter[3]          =
+        c->sao_edge_filter[4]          = ff_hevc_sao_edge_filter_16x16_8_neon;
    }
    if (bit_depth == 10) {
        c->add_residual[0]             = ff_hevc_add_residual_4x4_10_neon;
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S 
b/libavcodec/aarch64/hevcdsp_sao_neon.S
index f9fed8345b..a7f054c075 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -85,3 +85,69 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
        bne             1b
        ret
endfunc
+
+// ASSUMES STRIDE_SRC = 192
+.Lsao_edge_pos:
+.word 1 // horizontal
+.word 192 // vertical
+.word 192 + 1 // 45 degree
+.word 192 - 1 // 135 degree
+
+// ff_hevc_sao_edge_filter_16x16_8_neon(char *dst, char *src, ptrdiff 
stride_dst,
+//                                      int16 *sao_offset_val, int eo, int 
width, int height)
+function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
+       lsl             w4, w4, #2

Actually, the left indentation here is one char too little, compared with the existing function here in the same file, and compared with other asm sources. So instead of reindenting the old one, please indent the new one according to all other existing asm instead.

// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to