Re: [FFmpeg-devel] [PATCH] avcodec/hevc: reduce memory used by the SAO

Michael Niedermayer Mon, 02 Feb 2015 07:04:09 -0800

On Mon, Feb 02, 2015 at 03:31:54PM +0100, Michael Niedermayer wrote:
> On Mon, Feb 02, 2015 at 02:22:36PM +0100, Christophe Gisquet wrote:
> > Hi,
> > 
> > 2015-02-02 13:32 GMT+01:00 Michael Niedermayer <michae...@gmx.at>:
> > > On Mon, Feb 02, 2015 at 07:41:54AM +0100, Christophe Gisquet wrote:
> > > hmm, is there a reason not to take the original commit unchanged ?
> > > I was hoping to reduce the difference to openhevc so that we also
> > > are able to merge future changes from openhevc with few conflicts
> > > but maybe I am missing something
> > 
> > Because there are alignment requirements in our dsp, and technically,
> > adding another buffer (which isn't aligned) while there are already
> > perfectly good ones is not the best solution (memory-wise and
> > bookkeeping-wise).
> > 
> 
> > I'd go as far as suggesting that openhevc align to this version. But maybe
> > they have diverged too much.
> 
> I am happy with either openhevc picking this version or us picking
> the one from openhevc with minimal changes needed to make it work for
> us..


> Though in the first case we should pick the openhevc version and
> commit our changes in a separate commit on top so openhevc can
> more easily pick the changes we made if they want to.

For reference, attached is the difference between the 2 implementations,
that is, the diff between this version and the one cherry-picked from
openhevc (actually cherry-picked yesterday and rebased to HEAD)

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

No snowflake in an avalanche ever feels responsible. -- Voltaire

From d6c12c28f8a03edebab03f18631fa30ade01d0da Mon Sep 17 00:00:00 2001
From: Christophe Gisquet <christophe.gisq...@gmail.com>
Date: Mon, 2 Feb 2015 15:51:45 +0100
Subject: [PATCH 2/2] Changes on top of openhevc

Signed-off-by: Michael Niedermayer <michae...@gmx.at>
---
 libavcodec/hevc.c        |   65 +++++++---------------------------------------
 libavcodec/hevc.h        |   11 +-------
 libavcodec/hevc_filter.c |   56 ++++++---------------------------------
 3 files changed, 18 insertions(+), 114 deletions(-)

diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index 1b526a0..7699297 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -104,8 +104,7 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
 
     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
     s->tab_ipm  = av_mallocz(min_pu_size);
-    s->is_pcm   = av_mallocz_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
-
+    s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
         goto fail;
 
@@ -281,24 +280,6 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
     return 0;
 }
 
-static int get_buffer_sao(HEVCContext *s, AVFrame *frame, const HEVCSPS *sps)
-{
-    int ret, i;
-
-    frame->width  = FFALIGN(s->avctx->coded_width + 2, FF_INPUT_BUFFER_PADDING_SIZE);
-    frame->height = s->avctx->coded_height + 3;
-    if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
-        return ret;
-    for (i = 0; frame->data[i]; i++) {
-        int offset = frame->linesize[i] + FF_INPUT_BUFFER_PADDING_SIZE;
-        frame->data[i] += offset;
-    }
-    frame->width  = s->avctx->coded_width;
-    frame->height = s->avctx->coded_height;
-
-    return 0;
-}
-
 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 {
     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL)
@@ -354,34 +335,19 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps)
     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 
     if (sps->sao_enabled && !s->avctx->hwaccel) {
-#ifdef USE_SAO_SMALL_BUFFER
-        {
-            int ctb_size = 1 << sps->log2_ctb_size;
-            int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
-            int c_idx, i;
-
-            for (i = 0; i < s->threads_number ; i++) {
-                HEVCLocalContext    *lc = s->HEVClcList[i];
-                lc->sao_pixel_buffer =
-                    av_malloc(((ctb_size + 2) * (ctb_size + 2)) <<
-                              sps->pixel_shift);
-            }
-            for(c_idx = 0; c_idx < c_count; c_idx++) {
-                int w = sps->width >> sps->hshift[c_idx];
-                int h = sps->height >> sps->vshift[c_idx];
-                s->sao_pixel_buffer_h[c_idx] =
+        int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
+        int c_idx;
+
+        for(c_idx = 0; c_idx < c_count; c_idx++) {
+            int w = sps->width >> sps->hshift[c_idx];
+            int h = sps->height >> sps->vshift[c_idx];
+            s->sao_pixel_buffer_h[c_idx] =
                 av_malloc((w * 2 * sps->ctb_height) <<
                           sps->pixel_shift);
-                s->sao_pixel_buffer_v[c_idx] =
+            s->sao_pixel_buffer_v[c_idx] =
                 av_malloc((h * 2 * sps->ctb_width) <<
                           sps->pixel_shift);
-            }
         }
-#else
-        av_frame_unref(s->tmp_frame);
-        ret = get_buffer_sao(s, s->tmp_frame, sps);
-        s->sao_frame = s->tmp_frame;
-#endif
     }
 
     s->sps = sps;
@@ -3201,17 +3167,10 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
 
     av_freep(&s->cabac_state);
 
-#ifdef USE_SAO_SMALL_BUFFER
-    for (i = 0; i < s->threads_number; i++) {
-        av_freep(&s->HEVClcList[i]->sao_pixel_buffer);
-    }
     for (i = 0; i < 3; i++) {
         av_freep(&s->sao_pixel_buffer_h[i]);
         av_freep(&s->sao_pixel_buffer_v[i]);
     }
-#else
-    av_frame_free(&s->tmp_frame);
-#endif
     av_frame_free(&s->output_frame);
 
     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
@@ -3271,12 +3230,6 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
     if (!s->cabac_state)
         goto fail;
 
-#ifndef USE_SAO_SMALL_BUFFER
-    s->tmp_frame = av_frame_alloc();
-    if (!s->tmp_frame)
-        goto fail;
-#endif
-
     s->output_frame = av_frame_alloc();
     if (!s->output_frame)
         goto fail;
diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
index 482341a..ae9a32a 100644
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h
@@ -36,8 +36,6 @@
 #include "thread.h"
 #include "videodsp.h"
 
-//#define USE_SAO_SMALL_BUFFER /* reduce the memory used by SAO */
-
 #define MAX_DPB_SIZE 16 // A.4.1
 #define MAX_REFS 16
 
@@ -747,9 +745,6 @@ typedef struct HEVCNAL {
 } HEVCNAL;
 
 typedef struct HEVCLocalContext {
-#ifdef USE_SAO_SMALL_BUFFER
-    uint8_t *sao_pixel_buffer;
-#endif
     uint8_t cabac_state[HEVC_CONTEXTS];
 
     uint8_t stat_coeff[4];
@@ -774,6 +769,7 @@ typedef struct HEVCLocalContext {
     int     end_of_tiles_y;
     /* +7 is for subpixel interpolation, *2 for high bit depths */
     DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
+    /* The extended size between the new edge emu buffer is abused by SAO */
     DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer2)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
     DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
 
@@ -813,13 +809,8 @@ typedef struct HEVCContext {
 
     AVFrame *frame;
     AVFrame *output_frame;
-#ifdef USE_SAO_SMALL_BUFFER
     uint8_t *sao_pixel_buffer_h[3];
     uint8_t *sao_pixel_buffer_v[3];
-#else
-    AVFrame *tmp_frame;
-    AVFrame *sao_frame;
-#endif
 
     const HEVCVPS *vps;
     const HEVCSPS *sps;
diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c
index 890e2bb..92b431b 100644
--- a/libavcodec/hevc_filter.c
+++ b/libavcodec/hevc_filter.c
@@ -139,7 +139,7 @@ static int get_qPy(HEVCContext *s, int xC, int yC)
     return s->qp_y_tab[x + y * s->sps->min_cb_width];
 }
 
-static void copy_CTB(uint8_t *dst, const uint8_t *src, int width, int height,
+static void copy_CTB(uint8_t *dst, uint8_t *src, int width, int height,
                      intptr_t stride_dst, intptr_t stride_src)
 {
 int i, j;
@@ -161,14 +161,12 @@ int i, j;
     }
 }
 
-#if defined(USE_SAO_SMALL_BUFFER)
 static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift)
 {
     if (pixel_shift)
         *(uint16_t *)dst = *(uint16_t *)src;
     else
         *dst = *src;
-
 }
 
 static void copy_vert(uint8_t *dst, const uint8_t *src,
@@ -210,7 +208,6 @@ static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src,
 
     copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << sh), src + ((width - 1) << sh), sh, height, 1 << sh, stride_src);
 }
-#endif
 
 static void restore_tqb_pixels(HEVCContext *s,
                                uint8_t *src1, const uint8_t *dst1,
@@ -317,21 +314,16 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
         int height   = FFMIN(ctb_size_v, (s->sps->height >> s->sps->vshift[c_idx]) - y0);
         int tab      = band_tab[(FFALIGN(width, 8) >> 3) - 1];
         uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->sps->pixel_shift)];
-#if defined(USE_SAO_SMALL_BUFFER)
-        int stride_dst = ((1 << (s->sps->log2_ctb_size)) + 2) << s->sps->pixel_shift;
-        uint8_t *dst = lc->sao_pixel_buffer + (1 * stride_dst) + (1 << s->sps->pixel_shift);
-#else
-        int stride_dst = s->sao_frame->linesize[c_idx];
-        uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride_dst + (x0 << s->sps->pixel_shift)];
-#endif
+        int stride_dst;
+        uint8_t *dst;
 
         switch (sao->type_idx[c_idx]) {
         case SAO_BAND:
+            dst = lc->edge_emu_buffer;
+            stride_dst = 2*MAX_PB_SIZE;
             copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride_dst, stride_src);
-#if defined(USE_SAO_SMALL_BUFFER)
             copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
                            x_ctb, y_ctb);
-#endif
             s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst,
                                             sao->offset_val[c_idx], sao->band_position[c_idx],
                                             width, height);
@@ -341,7 +333,6 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
             break;
         case SAO_EDGE:
         {
-#if defined(USE_SAO_SMALL_BUFFER)
             int w = s->sps->width >> s->sps->hshift[c_idx];
             int h = s->sps->height >> s->sps->vshift[c_idx];
             int left_edge = edges[0];
@@ -351,6 +342,9 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
             int sh = s->sps->pixel_shift;
             int left_pixels, right_pixels;
 
+            stride_dst = 2*MAX_PB_SIZE + FF_INPUT_BUFFER_PADDING_SIZE;
+            dst = lc->edge_emu_buffer + stride_dst + FF_INPUT_BUFFER_PADDING_SIZE;
+
             if (!top_edge) {
                 int left = 1 - left_edge;
                 int right = 1 - right_edge;
@@ -433,40 +427,6 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
 
             copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
                            x_ctb, y_ctb);
-#else
-            uint8_t left_pixels;
-            /* get the CTB edge pixels from the SAO pixel buffer */
-            left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED);
-            if (!edges[1]) {
-                uint8_t top_left  = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
-                uint8_t top_right = !edges[2] && (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
-                if (CTB(s->sao, x_ctb  , y_ctb-1).type_idx[c_idx] == 0)
-                    memcpy( dst - stride_dst - (top_left << s->sps->pixel_shift),
-                            src - stride_src - (top_left << s->sps->pixel_shift),
-                            (top_left + width + top_right) << s->sps->pixel_shift);
-                else {
-                    if (top_left)
-                        memcpy( dst - stride_dst - (1 << s->sps->pixel_shift),
-                                src - stride_src - (1 << s->sps->pixel_shift),
-                                1 << s->sps->pixel_shift);
-                    if(top_right)
-                        memcpy( dst - stride_dst + (width << s->sps->pixel_shift),
-                                src - stride_src + (width << s->sps->pixel_shift),
-                                1 << s->sps->pixel_shift);
-                }
-            }
-            if (!edges[3]) {                                                                // bottom and bottom right
-                uint8_t bottom_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] != SAO_APPLIED);
-                memcpy( dst + height * stride_dst - (bottom_left << s->sps->pixel_shift),
-                        src + height * stride_src - (bottom_left << s->sps->pixel_shift),
-                        (width + 1 + bottom_left) << s->sps->pixel_shift);
-            }
-            copy_CTB(dst - (left_pixels << s->sps->pixel_shift),
-                     src - (left_pixels << s->sps->pixel_shift),
-                     (width + 1 + left_pixels) << s->sps->pixel_shift, height, stride_dst, stride_src);
-#endif
-            /* XXX: could handle the restoration here to simplify the
-               DSP functions */
             s->hevcdsp.sao_edge_filter[restore](src, dst,
                                                 stride_src, stride_dst,
                                                 sao,
-- 
1.7.9.5

signature.asc
Description: Digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Re: [FFmpeg-devel] [PATCH] avcodec/hevc: reduce memory used by the SAO

Reply via email to