ffmpeg | branch: master | James Almer <jamr...@gmail.com> | Tue Mar 21 15:49:09 2017 -0300| [1e185488269fd5639bc4fe826c8cd53c3e45c047] | committer: James Almer
Merge commit 'ba479f3daafc7e4359ec1212164569ebe59f0bb7' * commit 'ba479f3daafc7e4359ec1212164569ebe59f0bb7': hevc: Change type of array stride parameters to ptrdiff_t Merged-by: James Almer <jamr...@gmail.com> > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1e185488269fd5639bc4fe826c8cd53c3e45c047 --- libavcodec/hevc.c | 12 ++++++------ libavcodec/hevc_filter.c | 10 +++++----- libavcodec/hevcdsp_template.c | 8 ++++---- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c index 505249e..0b4a719 100644 --- a/libavcodec/hevc.c +++ b/libavcodec/hevc.c @@ -1283,11 +1283,11 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) HEVCLocalContext *lc = s->HEVClc; GetBitContext gb; int cb_size = 1 << log2_cb_size; - int stride0 = s->frame->linesize[0]; + ptrdiff_t stride0 = s->frame->linesize[0]; + ptrdiff_t stride1 = s->frame->linesize[1]; + ptrdiff_t stride2 = s->frame->linesize[2]; uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)]; - int stride1 = s->frame->linesize[1]; uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)]; - int stride2 = s->frame->linesize[2]; uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)]; int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth + @@ -1357,7 +1357,7 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER || x_off >= pic_width - block_w - QPEL_EXTRA_AFTER || y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) { - const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); @@ -1423,7 +1423,7 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER || x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER || y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) { - const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); @@ -1440,7 +1440,7 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER || x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER || y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) { - const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c index 6918a55..fe520f4 100644 --- a/libavcodec/hevc_filter.c +++ b/libavcodec/hevc_filter.c @@ -139,7 +139,7 @@ static int get_qPy(HEVCContext *s, int xC, int yC) } static void copy_CTB(uint8_t *dst, const uint8_t *src, int width, int height, - intptr_t stride_dst, intptr_t stride_src) + ptrdiff_t stride_dst, ptrdiff_t stride_src) { int i, j; @@ -170,7 +170,7 @@ static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift) static void copy_vert(uint8_t *dst, const uint8_t *src, int pixel_shift, int height, - int stride_dst, int stride_src) + ptrdiff_t stride_dst, ptrdiff_t stride_src) { int i; if (pixel_shift == 0) { @@ -189,7 +189,7 @@ static void copy_vert(uint8_t *dst, const uint8_t *src, } static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src, - int stride_src, int x, int y, int width, int height, + ptrdiff_t stride_src, int x, int y, int width, int height, int c_idx, int x_ctb, int y_ctb) { int sh = s->ps.sps->pixel_shift; @@ -306,14 +306,14 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) { int x0 = x >> s->ps.sps->hshift[c_idx]; int y0 = y >> s->ps.sps->vshift[c_idx]; - int stride_src = s->frame->linesize[c_idx]; + ptrdiff_t stride_src = s->frame->linesize[c_idx]; int ctb_size_h = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->hshift[c_idx]; int ctb_size_v = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->vshift[c_idx]; int width = FFMIN(ctb_size_h, (s->ps.sps->width >> s->ps.sps->hshift[c_idx]) - x0); int height = FFMIN(ctb_size_v, (s->ps.sps->height >> s->ps.sps->vshift[c_idx]) - y0); int tab = sao_tab[(FFALIGN(width, 8) >> 3) - 1]; uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->ps.sps->pixel_shift)]; - int stride_dst; + ptrdiff_t stride_dst; uint8_t *dst; switch (sao->type_idx[c_idx]) { diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c index b95984f..2b06dc5 100644 --- a/libavcodec/hevcdsp_template.c +++ b/libavcodec/hevcdsp_template.c @@ -393,8 +393,8 @@ static void FUNC(sao_edge_restore_0)(uint8_t *_dst, uint8_t *_src, } if (borders[3]) { int offset_val = sao_offset_val[0]; - int y_stride_dst = stride_dst * (height - 1); - int y_stride_src = stride_src * (height - 1); + ptrdiff_t y_stride_dst = stride_dst * (height - 1); + ptrdiff_t y_stride_src = stride_src * (height - 1); for (x = init_x; x < width; x++) dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); height--; @@ -444,8 +444,8 @@ static void FUNC(sao_edge_restore_1)(uint8_t *_dst, uint8_t *_src, } if (borders[3]) { int offset_val = sao_offset_val[0]; - int y_stride_dst = stride_dst * (height - 1); - int y_stride_src = stride_src * (height - 1); + ptrdiff_t y_stride_dst = stride_dst * (height - 1); + ptrdiff_t y_stride_src = stride_src * (height - 1); for (x = init_x; x < width; x++) dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); height--; ====================================================================== diff --cc libavcodec/hevc.c index 505249e,e38d367..0b4a719 --- a/libavcodec/hevc.c +++ b/libavcodec/hevc.c @@@ -1280,20 -1433,18 +1280,20 @@@ do static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) { - //TODO: non-4:2:0 support - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; GetBitContext gb; int cb_size = 1 << log2_cb_size; - int stride0 = s->frame->linesize[0]; + ptrdiff_t stride0 = s->frame->linesize[0]; + ptrdiff_t stride1 = s->frame->linesize[1]; + ptrdiff_t stride2 = s->frame->linesize[2]; uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)]; - int stride1 = s->frame->linesize[1]; uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)]; - int stride2 = s->frame->linesize[2]; uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)]; - int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma; + int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth + + (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) + + ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) * + s->ps.sps->pcm.bit_depth_chroma; const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3); int ret; @@@ -1354,12 -1517,13 +1354,12 @@@ static void luma_mc_uni(HEVCContext *s y_off += mv->y >> 2; src += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift)); - if (x_off < extra_left || y_off < extra_top || - x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] || - y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) { + if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER || + x_off >= pic_width - block_w - QPEL_EXTRA_AFTER || + y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) { - const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; - int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift); - int buf_offset = extra_top * - edge_emu_stride + (extra_left << s->ps.sps->pixel_shift); + int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); + int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset, edge_emu_stride, srcstride, @@@ -1370,104 -1534,8 +1370,104 @@@ src = lc->edge_emu_buffer + buf_offset; srcstride = edge_emu_stride; } - s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride, - block_h, mx, my, lc->mc_buffer); + + if (!weight_flag) + s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride, + block_h, mx, my, block_w); + else + s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride, + block_h, s->sh.luma_log2_weight_denom, + luma_weight, luma_offset, mx, my, block_w); +} + +/** + * 8.5.3.2.2.1 Luma sample bidirectional interpolation process + * + * @param s HEVC decoding context + * @param dst target buffer for block data at block position + * @param dststride stride of the dst buffer + * @param ref0 reference picture0 buffer at origin (0, 0) + * @param mv0 motion vector0 (relative to block position) to get pixel data from + * @param x_off horizontal position of block from origin (0, 0) + * @param y_off vertical position of block from origin (0, 0) + * @param block_w width of block + * @param block_h height of block + * @param ref1 reference picture1 buffer at origin (0, 0) + * @param mv1 motion vector1 (relative to block position) to get pixel data from + * @param current_mv current motion vector structure + */ + static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, + AVFrame *ref0, const Mv *mv0, int x_off, int y_off, + int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) +{ + HEVCLocalContext *lc = s->HEVClc; + ptrdiff_t src0stride = ref0->linesize[0]; + ptrdiff_t src1stride = ref1->linesize[0]; + int pic_width = s->ps.sps->width; + int pic_height = s->ps.sps->height; + int mx0 = mv0->x & 3; + int my0 = mv0->y & 3; + int mx1 = mv1->x & 3; + int my1 = mv1->y & 3; + int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || + (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); + int x_off0 = x_off + (mv0->x >> 2); + int y_off0 = y_off + (mv0->y >> 2); + int x_off1 = x_off + (mv1->x >> 2); + int y_off1 = y_off + (mv1->y >> 2); + int idx = ff_hevc_pel_weight[block_w]; + + uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift); + uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift); + + if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER || + x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER || + y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) { - const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; ++ const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); + int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); + + s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset, + edge_emu_stride, src0stride, + block_w + QPEL_EXTRA, + block_h + QPEL_EXTRA, + x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE, + pic_width, pic_height); + src0 = lc->edge_emu_buffer + buf_offset; + src0stride = edge_emu_stride; + } + + if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER || + x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER || + y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) { - const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; ++ const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); + int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); + + s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset, + edge_emu_stride, src1stride, + block_w + QPEL_EXTRA, + block_h + QPEL_EXTRA, + x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE, + pic_width, pic_height); + src1 = lc->edge_emu_buffer2 + buf_offset; + src1stride = edge_emu_stride; + } + + s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride, + block_h, mx0, my0, block_w); + if (!weight_flag) + s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp, + block_h, mx1, my1, block_w); + else + s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp, + block_h, s->sh.luma_log2_weight_denom, + s->sh.luma_weight_l0[current_mv->ref_idx[0]], + s->sh.luma_weight_l1[current_mv->ref_idx[1]], + s->sh.luma_offset_l0[current_mv->ref_idx[0]], + s->sh.luma_offset_l1[current_mv->ref_idx[1]], + mx1, my1, block_w); + } /** diff --cc libavcodec/hevc_filter.c index 6918a55,5037dae..fe520f4 --- a/libavcodec/hevc_filter.c +++ b/libavcodec/hevc_filter.c @@@ -138,106 -171,15 +138,106 @@@ static int get_qPy(HEVCContext *s, int return s->qp_y_tab[x + y * s->ps.sps->min_cb_width]; } -static void copy_CTB(uint8_t *dst, uint8_t *src, - int width, int height, ptrdiff_t stride) +static void copy_CTB(uint8_t *dst, const uint8_t *src, int width, int height, - intptr_t stride_dst, intptr_t stride_src) ++ ptrdiff_t stride_dst, ptrdiff_t stride_src) +{ +int i, j; + + if (((intptr_t)dst | (intptr_t)src | stride_dst | stride_src) & 15) { + for (i = 0; i < height; i++) { + for (j = 0; j < width; j+=8) + AV_COPY64U(dst+j, src+j); + dst += stride_dst; + src += stride_src; + } + } else { + for (i = 0; i < height; i++) { + for (j = 0; j < width; j+=16) + AV_COPY128(dst+j, src+j); + dst += stride_dst; + src += stride_src; + } + } +} + +static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift) +{ + if (pixel_shift) + *(uint16_t *)dst = *(uint16_t *)src; + else + *dst = *src; +} + +static void copy_vert(uint8_t *dst, const uint8_t *src, + int pixel_shift, int height, - int stride_dst, int stride_src) ++ ptrdiff_t stride_dst, ptrdiff_t stride_src) { int i; + if (pixel_shift == 0) { + for (i = 0; i < height; i++) { + *dst = *src; + dst += stride_dst; + src += stride_src; + } + } else { + for (i = 0; i < height; i++) { + *(uint16_t *)dst = *(uint16_t *)src; + dst += stride_dst; + src += stride_src; + } + } +} + +static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src, - int stride_src, int x, int y, int width, int height, ++ ptrdiff_t stride_src, int x, int y, int width, int height, + int c_idx, int x_ctb, int y_ctb) +{ + int sh = s->ps.sps->pixel_shift; + int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx]; + int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx]; - for (i = 0; i < height; i++) { - memcpy(dst, src, width); - dst += stride; - src += stride; + /* copy horizontal edges */ + memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb) * w + x) << sh), + src, width << sh); + memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 1) * w + x) << sh), + src + stride_src * (height - 1), width << sh); + + /* copy vertical edges */ + copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb) * h + y) << sh), src, sh, height, 1 << sh, stride_src); + + copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << sh), src + ((width - 1) << sh), sh, height, 1 << sh, stride_src); +} + +static void restore_tqb_pixels(HEVCContext *s, + uint8_t *src1, const uint8_t *dst1, + ptrdiff_t stride_src, ptrdiff_t stride_dst, + int x0, int y0, int width, int height, int c_idx) +{ + if ( s->ps.pps->transquant_bypass_enable_flag || + (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) { + int x, y; + int min_pu_size = 1 << s->ps.sps->log2_min_pu_size; + int hshift = s->ps.sps->hshift[c_idx]; + int vshift = s->ps.sps->vshift[c_idx]; + int x_min = ((x0 ) >> s->ps.sps->log2_min_pu_size); + int y_min = ((y0 ) >> s->ps.sps->log2_min_pu_size); + int x_max = ((x0 + width ) >> s->ps.sps->log2_min_pu_size); + int y_max = ((y0 + height) >> s->ps.sps->log2_min_pu_size); + int len = (min_pu_size >> hshift) << s->ps.sps->pixel_shift; + for (y = y_min; y < y_max; y++) { + for (x = x_min; x < x_max; x++) { + if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) { + int n; + uint8_t *src = src1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift); + const uint8_t *dst = dst1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift); + for (n = 0; n < (min_pu_size >> vshift); n++) { + memcpy(src, dst, len); + src += stride_src; + dst += stride_dst; + } + } + } + } } } @@@ -303,151 -269,46 +303,151 @@@ static void sao_filter_CTB(HEVCContext } } - for (c_idx = 0; c_idx < 3; c_idx++) { - int chroma = c_idx ? 1 : 0; - int x0 = x >> chroma; - int y0 = y >> chroma; - ptrdiff_t stride = s->frame->linesize[c_idx]; - int ctb_size = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->hshift[c_idx]; - int width = FFMIN(ctb_size, - (s->ps.sps->width >> s->ps.sps->hshift[c_idx]) - x0); - int height = FFMIN(ctb_size, - (s->ps.sps->height >> s->ps.sps->vshift[c_idx]) - y0); - - uint8_t *src = &s->frame->data[c_idx][y0 * stride + (x0 << s->ps.sps->pixel_shift)]; - uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride + (x0 << s->ps.sps->pixel_shift)]; - int offset = (y_shift >> chroma) * stride + ((x_shift >> chroma) << s->ps.sps->pixel_shift); - - copy_CTB(dst - offset, src - offset, - (edges[2] ? width + (x_shift >> chroma) : width) << s->ps.sps->pixel_shift, - (edges[3] ? height + (y_shift >> chroma) : height), stride); - - for (class_index = 0; class_index < class; class_index++) { - - switch (sao[class_index]->type_idx[c_idx]) { - case SAO_BAND: - s->hevcdsp.sao_band_filter[classes[class_index]](dst, src, - stride, - sao[class_index], - edges, width, - height, c_idx); - break; - case SAO_EDGE: - s->hevcdsp.sao_edge_filter[classes[class_index]](dst, src, - stride, - sao[class_index], - edges, width, - height, c_idx, - vert_edge[classes[class_index]], - horiz_edge[classes[class_index]], - diag_edge[classes[class_index]]); - break; + for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) { + int x0 = x >> s->ps.sps->hshift[c_idx]; + int y0 = y >> s->ps.sps->vshift[c_idx]; - int stride_src = s->frame->linesize[c_idx]; ++ ptrdiff_t stride_src = s->frame->linesize[c_idx]; + int ctb_size_h = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->hshift[c_idx]; + int ctb_size_v = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->vshift[c_idx]; + int width = FFMIN(ctb_size_h, (s->ps.sps->width >> s->ps.sps->hshift[c_idx]) - x0); + int height = FFMIN(ctb_size_v, (s->ps.sps->height >> s->ps.sps->vshift[c_idx]) - y0); + int tab = sao_tab[(FFALIGN(width, 8) >> 3) - 1]; + uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->ps.sps->pixel_shift)]; - int stride_dst; ++ ptrdiff_t stride_dst; + uint8_t *dst; + + switch (sao->type_idx[c_idx]) { + case SAO_BAND: + copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx, + x_ctb, y_ctb); + if (s->ps.pps->transquant_bypass_enable_flag || + (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) { + dst = lc->edge_emu_buffer; + stride_dst = 2*MAX_PB_SIZE; + copy_CTB(dst, src, width << s->ps.sps->pixel_shift, height, stride_dst, stride_src); + s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst, + sao->offset_val[c_idx], sao->band_position[c_idx], + width, height); + restore_tqb_pixels(s, src, dst, stride_src, stride_dst, + x, y, width, height, c_idx); + } else { + s->hevcdsp.sao_band_filter[tab](src, src, stride_src, stride_src, + sao->offset_val[c_idx], sao->band_position[c_idx], + width, height); + } + sao->type_idx[c_idx] = SAO_APPLIED; + break; + case SAO_EDGE: + { + int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx]; + int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx]; + int left_edge = edges[0]; + int top_edge = edges[1]; + int right_edge = edges[2]; + int bottom_edge = edges[3]; + int sh = s->ps.sps->pixel_shift; + int left_pixels, right_pixels; + + stride_dst = 2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE; + dst = lc->edge_emu_buffer + stride_dst + AV_INPUT_BUFFER_PADDING_SIZE; + + if (!top_edge) { + int left = 1 - left_edge; + int right = 1 - right_edge; + const uint8_t *src1[2]; + uint8_t *dst1; + int src_idx, pos; + + dst1 = dst - stride_dst - (left << sh); + src1[0] = src - stride_src - (left << sh); + src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0 - left) << sh); + pos = 0; + if (left) { + src_idx = (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] == + SAO_APPLIED); + copy_pixel(dst1, src1[src_idx], sh); + pos += (1 << sh); + } + src_idx = (CTB(s->sao, x_ctb, y_ctb-1).type_idx[c_idx] == + SAO_APPLIED); + memcpy(dst1 + pos, src1[src_idx] + pos, width << sh); + if (right) { + pos += width << sh; + src_idx = (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] == + SAO_APPLIED); + copy_pixel(dst1 + pos, src1[src_idx] + pos, sh); + } } + if (!bottom_edge) { + int left = 1 - left_edge; + int right = 1 - right_edge; + const uint8_t *src1[2]; + uint8_t *dst1; + int src_idx, pos; + + dst1 = dst + height * stride_dst - (left << sh); + src1[0] = src + height * stride_src - (left << sh); + src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0 - left) << sh); + pos = 0; + if (left) { + src_idx = (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] == + SAO_APPLIED); + copy_pixel(dst1, src1[src_idx], sh); + pos += (1 << sh); + } + src_idx = (CTB(s->sao, x_ctb, y_ctb+1).type_idx[c_idx] == + SAO_APPLIED); + memcpy(dst1 + pos, src1[src_idx] + pos, width << sh); + if (right) { + pos += width << sh; + src_idx = (CTB(s->sao, x_ctb+1, y_ctb+1).type_idx[c_idx] == + SAO_APPLIED); + copy_pixel(dst1 + pos, src1[src_idx] + pos, sh); + } + } + left_pixels = 0; + if (!left_edge) { + if (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] == SAO_APPLIED) { + copy_vert(dst - (1 << sh), + s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh), + sh, height, stride_dst, 1 << sh); + } else { + left_pixels = 1; + } + } + right_pixels = 0; + if (!right_edge) { + if (CTB(s->sao, x_ctb+1, y_ctb).type_idx[c_idx] == SAO_APPLIED) { + copy_vert(dst + (width << sh), + s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh), + sh, height, stride_dst, 1 << sh); + } else { + right_pixels = 1; + } + } + + copy_CTB(dst - (left_pixels << sh), + src - (left_pixels << sh), + (width + left_pixels + right_pixels) << sh, + height, stride_dst, stride_src); + + copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx, + x_ctb, y_ctb); + s->hevcdsp.sao_edge_filter[tab](src, dst, stride_src, sao->offset_val[c_idx], + sao->eo_class[c_idx], width, height); + s->hevcdsp.sao_edge_restore[restore](src, dst, + stride_src, stride_dst, + sao, + edges, width, + height, c_idx, + vert_edge, + horiz_edge, + diag_edge); + restore_tqb_pixels(s, src, dst, stride_src, stride_dst, + x, y, width, height, c_idx); + sao->type_idx[c_idx] = SAO_APPLIED; + break; + } } } } diff --cc libavcodec/hevcdsp_template.c index b95984f,cd55571..2b06dc5 --- a/libavcodec/hevcdsp_template.c +++ b/libavcodec/hevcdsp_template.c @@@ -390,13 -418,13 +390,13 @@@ static void FUNC(sao_edge_restore_0)(ui int offset_val = sao_offset_val[0]; for (x = init_x; x < width; x++) dst[x] = av_clip_pixel(src[x] + offset_val); - init_y = 1; } if (borders[3]) { - int offset_val = sao_offset_val[0]; - ptrdiff_t y_stride = stride * (height - 1); + int offset_val = sao_offset_val[0]; - int y_stride_dst = stride_dst * (height - 1); - int y_stride_src = stride_src * (height - 1); ++ ptrdiff_t y_stride_dst = stride_dst * (height - 1); ++ ptrdiff_t y_stride_src = stride_src * (height - 1); for (x = init_x; x < width; x++) - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val); + dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); height--; } } @@@ -443,11 -600,10 +443,11 @@@ static void FUNC(sao_edge_restore_1)(ui init_y = 1; } if (borders[3]) { - int offset_val = sao_offset_val[0]; - ptrdiff_t y_stride = stride * (height - 1); + int offset_val = sao_offset_val[0]; - int y_stride_dst = stride_dst * (height - 1); - int y_stride_src = stride_src * (height - 1); ++ ptrdiff_t y_stride_dst = stride_dst * (height - 1); ++ ptrdiff_t y_stride_src = stride_src * (height - 1); for (x = init_x; x < width; x++) - dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val); + dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); height--; } } _______________________________________________ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog