27% performance increase for a 12bit 4k file.

Signed-off-by: Rostislav Pehlivanov <rpehliva...@obe.tv>
---
 libavcodec/diracdec.c | 152 ++++++++++++++++++++++++++------------------------
 1 file changed, 80 insertions(+), 72 deletions(-)
diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c index 63eb4d1..ec45132 100644 --- a/libavcodec/diracdec.c +++ b/libavcodec/diracdec.c @@ -1804,99 +1804,107 @@ static int interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int return 0; } -/** - * Dirac Specification -> - * 13.0 Transform data syntax. transform_data() - */ -static int dirac_decode_frame_internal(DiracContext *s) +static int decode_plane(AVCodecContext *avctx, void *arg, int jobnr, int thread) { DWTContext d; - int y, i, comp, dsty; - int ret; + int i, y, ret, dsty; + DiracContext *s = avctx->priv_data; + Plane *p = &s->plane[jobnr]; + uint8_t *frame = s->current_picture->avframe->data[jobnr]; - if (s->low_delay) { - /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */ - for (comp = 0; comp < 3; comp++) { - Plane *p = &s->plane[comp]; - memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height); - } - if (!s->zero_res) { - if ((ret = decode_lowdelay(s)) < 0) - return ret; - } + /* FIXME: small resolutions */ + for (i = 0; i < 4; i++) + s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16); + + if (!s->zero_res && !s->low_delay) + { + memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height); + decode_component(s, jobnr); /* [DIRAC_STD] 13.4.1 core_transform_data() */ } + ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2, + s->wavelet_depth, s->bit_depth); + if (ret < 0) + return ret; - for (comp = 0; comp < 3; comp++) { - Plane *p = &s->plane[comp]; - uint8_t *frame = s->current_picture->avframe->data[comp]; + if (!s->num_refs) { /* intra */ + for (y = 0; y < p->height; y += 16) { + int idx = (s->bit_depth - 8) >> 1; + ff_spatial_idwt_slice2(&d, y+16); /* decode */ + s->diracdsp.put_signed_rect_clamped[idx](frame + y*p->stride, + p->stride, + p->idwt.buf + y*p->idwt.stride, + p->idwt.stride, p->width, 16); + } + } else { /* inter */ + int rowheight = p->ybsep*p->stride; - /* FIXME: small resolutions */ - for (i = 0; i < 4; i++) - s->edge_emu_buffer[i] = 
s->edge_emu_buffer_base + i*FFALIGN(p->width, 16); + select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen); - if (!s->zero_res && !s->low_delay) - { - memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height); - decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */ + for (i = 0; i < s->num_refs; i++) { + int ret = interpolate_refplane(s, s->ref_pics[i], jobnr, p->width, p->height); + if (ret < 0) + return ret; } - ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2, - s->wavelet_depth, s->bit_depth); - if (ret < 0) - return ret; - if (!s->num_refs) { /* intra */ - for (y = 0; y < p->height; y += 16) { - int idx = (s->bit_depth - 8) >> 1; - ff_spatial_idwt_slice2(&d, y+16); /* decode */ - s->diracdsp.put_signed_rect_clamped[idx](frame + y*p->stride, - p->stride, - p->idwt.buf + y*p->idwt.stride, - p->idwt.stride, p->width, 16); - } - } else { /* inter */ - int rowheight = p->ybsep*p->stride; + memset(s->mctmp, 0, 4*p->yoffset*p->stride); - select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen); + dsty = -p->yoffset; + for (y = 0; y < s->blheight; y++) { + int h = 0, + start = FFMAX(dsty, 0); + uint16_t *mctmp = s->mctmp + y*rowheight; + DiracBlock *blocks = s->blmotion + y*s->blwidth; - for (i = 0; i < s->num_refs; i++) { - int ret = interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height); - if (ret < 0) - return ret; - } + init_obmc_weights(s, p, y); - memset(s->mctmp, 0, 4*p->yoffset*p->stride); + if (y == s->blheight-1 || start+p->ybsep > p->height) + h = p->height - start; + else + h = p->ybsep - (start - dsty); + if (h < 0) + break; - dsty = -p->yoffset; - for (y = 0; y < s->blheight; y++) { - int h = 0, - start = FFMAX(dsty, 0); - uint16_t *mctmp = s->mctmp + y*rowheight; - DiracBlock *blocks = s->blmotion + y*s->blwidth; + memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight); + mc_row(s, blocks, mctmp, jobnr, dsty); - init_obmc_weights(s, p, y); + mctmp += (start - dsty)*p->stride + p->xoffset; + 
ff_spatial_idwt_slice2(&d, start + h); /* decode */ + /* NOTE: add_rect_clamped hasn't been templated hence the shifts. + * idwt.stride is passed as pixels, not in bytes as in the rest of the decoder */ + s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride, + (int16_t*)(p->idwt.buf) + start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h); - if (y == s->blheight-1 || start+p->ybsep > p->height) - h = p->height - start; - else - h = p->ybsep - (start - dsty); - if (h < 0) - break; + dsty += p->ybsep; + } + } - memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight); - mc_row(s, blocks, mctmp, comp, dsty); + return 0; +} - mctmp += (start - dsty)*p->stride + p->xoffset; - ff_spatial_idwt_slice2(&d, start + h); /* decode */ - /* NOTE: add_rect_clamped hasn't been templated hence the shifts. - * idwt.stride is passed as pixels, not in bytes as in the rest of the decoder */ - s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride, - (int16_t*)(p->idwt.buf) + start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h); +/** + * Dirac Specification -> + * 13.0 Transform data syntax. transform_data() + */ +static int dirac_decode_frame_internal(DiracContext *s) +{ + int ret, comp, res[3]; - dsty += p->ybsep; - } + if (s->low_delay) { + /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */ + for (comp = 0; comp < 3; comp++) { + Plane *p = &s->plane[comp]; + memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height); + } + if (!s->zero_res) { + if ((ret = decode_lowdelay(s)) < 0) + return ret; } } + s->avctx->execute2(s->avctx, decode_plane, NULL, res, 3); + for (comp = 0; comp < 3; comp++) + if (res[comp]) + return res[comp]; return 0; } -- 2.8.1.369.geae769a _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel