On 23 June 2016 at 18:07, Rostislav Pehlivanov <rpehliva...@ob-encoder.com> wrote:
> 27% performance increase for a 12bit 4k file. > > Signed-off-by: Rostislav Pehlivanov <rpehliva...@obe.tv> > --- > libavcodec/diracdec.c | 152 > ++++++++++++++++++++++++++------------------------ > 1 file changed, 80 insertions(+), 72 deletions(-) > > diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c > index 63eb4d1..ec45132 100644 > --- a/libavcodec/diracdec.c > +++ b/libavcodec/diracdec.c > @@ -1804,99 +1804,107 @@ static int interpolate_refplane(DiracContext *s, > DiracFrame *ref, int plane, int > return 0; > } > > -/** > - * Dirac Specification -> > - * 13.0 Transform data syntax. transform_data() > - */ > -static int dirac_decode_frame_internal(DiracContext *s) > +static int decode_plane(AVCodecContext *avctx, void *arg, int jobnr, int > thread) > { > DWTContext d; > - int y, i, comp, dsty; > - int ret; > + int i, y, ret, dsty; > + DiracContext *s = avctx->priv_data; > + Plane *p = &s->plane[jobnr]; > + uint8_t *frame = s->current_picture->avframe->data[jobnr]; > > - if (s->low_delay) { > - /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */ > - for (comp = 0; comp < 3; comp++) { > - Plane *p = &s->plane[comp]; > - memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height); > - } > - if (!s->zero_res) { > - if ((ret = decode_lowdelay(s)) < 0) > - return ret; > - } > + /* FIXME: small resolutions */ > + for (i = 0; i < 4; i++) > + s->edge_emu_buffer[i] = s->edge_emu_buffer_base + > i*FFALIGN(p->width, 16); > + > + if (!s->zero_res && !s->low_delay) > + { > + memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height); > + decode_component(s, jobnr); /* [DIRAC_STD] 13.4.1 > core_transform_data() */ > } > + ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2, > + s->wavelet_depth, s->bit_depth); > + if (ret < 0) > + return ret; > > - for (comp = 0; comp < 3; comp++) { > - Plane *p = &s->plane[comp]; > - uint8_t *frame = s->current_picture->avframe->data[comp]; > + if (!s->num_refs) { /* intra */ > + for (y = 0; y < p->height; y += 16) { > + int idx = 
(s->bit_depth - 8) >> 1; > + ff_spatial_idwt_slice2(&d, y+16); /* decode */ > + s->diracdsp.put_signed_rect_clamped[idx](frame + y*p->stride, > + p->stride, > + p->idwt.buf + > y*p->idwt.stride, > + p->idwt.stride, > p->width, 16); > + } > + } else { /* inter */ > + int rowheight = p->ybsep*p->stride; > > - /* FIXME: small resolutions */ > - for (i = 0; i < 4; i++) > - s->edge_emu_buffer[i] = s->edge_emu_buffer_base + > i*FFALIGN(p->width, 16); > + select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen); > > - if (!s->zero_res && !s->low_delay) > - { > - memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height); > - decode_component(s, comp); /* [DIRAC_STD] 13.4.1 > core_transform_data() */ > + for (i = 0; i < s->num_refs; i++) { > + int ret = interpolate_refplane(s, s->ref_pics[i], jobnr, > p->width, p->height); > + if (ret < 0) > + return ret; > } > - ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2, > - s->wavelet_depth, s->bit_depth); > - if (ret < 0) > - return ret; > > - if (!s->num_refs) { /* intra */ > - for (y = 0; y < p->height; y += 16) { > - int idx = (s->bit_depth - 8) >> 1; > - ff_spatial_idwt_slice2(&d, y+16); /* decode */ > - s->diracdsp.put_signed_rect_clamped[idx](frame + > y*p->stride, > - p->stride, > - p->idwt.buf + > y*p->idwt.stride, > - p->idwt.stride, > p->width, 16); > - } > - } else { /* inter */ > - int rowheight = p->ybsep*p->stride; > + memset(s->mctmp, 0, 4*p->yoffset*p->stride); > > - select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen); > + dsty = -p->yoffset; > + for (y = 0; y < s->blheight; y++) { > + int h = 0, > + start = FFMAX(dsty, 0); > + uint16_t *mctmp = s->mctmp + y*rowheight; > + DiracBlock *blocks = s->blmotion + y*s->blwidth; > > - for (i = 0; i < s->num_refs; i++) { > - int ret = interpolate_refplane(s, s->ref_pics[i], comp, > p->width, p->height); > - if (ret < 0) > - return ret; > - } > + init_obmc_weights(s, p, y); > > - memset(s->mctmp, 0, 4*p->yoffset*p->stride); > + if (y == s->blheight-1 || 
start+p->ybsep > p->height) > + h = p->height - start; > + else > + h = p->ybsep - (start - dsty); > + if (h < 0) > + break; > > - dsty = -p->yoffset; > - for (y = 0; y < s->blheight; y++) { > - int h = 0, > - start = FFMAX(dsty, 0); > - uint16_t *mctmp = s->mctmp + y*rowheight; > - DiracBlock *blocks = s->blmotion + y*s->blwidth; > + memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight); > + mc_row(s, blocks, mctmp, jobnr, dsty); > > - init_obmc_weights(s, p, y); > + mctmp += (start - dsty)*p->stride + p->xoffset; > + ff_spatial_idwt_slice2(&d, start + h); /* decode */ > + /* NOTE: add_rect_clamped hasn't been templated hence the > shifts. > + * idwt.stride is passed as pixels, not in bytes as in the > rest of the decoder */ > + s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, > p->stride, > + (int16_t*)(p->idwt.buf) + > start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h); > > - if (y == s->blheight-1 || start+p->ybsep > p->height) > - h = p->height - start; > - else > - h = p->ybsep - (start - dsty); > - if (h < 0) > - break; > + dsty += p->ybsep; > + } > + } > > - memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight); > - mc_row(s, blocks, mctmp, comp, dsty); > + return 0; > +} > > - mctmp += (start - dsty)*p->stride + p->xoffset; > - ff_spatial_idwt_slice2(&d, start + h); /* decode */ > - /* NOTE: add_rect_clamped hasn't been templated hence the > shifts. > - * idwt.stride is passed as pixels, not in bytes as in > the rest of the decoder */ > - s->diracdsp.add_rect_clamped(frame + start*p->stride, > mctmp, p->stride, > - (int16_t*)(p->idwt.buf) + > start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h); > +/** > + * Dirac Specification -> > + * 13.0 Transform data syntax. 
transform_data()
> + */
> +static int dirac_decode_frame_internal(DiracContext *s)
> +{
> + int ret, comp, res[3];
>
> - dsty += p->ybsep;
> - }
> + if (s->low_delay) {
> + /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
> + for (comp = 0; comp < 3; comp++) {
> + Plane *p = &s->plane[comp];
> + memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
> + }
> + if (!s->zero_res) {
> + if ((ret = decode_lowdelay(s)) < 0)
> + return ret;
> }
> }
>
> + s->avctx->execute2(s->avctx, decode_plane, NULL, res, 3);
> + for (comp = 0; comp < 3; comp++)
> + if (res[comp])
> + return res[comp];
>
> return 0;
> }
> --
> 2.8.1.369.geae769a
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

Disregard this patch, it breaks regular Dirac files.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel