Just sticking my head above the parapet, but shouldn’t things like...

> +            for (x = 0; x < c->srcW / 2; x++) {
> +                dstUV[x*2  ] = src[1][x] << 6;
> +                dstUV[x*2+1] = src[2][x] << 6;
> +            }

…be more efficiently written as... uint16_t* tdstUV = dstUV; uint16_t* tsrc1 = src[1]; uint16_t* tsrc2 = src[2]; for (x = c->srcW / 2; x > 0; x--) { *tdstUV++ = *tsrc1++ << 6; *tdstUV++ = *tsrc2++ << 6; } …or is that really old-school and a modern compiler does all that when optimising? Or is readability considered more important than marginal gains in performance? Oliver (time travelling from the 1980s) > On 1 Sep 2016, at 20:49, Timo Rothenpieler <t...@rothenpieler.org> wrote: > > --- > libswscale/swscale_unscaled.c | 42 ++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 42 insertions(+) > > diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c > index b231abe..f47e1f4 100644 > --- a/libswscale/swscale_unscaled.c > +++ b/libswscale/swscale_unscaled.c > @@ -197,6 +197,43 @@ static int nv12ToPlanarWrapper(SwsContext *c, const > uint8_t *src[], > return srcSliceH; > } > > +static int planarToP010Wrapper(SwsContext *c, const uint8_t *src8[], > + int srcStride[], int srcSliceY, > + int srcSliceH, uint8_t *dstParam8[], > + int dstStride[]) > +{ > + uint16_t *src[] = { > + (uint16_t*)(src8[0] + srcStride[0] * srcSliceY), > + (uint16_t*)(src8[1] + srcStride[1] * srcSliceY), > + (uint16_t*)(src8[2] + srcStride[2] * srcSliceY) > + }; > + uint16_t *dstY = (uint16_t*)(dstParam8[0] + dstStride[0] * srcSliceY); > + uint16_t *dstUV = (uint16_t*)(dstParam8[1] + dstStride[1] * srcSliceY / > 2); > + int x, y; > + > + av_assert0(!(srcStride[0] % 2 || srcStride[1] % 2 || srcStride[2] % 2 || > + dstStride[0] % 2 || dstStride[1] % 2)); > + > + for (y = srcSliceY; y < srcSliceY + srcSliceH; y++) { > + if (!(y & 1)) { > + for (x = 0; x < c->srcW / 2; x++) { > + dstUV[x*2 ] = src[1][x] << 6; > + dstUV[x*2+1] = src[2][x] << 6; > + } > + src[1] += srcStride[1] / 2; > + src[2] += srcStride[2] / 2; > + dstUV += dstStride[1] / 2; > + } > + for (x = 0; x < c->srcW; x++) { > + dstY[x] = src[0][x] << 6; > + } > + src[0] += srcStride[0] / 2; > + dstY += dstStride[0] 
/ 2; > + } > + > + return srcSliceH; > +} > + > static int planarToYuy2Wrapper(SwsContext *c, const uint8_t *src[], > int srcStride[], int srcSliceY, int srcSliceH, > uint8_t *dstParam[], int dstStride[]) > @@ -1600,6 +1637,11 @@ void ff_get_unscaled_swscale(SwsContext *c) > !(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || > c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) { > c->swscale = ff_yuv2rgb_get_func_ptr(c); > } > + /* yuv420p10le_to_p010le */ > + if ((srcFormat == AV_PIX_FMT_YUV420P10 || srcFormat == > AV_PIX_FMT_YUVA420P10) && > + dstFormat == AV_PIX_FMT_P010) { > + c->swscale = planarToP010Wrapper; > + } > > if (srcFormat == AV_PIX_FMT_YUV410P && !(dstH & 3) && > (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) > && > -- > 2.9.2 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel