Hi, I took the time to investigate it and found a few bugs, for which I'm attaching patches. However, I'm not convinced that this is a definitive fix, since the output of swscale-test still differs in some cases between scaling the whole frame and scaling it in slices. Also, there are a few fixes needed in this patch, which took me a while to figure out because I blindly trusted that the patch was correct.
2016-06-05 9:27 GMT-03:00 Michael Niedermayer <mich...@niedermayer.cc>: > + src_tmp[0] += srcStride[0] * i * STEP; > Here it should be: src_tmp[0] += srcStride[0] * i; + if (src_tmp[2]) { > + int step = STEP >> desc_src->log2_chroma_h; > + src_tmp[1] += srcStride[1] * i * step; > + src_tmp[2] += srcStride[2] * i * step; > + } > + if (src_tmp[3]) > + src_tmp[3] += srcStride[3] * i * STEP; > The same applies to the code above. Regards, Pedro.
From d0b51102b77e9a883b7b821ecd691ed36cb175ba Mon Sep 17 00:00:00 2001 From: Pedro Arthur <bygran...@gmail.com> Date: Wed, 8 Jun 2016 21:36:16 -0300 Subject: [PATCH 1/2] swscale: fix crash with swscale-test when using slices --- libswscale/swscale_unscaled.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 4d6cfd1..b231abe 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -575,7 +575,7 @@ static int Rgb16ToPlanarRgb16Wrapper(SwsContext *c, const uint8_t *src[], case AV_PIX_FMT_RGB48BE: case AV_PIX_FMT_RGBA64LE: case AV_PIX_FMT_RGBA64BE: - packed16togbra16(src[0] + srcSliceY * srcStride[0], srcStride[0], + packed16togbra16(src[0], srcStride[0], dst2013, stride2013, srcSliceH, alpha, swap, 16 - bpc, c->srcW); break; @@ -583,7 +583,7 @@ static int Rgb16ToPlanarRgb16Wrapper(SwsContext *c, const uint8_t *src[], case AV_PIX_FMT_BGR48BE: case AV_PIX_FMT_BGRA64LE: case AV_PIX_FMT_BGRA64BE: - packed16togbra16(src[0] + srcSliceY * srcStride[0], srcStride[0], + packed16togbra16(src[0], srcStride[0], dst1023, stride1023, srcSliceH, alpha, swap, 16 - bpc, c->srcW); break; -- 1.9.1
From c45a3d6dca86fbf2b622dbbb9f0562de43abab5a Mon Sep 17 00:00:00 2001 From: Pedro Arthur <bygran...@gmail.com> Date: Wed, 8 Jun 2016 21:38:49 -0300 Subject: [PATCH 2/2] swscale: fix ring buffer size when scaling slices of a frame The ring buffer size should be able to store input lines when there is not enough lines to output a single line. --- libswscale/slice.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- libswscale/swscale.c | 10 ++++++---- 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/libswscale/slice.c b/libswscale/slice.c index 0159a73..3da4136 100644 --- a/libswscale/slice.c +++ b/libswscale/slice.c @@ -210,6 +210,42 @@ static void fill_ones(SwsSlice *s, int n, int is16bit) } } +/* + Calculates the minimum ring buffer size, it should be able to store vFilterSize + more n lines where n is the max difference between each adjacent slice which + outputs a line. + The n lines are needed only when there is not enough src lines to output a single + dst line, then we should buffer these lines to process them on the next call to scale. 
+*/ +static void get_min_buffer_size(SwsContext *c, int *out_lum_size, int *out_chr_size) +{ + int lumY; + int dstH = c->dstH; + int chrDstH = c->chrDstH; + int *lumFilterPos = c->vLumFilterPos; + int *chrFilterPos = c->vChrFilterPos; + int lumFilterSize = c->vLumFilterSize; + int chrFilterSize = c->vChrFilterSize; + int chrSubSample = c->chrSrcVSubSample; + + *out_lum_size = lumFilterSize; + *out_chr_size = chrFilterSize; + + for (lumY = 0; lumY < dstH; lumY++) { + int chrY = (int64_t)lumY * chrDstH / dstH; + int nextSlice = FFMAX(lumFilterPos[lumY] + lumFilterSize - 1, + ((chrFilterPos[chrY] + chrFilterSize - 1) + << chrSubSample)); + + nextSlice >>= chrSubSample; + nextSlice <<= chrSubSample; + (*out_lum_size) = FFMAX((*out_lum_size), nextSlice - lumFilterPos[lumY]); + (*out_chr_size) = FFMAX((*out_chr_size), (nextSlice >> chrSubSample) - chrFilterPos[chrY]); + } +} + + + int ff_init_filters(SwsContext * c) { int i; @@ -226,6 +262,13 @@ int ff_init_filters(SwsContext * c) uint32_t * pal = usePal(c->srcFormat) ? 
c->pal_yuv : (uint32_t*)c->input_rgb2yuv_table; int res = 0; + int lumBufSize; + int chrBufSize; + + get_min_buffer_size(c, &lumBufSize, &chrBufSize); + lumBufSize = FFMAX(lumBufSize, c->vLumFilterSize + MAX_LINES_AHEAD); + chrBufSize = FFMAX(chrBufSize, c->vChrFilterSize + MAX_LINES_AHEAD); + if (c->dstBpc == 16) dst_stride <<= 1; @@ -248,13 +291,13 @@ int ff_init_filters(SwsContext * c) res = alloc_slice(&c->slice[0], c->srcFormat, c->srcH, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0); if (res < 0) goto cleanup; for (i = 1; i < c->numSlice-2; ++i) { - res = alloc_slice(&c->slice[i], c->srcFormat, c->vLumFilterSize + MAX_LINES_AHEAD, c->vChrFilterSize + MAX_LINES_AHEAD, c->chrSrcHSubSample, c->chrSrcVSubSample, 0); + res = alloc_slice(&c->slice[i], c->srcFormat, lumBufSize, chrBufSize, c->chrSrcHSubSample, c->chrSrcVSubSample, 0); if (res < 0) goto cleanup; res = alloc_lines(&c->slice[i], FFALIGN(c->srcW*2+78, 16), c->srcW); if (res < 0) goto cleanup; } // horizontal scaler output - res = alloc_slice(&c->slice[i], c->srcFormat, c->vLumFilterSize + MAX_LINES_AHEAD, c->vChrFilterSize + MAX_LINES_AHEAD, c->chrDstHSubSample, c->chrDstVSubSample, 1); + res = alloc_slice(&c->slice[i], c->srcFormat, lumBufSize, chrBufSize, c->chrDstHSubSample, c->chrDstVSubSample, 1); if (res < 0) goto cleanup; res = alloc_lines(&c->slice[i], dst_stride, c->dstW); if (res < 0) goto cleanup; diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 2e246d9..bbea0fe 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -413,8 +413,6 @@ static int swscale(SwsContext *c, const uint8_t *src[], lastInChrBuf = firstChrSrcY - 1; } - av_assert0(firstLumSrcY >= lastInLumBuf - vLumFilterSize + 1); - av_assert0(firstChrSrcY >= lastInChrBuf - vChrFilterSize + 1); DEBUG_BUFFERS("dstY: %d\n", dstY); DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n", @@ -433,10 +431,14 @@ static int swscale(SwsContext *c, const uint8_t *src[], lastLumSrcY, lastChrSrcY); } + 
av_assert0((lastLumSrcY - firstLumSrcY + 1) <= hout_slice->plane[0].available_lines); + av_assert0((lastChrSrcY - firstChrSrcY + 1) <= hout_slice->plane[1].available_lines); + + posY = hout_slice->plane[0].sliceY + hout_slice->plane[0].sliceH; if (posY <= lastLumSrcY && !hasLumHoles) { firstPosY = FFMAX(firstLumSrcY, posY); - lastPosY = FFMIN(lastLumSrcY + MAX_LINES_AHEAD, srcSliceY + srcSliceH - 1); + lastPosY = FFMIN(firstLumSrcY + hout_slice->plane[0].available_lines - 1, srcSliceY + srcSliceH - 1); } else { firstPosY = lastInLumBuf + 1; lastPosY = lastLumSrcY; @@ -445,7 +447,7 @@ static int swscale(SwsContext *c, const uint8_t *src[], cPosY = hout_slice->plane[1].sliceY + hout_slice->plane[1].sliceH; if (cPosY <= lastChrSrcY && !hasChrHoles) { firstCPosY = FFMAX(firstChrSrcY, cPosY); - lastCPosY = FFMIN(lastChrSrcY + MAX_LINES_AHEAD, AV_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample) - 1); + lastCPosY = FFMIN(firstChrSrcY + hout_slice->plane[1].available_lines - 1, AV_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample) - 1); } else { firstCPosY = lastInChrBuf + 1; lastCPosY = lastChrSrcY; -- 1.9.1
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel