From: Tucker DiNapoli <t.dinapol...@gmail.com> Also pulled the QP initialization out of the inner loop.
Added some dummy fields to PPContext to allow current code to work while changing QP stuff. --- libpostproc/postprocess_internal.h | 6 ++ libpostproc/postprocess_template.c | 138 ++++++++++++++++++------------------- 2 files changed, 74 insertions(+), 70 deletions(-) diff --git a/libpostproc/postprocess_internal.h b/libpostproc/postprocess_internal.h index 1ebd974..ccf862a 100644 --- a/libpostproc/postprocess_internal.h +++ b/libpostproc/postprocess_internal.h @@ -143,6 +143,9 @@ typedef struct PPContext{ DECLARE_ALIGNED(8, uint64_t, pQPb); DECLARE_ALIGNED(8, uint64_t, pQPb2); + DECLARE_ALIGNED(8, uint64_t, pQPb_block)[4]; + DECLARE_ALIGNED(8, uint64_t, pQPb2_block)[4]; + DECLARE_ALIGNED(8, uint64_t, mmxDcOffset)[64]; DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold)[64]; @@ -153,6 +156,9 @@ typedef struct PPContext{ int QP; int nonBQP; + QP_STORE_T QP_block[4]; + QP_STORE_T nonBQP_block[4]; + int frameNum; int cpuCaps; diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c index 6377ea7..344152e 100644 --- a/libpostproc/postprocess_template.c +++ b/libpostproc/postprocess_template.c @@ -3416,7 +3416,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ #endif const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride]; int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)]; - int QP=0; + int QP=0, nonBQP=0; /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards if not than use a temporary buffer */ if(y+15 >= height){ @@ -3449,58 +3449,69 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ int endx = FFMIN(width, x+32); uint8_t *dstBlockStart = dstBlock; const uint8_t *srcBlockStart = srcBlock; - for(; x < endx; x+=BLOCK_SIZE){ - prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32); - prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32); - prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32); - prefetcht0(dstBlock 
+ (((x>>2)&6) + copyAhead+1)*dstStride + 32); - - RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride, - srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset); + int qp_index = 0; + for(qp_index=0; qp_index < 4; qp_index+=1){ + QP = QPptr[(x+qp_index*8)>>qpHShift]; + nonBQP = nonBQPptr[(x+qp_index*8)>>qpHShift]; + if(!isColor){ + QP= (QP* QPCorrecture + 256*128)>>16; + nonBQP= (nonBQP* QPCorrecture + 256*128)>>16; + yHistogram[srcBlock[srcStride*12 + 4]]++; + } + c.QP_block[qp_index]= QP; + c.nonBQP_block[qp_index]= nonBQP; +#if TEMPLATE_PP_MMX + __asm__ volatile( + "movd %1, %%mm7 \n\t" + "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP + "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP + "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP + "movq %%mm7, %0 \n\t" + : "=m" (c.pQPb_block[qp_index]) + : "r" (QP) + ); +#endif + } + for(; x < endx; x+=BLOCK_SIZE){ + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32); + prefetchnta(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32); + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead)*dstStride + 32); + prefetcht0(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32); + + RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride, + srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset); + + if(mode & LINEAR_IPOL_DEINT_FILTER) + RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride); + else if(mode & LINEAR_BLEND_DEINT_FILTER) + RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x); + else if(mode & MEDIAN_DEINT_FILTER) + RENAME(deInterlaceMedian)(dstBlock, dstStride); + else if(mode & CUBIC_IPOL_DEINT_FILTER) + RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); + else if(mode & FFMPEG_DEINT_FILTER) + RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); + else if(mode & LOWPASS5_DEINT_FILTER) + RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x); + /* else if(mode & 
CUBIC_BLEND_DEINT_FILTER) + RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); + */ + dstBlock+=8; + srcBlock+=8; + } - if(mode & LINEAR_IPOL_DEINT_FILTER) - RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride); - else if(mode & LINEAR_BLEND_DEINT_FILTER) - RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x); - else if(mode & MEDIAN_DEINT_FILTER) - RENAME(deInterlaceMedian)(dstBlock, dstStride); - else if(mode & CUBIC_IPOL_DEINT_FILTER) - RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); - else if(mode & FFMPEG_DEINT_FILTER) - RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); - else if(mode & LOWPASS5_DEINT_FILTER) - RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x); -/* else if(mode & CUBIC_BLEND_DEINT_FILTER) - RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); -*/ - dstBlock+=8; - srcBlock+=8; - } + qp_index = 0; + dstBlock = dstBlockStart; + srcBlock = srcBlockStart; - dstBlock = dstBlockStart; - srcBlock = srcBlockStart; + for(x = startx; x < endx; x+=BLOCK_SIZE){ + const int stride= dstStride; + //temporary while changing QP stuff to make things continue to work + c.QP = c.QP_block[qp_index]; + c.nonBQP = c.nonBQP_block[qp_index]; + c.pQPb = c.pQPb_block[qp_index]; + c.pQPb2 = c.pQPb2_block[qp_index++]; - for(x = startx; x < endx; x+=BLOCK_SIZE){ - const int stride= dstStride; - QP = QPptr[x>>qpHShift]; - c.nonBQP = nonBQPptr[x>>qpHShift]; - if(!isColor){ - QP= (QP* QPCorrecture + 256*128)>>16; - c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; - yHistogram[srcBlock[srcStride*12 + 4]]++; - } - c.QP= QP; -#if TEMPLATE_PP_MMX - __asm__ volatile( - "movd %1, %%mm7 \n\t" - "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP - "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP - "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP - "movq %%mm7, %0 \n\t" - : "=m" (c.pQPb) - : "r" (QP) - ); -#endif /* only deblock if we have 2 blocks */ if(y + 8 < height){ if(mode & V_X1_FILTER) @@ 
-3521,6 +3532,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ srcBlock+=8; } + qp_index = 0; dstBlock = dstBlockStart; srcBlock = srcBlockStart; @@ -3528,26 +3540,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ const int stride= dstStride; av_unused uint8_t *tmpXchg; - if(isColor){ - QP= QPptr[x>>qpHShift]; - c.nonBQP= nonBQPptr[x>>qpHShift]; - }else{ - QP= QPptr[x>>4]; - QP= (QP* QPCorrecture + 256*128)>>16; - c.nonBQP= nonBQPptr[x>>4]; - c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; - } - c.QP= QP; + + c.QP = c.QP_block[qp_index]; + c.nonBQP = c.nonBQP_block[qp_index]; + c.pQPb = c.pQPb_block[qp_index]; + c.pQPb2 = c.pQPb2_block[qp_index++]; #if TEMPLATE_PP_MMX - __asm__ volatile( - "movd %1, %%mm7 \n\t" - "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP - "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP - "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP - "movq %%mm7, %0 \n\t" - : "=m" (c.pQPb) - : "r" (QP) - ); RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); #endif /* check if we have a previous block to deblock it with dstBlock */ @@ -3569,7 +3567,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ #else if(mode & H_X1_FILTER) - horizX1Filter(dstBlock-4, stride, QP); + horizX1Filter(dstBlock-4, stride, c.QP); else if(mode & H_DEBLOCK){ #if TEMPLATE_PP_ALTIVEC DECLARE_ALIGNED(16, unsigned char, tempBlock)[272]; -- 2.3.3 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel