The second stride is always that of the internal buffer, MAX_PB_SIZE (times 2 to get the value in bytes), so the srcstride argument is dropped from the weighting functions. --- libavcodec/x86/hevc_mc.asm | 30 +++++++++++++++--------------- libavcodec/x86/hevcdsp.h | 4 ++-- libavcodec/x86/hevcdsp_init.c | 16 ++++++++-------- 3 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm index 07b0f77..4776eed 100644 --- a/libavcodec/x86/hevc_mc.asm +++ b/libavcodec/x86/hevc_mc.asm @@ -1416,11 +1416,11 @@ cglobal hevc_put_hevc_bi_qpel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride %macro WEIGHTING_FUNCS 2 %if WIN64 || ARCH_X86_32 -cglobal hevc_put_hevc_uni_w%1_%2, 4, 5, 7, dst, dststride, src, srcstride, height, denom, wx, ox +cglobal hevc_put_hevc_uni_w%1_%2, 4, 5, 7, dst, dststride, src, height, denom, wx, ox mov r4d, denomm %define SHIFT r4d %else -cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, height, denom, wx, ox +cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, height, denom, wx, ox %define SHIFT denomd %endif lea SHIFT, [SHIFT+14-%2] ; shift = 14 - bitd + denom @@ -1481,15 +1481,15 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh jnz .loop ; height loop RET -cglobal hevc_put_hevc_bi_w%1_%2, 5, 7, 10, dst, dststride, src, srcstride, src2, height, denom, wx0, wx1, ox0, ox1 - mov r6d, denomm +cglobal hevc_put_hevc_bi_w%1_%2, 4, 5, 10, dst, dststride, src, src2, height, denom, wx0, wx1, ox0, ox1 + mov r5d, denomm %if %1 <= 4 pxor m1, m1 %endif movd m2, wx0m ; WX0 - lea r6d, [r6d+14-%2] ; shift = 14 - bitd + denom + lea r5d, [r5d+14-%2] ; shift = 14 - bitd + denom movd m3, wx1m ; WX1 - movd m0, r6d ; shift + movd m0, r5d ; shift %if %1 <= 4 punpcklwd m2, m1 punpcklwd m3, m1 @@ -1497,19 +1497,19 @@ cglobal hevc_put_hevc_bi_w%1_%2, 5, 7, 10, dst, dststride, src, srcstride, src2, punpcklwd m2, m2 punpcklwd m3, m3 %endif - inc r6d - movd m5, r6d ; shift+1 + inc r5d + movd m5, r5d ; shift+1 pshufd m2, m2, 0 - mov r6d, ox0m + mov r5d, ox0m pshufd m3, m3, 0 - add r6d, ox1m + add r5d, ox1m %if %2 != 8 - shl r6d, %2-8 ; ox << (bitd - 8) + shl r5d, %2-8 ; ox << (bitd - 8) %endif - inc r6d - movd m4, r6d ; offset + inc r5d + movd m4, r5d ; offset pshufd m4, m4, 0 - mov r6d, heightm + mov r4d, heightm pslld 
m4, m0 .loop @@ -1549,7 +1549,7 @@ cglobal hevc_put_hevc_bi_w%1_%2, 5, 7, 10, dst, dststride, src, srcstride, src2, add dstq, dststrideq ; dst += dststride add srcq, 2*MAX_PB_SIZE ; src += srcstride add src2q, 2*MAX_PB_SIZE ; src2 += srcstride - dec r6d ; cmp height + dec r4d ; cmp height jnz .loop ; height loop RET %endmacro diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h index 7864163..ad8168f 100644 --- a/libavcodec/x86/hevcdsp.h +++ b/libavcodec/x86/hevcdsp.h @@ -74,8 +74,8 @@ void ff_hevc_put_hevc_bi_w_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t PEL_PROTOTYPE(fname##64, bitd, opt) #define WEIGHTING_PROTOTYPE(width, bitd, opt) \ -void ff_hevc_put_hevc_uni_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, int height, int denom, int _wx, int _ox); \ -void ff_hevc_put_hevc_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, int16_t *_src2, int height, int denom, int _wx0, int _wx1, int _ox0, int _ox1) +void ff_hevc_put_hevc_uni_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, int16_t *_src, int height, int denom, int _wx, int _ox); \ +void ff_hevc_put_hevc_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, int16_t *_src, int16_t *_src2, int height, int denom, int _wx0, int _wx1, int _ox0, int _ox1) #define WEIGHTING_PROTOTYPES(bitd, opt) \ WEIGHTING_PROTOTYPE(2, bitd, opt); \ diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index f7b3d0f..e493033 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -427,7 +427,7 @@ mc_rep_funcs(qpel_hv,12, 8, 16, sse4); mc_rep_funcs(qpel_hv,12, 4, 12, sse4); #define mc_rep_uni_w(bitd, step, W, opt) \ -void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride,\ +void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \ int 
height, int denom, int _wx, int _ox) \ { \ int i; \ @@ -436,7 +436,7 @@ void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststri for (i = 0; i < W; i += step) { \ src= _src + i; \ dst= _dst + (i * ((bitd + 7) / 8)); \ - ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \ + ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, \ height, denom, _wx, _ox); \ } \ } @@ -463,7 +463,7 @@ mc_rep_uni_w(12, 8, 48, sse4); mc_rep_uni_w(12, 8, 64, sse4); #define mc_rep_bi_w(bitd, step, W, opt) \ -void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \ +void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \ int16_t *_src2, int height, \ int denom, int _wx0, int _wx1, int _ox0, int _ox1) \ { \ @@ -475,8 +475,8 @@ void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststrid src = _src + i; \ src2 = _src2 + i; \ dst = _dst + (i * ((bitd + 7) / 8)); \ - ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \ - height, denom, _wx0, _wx1, _ox0, _ox1); \ + ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \ + height, denom, _wx0, _wx1, _ox0, _ox1); \ } \ } @@ -510,7 +510,7 @@ void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t { \ LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ - ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, MAX_PB_SIZE, height, denom, _wx, _ox);\ + ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);\ } #define mc_uni_w_funcs(name, bitd, opt) \ @@ -569,8 +569,8 @@ void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _ { \ LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ 
ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ - ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, MAX_PB_SIZE, _src2, \ - height, denom, _wx0, _wx1, _ox0, _ox1); \ + ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \ + height, denom, _wx0, _wx1, _ox0, _ox1); \ } #define mc_bi_w_funcs(name, bitd, opt) \ -- 1.9.2.msysgit.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel