Hi,

2015-02-06 17:54 GMT+01:00 James Almer <jamr...@gmail.com>:
> pb_eo must be handled as a rip relative address for MSVC64, so an
> intermediate register is needed. Should fix link failures.
Seems OK in principle, passes FATE on mingw64. I'm always wary of those ABIs,
so if anyone could verify for msvc64 and unix64 (in addition to yourself),
that would be nice.

> +%if ARCH_X86_64
> +cglobal hevc_sao_edge_filter_%1_8, 4, 9, 8, dst, src, dststride, offset, eo,
> a_stride, b_stride, height, tmp
> +%define tmp2q heightq
>  %if WIN64
> -cglobal hevc_sao_edge_filter_%1_8, 4, 8, 8, dst, src, dststride, offset,
> a_stride, b_stride, height, tmp
> -%define eoq heightq
>      movsxd eoq, dword r4m
> -    movsx a_strideq, byte [pb_eo+eoq*4+1]
> -    movsx b_strideq, byte [pb_eo+eoq*4+3]
> -    imul a_strideq, EDGE_SRCSTRIDE
> -    imul b_strideq, EDGE_SRCSTRIDE
> -    movsx tmpq, byte [pb_eo+eoq*4]
> -    add a_strideq, tmpq
> -    movsx tmpq, byte [pb_eo+eoq*4+2]
> -    add b_strideq, tmpq
> -    mov heightd, r6m
> -
> -%elif ARCH_X86_64
> -cglobal hevc_sao_edge_filter_%1_8, 5, 9, 8, dst, src, dststride, offset, eo,
> a_stride, b_stride, height, tmp
> -%define tmp2q heightq
> +%else
>      movsxd eoq, eod
> +%endif
>      lea tmp2q, [pb_eo]
>      movsx a_strideq, byte [tmp2q+eoq*4+1]
>      movsx b_strideq, byte [tmp2q+eoq*4+3]

The new common loading block could almost be abstracted into a single macro
(with the stride as a parameter). Something like:

%macro LOAD_EO_ARGS 1
%define tmp2q heightq
%if WIN64
    movsxd eoq, dword r4m
%else
    movsxd eoq, eod
%endif
    lea tmp2q, [pb_eo]
    movsx a_strideq, byte [tmp2q+eoq*4+1]
    movsx b_strideq, byte [tmp2q+eoq*4+3]
    imul a_strideq, %1
    imul b_strideq, %1
    movsx tmpq, byte [tmp2q+eoq*4]
    add a_strideq, tmpq
    movsx tmpq, byte [tmp2q+eoq*4+2]
    add b_strideq, tmpq
    mov heightd, r6m
%endmacro

(macro provided as example because it's probably clearer :D but not tested)

--
Christophe
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel