On 8/16/2020 11:09 AM, Paul B Mahol wrote: > On 8/16/20, Paul B Mahol <one...@gmail.com> wrote: >> Hi, >> >> patch attached. >> >> Please help porting this to linux and 64bit calling convention. >> > > New patch attached, could build on x64, please report any build failure.
[...] > diff --git a/libavcodec/x86/cfhddsp.asm b/libavcodec/x86/cfhddsp.asm > new file mode 100644 > index 0000000000..80371e65c9 > --- /dev/null > +++ b/libavcodec/x86/cfhddsp.asm > @@ -0,0 +1,626 @@ > +;****************************************************************************** > +;* x86-optimized functions for the CFHD decoder > +;* Copyright (c) 2020 Paul B Mahol > +;* > +;* This file is part of FFmpeg. > +;* > +;* FFmpeg is free software; you can redistribute it and/or > +;* modify it under the terms of the GNU Lesser General Public > +;* License as published by the Free Software Foundation; either > +;* version 2.1 of the License, or (at your option) any later version. > +;* > +;* FFmpeg is distributed in the hope that it will be useful, > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +;* Lesser General Public License for more details. > +;* > +;* You should have received a copy of the GNU Lesser General Public > +;* License along with FFmpeg; if not, write to the Free Software > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > +;****************************************************************************** > + > +%include "libavutil/x86/x86util.asm" > + > +SECTION_RODATA > + > +factor_p1_p1: dw 1, 1, 1, 1, 1, 1, 1, 1, > +factor_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1, > +factor_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1, > +factor_p11_n4: dw 11, -4, 11, -4, 11, -4, 11, -4, > +factor_p5_p4: dw 5, 4, 5, 4, 5, 4, 5, 4, > +pd_4: times 4 dd 4 > +pw_0: times 8 dw 0 > +pw_1023: times 8 dw 1023 > +pw_4095: times 8 dw 4095 > + > +SECTION .text > + > +%macro CFHD_HORIZ_FILTER 1 > +%if %1 == 1023 > +cglobal cfhd_horiz_filter_clip10, 5, 6, 8, output, low, high, width, bpc > + DEFINE_ARGS output, low, high, width, x, temp > + shl widthd, 1 > +%define ostrideq widthq > +%define lwidthq widthq > +%define hwidthq widthq > +%elif %1 == 4095 > +cglobal 
cfhd_horiz_filter_clip12, 5, 6, 8, output, low, high, width, bpc > + DEFINE_ARGS output, low, high, width, x, temp > + shl widthd, 1 > +%define ostrideq widthq > +%define lwidthq widthq > +%define hwidthq widthq > +%else > +%if ARCH_X86_64 > +cglobal cfhd_horiz_filter, 11, 11, 8, output, ostride, low, lwidth, high, > hwidth, width, height > +DEFINE_ARGS output, ostride, low, lwidth, high, hwidth, width, height, x, > y, temp > + shl ostrided, 1 > + shl lwidthd, 1 > + shl hwidthd, 1 > + shl widthd, 1 > + > + mov yq, heightq > + neg yq > +%else > +cglobal cfhd_horiz_filter, 6, 6, 8, 64, output, x, low, y, high, temp, > width, height > + shl xd, 1 > + shl yd, 1 > + shl tempd, 1 > + > + mov dword [rsp + 0], xq > + mov dword [rsp + 8], yq > + mov dword [rsp + 16], tempq These values are four bytes each on x86_32, not eight. Also, since all arguments come from the stack, you can simply store them back to their argument slots with: mov xmp, xq mov ymp, yq mov tempmp, tempq %define ostrideq xm %define lwidthq ym %define hwidthq tempm This saves you the need to reserve stack space. > + > + mov yd, r6m Just load r6/width normally in cglobal; you can use up to seven regs on x86_32. > + shl yd, 1 > + mov dword [rsp + 24], yq > + > + mov yd, r7m > + neg yq > + > +%define ostrideq [rsp + 0] > +%define lwidthq [rsp + 8] > +%define hwidthq [rsp + 16] > +%define widthq [rsp + 24] If you're going to define widthq here like this, then you shouldn't define width in cglobal. But as I said above, you have a reg free to store it. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".