On 2/27/2021 11:05 AM, Paul B Mahol wrote:
Signed-off-by: Paul B Mahol <one...@gmail.com>
---
  libavcodec/cfhdencdsp.c          |   3 +
  libavcodec/x86/Makefile          |   2 +
  libavcodec/x86/cfhdencdsp.asm    | 435 +++++++++++++++++++++++++++++++
  libavcodec/x86/cfhdencdsp_init.c |  48 ++++
  4 files changed, 488 insertions(+)
  create mode 100644 libavcodec/x86/cfhdencdsp.asm
  create mode 100644 libavcodec/x86/cfhdencdsp_init.c

diff --git a/libavcodec/cfhdencdsp.c b/libavcodec/cfhdencdsp.c
index 0becb76d1d..b979e9e09a 100644
--- a/libavcodec/cfhdencdsp.c
+++ b/libavcodec/cfhdencdsp.c
@@ -73,4 +73,7 @@ av_cold void ff_cfhdencdsp_init(CFHDEncDSPContext *c)
  {
      c->horiz_filter = horiz_filter;
      c->vert_filter = vert_filter;
+
+    if (ARCH_X86)
+        ff_cfhdencdsp_init_x86(c);
  }
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 884dc0c759..6361161180 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -51,6 +51,7 @@ OBJS-$(CONFIG_ALAC_DECODER)            += x86/alacdsp_init.o
  OBJS-$(CONFIG_APNG_DECODER)            += x86/pngdsp_init.o
  OBJS-$(CONFIG_CAVS_DECODER)            += x86/cavsdsp.o
  OBJS-$(CONFIG_CFHD_DECODER)            += x86/cfhddsp_init.o
+OBJS-$(CONFIG_CFHD_ENCODER)            += x86/cfhdencdsp_init.o
  OBJS-$(CONFIG_DCA_DECODER)             += x86/dcadsp_init.o x86/synth_filter_init.o
  OBJS-$(CONFIG_DNXHD_ENCODER)           += x86/dnxhdenc_init.o
  OBJS-$(CONFIG_EXR_DECODER)             += x86/exrdsp_init.o
@@ -154,6 +155,7 @@ X86ASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o
  X86ASM-OBJS-$(CONFIG_ALAC_DECODER)     += x86/alacdsp.o
  X86ASM-OBJS-$(CONFIG_APNG_DECODER)     += x86/pngdsp.o
  X86ASM-OBJS-$(CONFIG_CAVS_DECODER)     += x86/cavsidct.o
+X86ASM-OBJS-$(CONFIG_CFHD_ENCODER)     += x86/cfhdencdsp.o
  X86ASM-OBJS-$(CONFIG_CFHD_DECODER)     += x86/cfhddsp.o
  X86ASM-OBJS-$(CONFIG_DCA_DECODER)      += x86/dcadsp.o x86/synth_filter.o
  X86ASM-OBJS-$(CONFIG_DIRAC_DECODER)    += x86/diracdsp.o                \
diff --git a/libavcodec/x86/cfhdencdsp.asm b/libavcodec/x86/cfhdencdsp.asm
new file mode 100644
index 0000000000..b0b094aa71
--- /dev/null
+++ b/libavcodec/x86/cfhdencdsp.asm
@@ -0,0 +1,435 @@
+;******************************************************************************
+;* x86-optimized functions for the CFHD encoder
+;* Copyright (c) 2021 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pw_p1_n1:  dw  1, -1, 1, -1, 1, -1, 1, -1
+pw_n1_p1:  dw  -1, 1, -1, 1, -1, 1, -1, 1
+pw_p5_n11: dw  5, -11, 5, -11, 5, -11, 5, -11
+pw_n5_p11: dw -5, 11, -5, 11, -5, 11, -5, 11
+pw_p11_n5: dw 11, -5, 11, -5, 11, -5, 11, -5
+pw_n11_p5: dw -11, 5, -11, 5, -11, 5, -11, 5
+pd_4:  times 4 dd  4
+pw_n4: times 8 dw -4
+cextern pw_m1
+cextern pw_1
+cextern pw_4
+
+SECTION .text

[...]

+
+%if ARCH_X86_64
+INIT_XMM sse2
+cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidth, width, height, x, y, pos
+    movsxdifnidn  widthq, widthd
+    movsxdifnidn heightq, heightd

Why did you add this? Using the shl and sub below with a d suffix, as in the previous version, is enough to clear the upper bits.

+
+    shl  istrideq, 1
+
+    shl    widthq, 1
+    sub   heightq, 2

Should be ok if tested and bitexact.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to