[FFmpeg-cvslog] avcodec: [loongarch] Optimize h264_deblock with LASX.

2021-12-15 Thread Jin Bo
ffmpeg | branch: master | Jin Bo  | Wed Dec 15 11:51:09 2021 
+0800| [1ccc45896096b39b9a0d5650618d384df30d15e4] | committer: Michael 
Niedermayer

avcodec: [loongarch] Optimize h264_deblock with LASX.

./ffmpeg -i ../1_h264_1080p_30fps_3Mbps.mp4 -f rawvideo -y /dev/null -an
before:293
after :295

Change-Id: I5ff6cba4eaca0c4218c0c97b880ca500e35f9c87
Signed-off-by: Hao Chen 
Reviewed-by: Shiyou Yin 
Reviewed-by: guxiwei 
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1ccc45896096b39b9a0d5650618d384df30d15e4
---

 libavcodec/loongarch/Makefile |   3 +-
 libavcodec/loongarch/h264_deblock_lasx.c  | 147 ++
 libavcodec/loongarch/h264dsp_init_loongarch.c |   2 +
 libavcodec/loongarch/h264dsp_lasx.h   |   6 ++
 4 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/libavcodec/loongarch/Makefile b/libavcodec/loongarch/Makefile
index 242a2be290..1e1fe3fd48 100644
--- a/libavcodec/loongarch/Makefile
+++ b/libavcodec/loongarch/Makefile
@@ -4,4 +4,5 @@ OBJS-$(CONFIG_H264DSP)+= loongarch/h264dsp_init_loongarch.o
 LASX-OBJS-$(CONFIG_H264CHROMA)+= loongarch/h264chroma_lasx.o
 LASX-OBJS-$(CONFIG_H264QPEL)  += loongarch/h264qpel_lasx.o
 LASX-OBJS-$(CONFIG_H264DSP)   += loongarch/h264dsp_lasx.o \
- loongarch/h264idct_lasx.o
+ loongarch/h264idct_lasx.o \
+ loongarch/h264_deblock_lasx.o
diff --git a/libavcodec/loongarch/h264_deblock_lasx.c b/libavcodec/loongarch/h264_deblock_lasx.c
new file mode 100644
index 0000000000..c89bea9a84
--- /dev/null
+++ b/libavcodec/loongarch/h264_deblock_lasx.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ * Contributed by Xiwei Gu 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/bit_depth_template.c"
+#include "h264dsp_lasx.h"
+#include "libavutil/loongarch/loongson_intrinsics.h"
+
+#define H264_LOOP_FILTER_STRENGTH_ITERATION_LASX(edges, step, mask_mv, dir, \
+ d_idx, mask_dir)   \
+do {\
+int b_idx = 0; \
+int step_x4 = step << 2; \
+int d_idx_12 = d_idx + 12; \
+int d_idx_52 = d_idx + 52; \
+int d_idx_x4 = d_idx << 2; \
+int d_idx_x4_48 = d_idx_x4 + 48; \
+int dir_x32  = dir * 32; \
+uint8_t *ref_t = (uint8_t*)ref; \
+uint8_t *mv_t  = (uint8_t*)mv; \
+uint8_t *nnz_t = (uint8_t*)nnz; \
+uint8_t *bS_t  = (uint8_t*)bS; \
+mask_mv <<= 3; \
+for (; b_idx < edges; b_idx += step) { \
+out &= mask_dir; \
+if (!(mask_mv & b_idx)) { \
+if (bidir) { \
+ref2 = __lasx_xvldx(ref_t, d_idx_12); \
+ref3 = __lasx_xvldx(ref_t, d_idx_52); \
+ref0 = __lasx_xvld(ref_t, 12); \
+ref1 = __lasx_xvld(ref_t, 52); \
+ref2 = __lasx_xvilvl_w(ref3, ref2); \
+ref0 = __lasx_xvilvl_w(ref0, ref0); \
+ref1 = __lasx_xvilvl_w(ref1, ref1); \
+ref3 = __lasx_xvshuf4i_w(ref2, 0xB1); \
+ref0 = __lasx_xvsub_b(ref0, ref2); \
+ref1 = __lasx_xvsub_b(ref1, ref3); \
+ref0 = __lasx_xvor_v(ref0, ref1); \
+\
+tmp2 = __lasx_xvldx(mv_t, d_idx_x4_48);   \
+tmp3 = __lasx_xvld(mv_t, 48); \
+tmp4 = __lasx_xvld(mv_t, 208); \
+tmp5 = __lasx_xvld(mv_t + d_idx_x4, 208); \
+DUP2_ARG3(__lasx_xvpermi_q, tmp2, tmp2, 0x20, tmp5, tmp5, \
+  0x20, tmp2, tmp5); \
+tmp3 =  __lasx_xvpermi_q(tmp4, tmp3, 0x20); \
+tmp2 = __lasx_xvsub_h(tmp2, tmp3); \
+tmp5 = __lasx_xvsub_h(tmp5, tmp3); \
+DUP2_ARG2(__lasx_xvsat_h, tmp2, 7, tmp5, 7, tmp2, tmp5); \
+tmp0 = __lasx_xvpickev_b(tmp5, tmp2); \
+tmp0 = __lasx_xvpermi_d(tmp0, 0xd8); \
+tmp0 = __lasx_xvadd_b(tmp0, cn

[FFmpeg-cvslog] libavcodec/mips: Fix build errors reported by clang

2021-06-03 Thread Jin Bo
ffmpeg | branch: master | Jin Bo  | Tue Jun  1 14:22:09 2021 
+0800| [fd5fd48659a956b1b890c217d04f77f2bdab6a44] | committer: Michael 
Niedermayer

libavcodec/mips: Fix build errors reported by clang

Clang is more strict on the type of asm operands, float or double
type variable should use constraint 'f', integer variable should
use constraint 'r'.

Signed-off-by: Jin Bo 
Reviewed-by: yinshiyou...@loongson.cn
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fd5fd48659a956b1b890c217d04f77f2bdab6a44
---

 libavcodec/mips/constants.c  |  89 +++--
 libavcodec/mips/constants.h  |  88 +++--
 libavcodec/mips/h264chroma_mmi.c | 157 +++
 libavcodec/mips/h264dsp_mmi.c|  20 +--
 libavcodec/mips/h264pred_mmi.c   |  23 ++--
 libavcodec/mips/h264qpel_mmi.c   |  34 ++---
 libavcodec/mips/hevcdsp_mmi.c|  59 +
 libavcodec/mips/idctdsp_mmi.c|   2 +-
 libavcodec/mips/mpegvideo_mmi.c  |  20 +--
 libavcodec/mips/vc1dsp_mmi.c | 176 +-
 libavcodec/mips/vp8dsp_mmi.c | 263 +--
 libavutil/mips/asmdefs.h |   8 ++
 12 files changed, 536 insertions(+), 403 deletions(-)

diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c
index 8c990b6119..6a60dd3451 100644
--- a/libavcodec/mips/constants.c
+++ b/libavcodec/mips/constants.c
@@ -19,50 +19,49 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "config.h"
-#include "libavutil/mem_internal.h"
+#include "libavutil/intfloat.h"
 #include "constants.h"
 
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_1) =   {0x0001000100010001ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_2) =   {0x0002000200020002ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) =   {0x0003000300030003ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) =   {0x0004000400040004ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) =   {0x0005000500050005ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_6) =   {0x0006000600060006ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_8) =   {0x0008000800080008ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_9) =   {0x0009000900090009ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_10) =  {0x000A000A000A000AULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_12) =  {0x000C000C000C000CULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) =  {0x000F000F000F000FULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_16) =  {0x0010001000100010ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_17) =  {0x0011001100110011ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_18) =  {0x0012001200120012ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) =  {0x0014001400140014ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_22) =  {0x0016001600160016ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_28) =  {0x001C001C001C001CULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_32) =  {0x0020002000200020ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) =  {0x0035003500350035ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_64) =  {0x0040004000400040ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = {0x0080008000800080ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_512) = {0x0200020002000200ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_m8tom5) =  {0xFFFBFFFAFFF9FFF8ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_m4tom1) =  {0xFFFFFFFEFFFDFFFCULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) ={0x0004000300020001ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) ={0x0008000700060005ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) ={0x0003000200010000ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) ={0x0007000600050004ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_8tob) ={0x000b000a00090008ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pw_ctof) ={0x000f000e000d000cULL};
-
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) =   {0x0101010101010101ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) =   {0x0303030303030303ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_80) =  {0x8080808080808080ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) =  {0xA1A1A1A1A1A1A1A1ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_pb_FE) =  {0xFEFEFEFEFEFEFEFEULL};
-
-DECLARE_ALIGNED(8, const uint64_t, ff_rnd) ={0x0004000400040004ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) =   {0x0040004000400040ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) =   {0x0020002000200020ULL};
-
-DECLARE_ALIGNED(8, const uint64_t, ff_wm1010) = {0xFFFF0000FFFF0000ULL};
-DECLARE_ALIGNED(8, const uint64_t, ff_d4) = {0x0000000000000004ULL};
+const union av_intfloat64 ff_pw_1 =  {0x0001000100010001ULL};
+const union av_intfloat64 ff_pw_2 =  {0x0002000200020002ULL};
+const union av_

[FFmpeg-cvslog] libavcodec/mips: Fix fate errors reported by clang

2021-06-03 Thread Jin Bo
ffmpeg | branch: master | Jin Bo  | Fri May 28 10:04:41 2021 
+0800| [2fac1e370c9cf48b2e9d4a7f2c0d7236017f9bbe] | committer: Michael 
Niedermayer

libavcodec/mips: Fix fate errors reported by clang

The data width of gsldrc1/gsldlc1 should be 8 bytes wide.

Signed-off-by: Jin Bo 
Reviewed-by: yinshiyou...@loongson.cn
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2fac1e370c9cf48b2e9d4a7f2c0d7236017f9bbe
---

 libavcodec/mips/vp9_mc_mmi.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/mips/vp9_mc_mmi.c b/libavcodec/mips/vp9_mc_mmi.c
index fa65ff5116..812f7a6994 100644
--- a/libavcodec/mips/vp9_mc_mmi.c
+++ b/libavcodec/mips/vp9_mc_mmi.c
@@ -83,9 +83,9 @@ static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride,
 __asm__ volatile (
 "move   %[tmp1],%[width]   \n\t"
 "pxor   %[ftmp0],   %[ftmp0],%[ftmp0]  \n\t"
-"gsldlc1%[filter1], 0x03(%[filter])\n\t"
+"gsldlc1%[filter1], 0x07(%[filter])\n\t"
 "gsldrc1%[filter1], 0x00(%[filter])\n\t"
-"gsldlc1%[filter2], 0x0b(%[filter])\n\t"
+"gsldlc1%[filter2], 0x0f(%[filter])\n\t"
 "gsldrc1%[filter2], 0x08(%[filter])\n\t"
 "li %[tmp0],0x07   \n\t"
 "dmtc1  %[tmp0],%[ftmp13]  \n\t"
@@ -158,9 +158,9 @@ static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride,
 
 __asm__ volatile (
 "pxor   %[ftmp0],%[ftmp0],   %[ftmp0]  \n\t"
-"gsldlc1%[ftmp4],0x03(%[filter])   \n\t"
+"gsldlc1%[ftmp4],0x07(%[filter])   \n\t"
 "gsldrc1%[ftmp4],0x00(%[filter])   \n\t"
-"gsldlc1%[ftmp5],0x0b(%[filter])   \n\t"
+"gsldlc1%[ftmp5],0x0f(%[filter])   \n\t"
 "gsldrc1%[ftmp5],0x08(%[filter])   \n\t"
 "punpcklwd  %[filter10], %[ftmp4],   %[ftmp4]  \n\t"
 "punpckhwd  %[filter32], %[ftmp4],   %[ftmp4]  \n\t"
@@ -254,9 +254,9 @@ static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride,
 __asm__ volatile (
 "move   %[tmp1],%[width]   \n\t"
 "pxor   %[ftmp0],   %[ftmp0],%[ftmp0]  \n\t"
-"gsldlc1%[filter1], 0x03(%[filter])\n\t"
+"gsldlc1%[filter1], 0x07(%[filter])\n\t"
 "gsldrc1%[filter1], 0x00(%[filter])\n\t"
-"gsldlc1%[filter2], 0x0b(%[filter])\n\t"
+"gsldlc1%[filter2], 0x0f(%[filter])\n\t"
 "gsldrc1%[filter2], 0x08(%[filter])\n\t"
 "li %[tmp0],0x07   \n\t"
 "dmtc1  %[tmp0],%[ftmp13]  \n\t"
@@ -340,9 +340,9 @@ static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride,
 
 __asm__ volatile (
 "pxor   %[ftmp0],%[ftmp0],   %[ftmp0]  \n\t"
-"gsldlc1%[ftmp4],0x03(%[filter])   \n\t"
+"gsldlc1%[ftmp4],0x07(%[filter])   \n\t"
 "gsldrc1%[ftmp4],0x00(%[filter])   \n\t"
-"gsldlc1%[ftmp5],0x0b(%[filter])   \n\t"
+"gsldlc1%[ftmp5],0x0f(%[filter])   \n\t"
 "gsldrc1%[ftmp5],0x08(%[filter])   \n\t"
 "punpcklwd  %[filter10], %[ftmp4],   %[ftmp4]  \n\t"
 "punpckhwd  %[filter32], %[ftmp4],   %[ftmp4]  \n\t"

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] configure: [loongson] adjust MMI check in configure

2021-08-10 Thread Jin Bo
ffmpeg | branch: master | Jin Bo  | Tue Aug  3 12:05:21 2021 
+0800| [903c5d58f0311b12bd8127a545f1bf8549307f5c] | committer: Michael 
Niedermayer

configure: [loongson] adjust MMI check in configure

After standardizing the use of 'pxor' in commit 'ebedd26', FFmpeg
build failed with upstream compiler, for 'pxor' is not supported
in time. This patch helps to workaround the build failure by
checking whether 'pxor' is supported during configuration, if not,
MMI will be disabled.

Reviewed-by: yinshiyou...@loongson.cn
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=903c5d58f0311b12bd8127a545f1bf8549307f5c
---

 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index f9fdf58bc3..82639ce057 100755
--- a/configure
+++ b/configure
@@ -5947,7 +5947,7 @@ elif enabled mips; then
 enabled loongson3 && check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"' '-mloongson-ext' && append MMIFLAGS '-mloongson-ext'
 
 # MMI can be detected at runtime too
-enabled mmi && check_inline_asm mmi '"punpcklhw $f0, $f0, $f0"' '-mloongson-mmi' && append MMIFLAGS '-mloongson-mmi'
+enabled mmi && check_inline_asm mmi '"pxor $f0, $f0, $f0"' '-mloongson-mmi' && append MMIFLAGS '-mloongson-mmi'
 
 if enabled bigendian && enabled msa; then
 disable msa

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".