[FFmpeg-cvslog] avcodec: [loongarch] Optimize h264_deblock with LASX.
ffmpeg | branch: master | Jin Bo | Wed Dec 15 11:51:09 2021 +0800| [1ccc45896096b39b9a0d5650618d384df30d15e4] | committer: Michael Niedermayer avcodec: [loongarch] Optimize h264_deblock with LASX. ./ffmpeg -i ../1_h264_1080p_30fps_3Mbps.mp4 -f rawvideo -y /dev/null -an before:293 after :295 Change-Id: I5ff6cba4eaca0c4218c0c97b880ca500e35f9c87 Signed-off-by: Hao Chen Reviewed-by: Shiyou Yin Reviewed-by: guxiwei Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1ccc45896096b39b9a0d5650618d384df30d15e4 --- libavcodec/loongarch/Makefile | 3 +- libavcodec/loongarch/h264_deblock_lasx.c | 147 ++ libavcodec/loongarch/h264dsp_init_loongarch.c | 2 + libavcodec/loongarch/h264dsp_lasx.h | 6 ++ 4 files changed, 157 insertions(+), 1 deletion(-) diff --git a/libavcodec/loongarch/Makefile b/libavcodec/loongarch/Makefile index 242a2be290..1e1fe3fd48 100644 --- a/libavcodec/loongarch/Makefile +++ b/libavcodec/loongarch/Makefile @@ -4,4 +4,5 @@ OBJS-$(CONFIG_H264DSP)+= loongarch/h264dsp_init_loongarch.o LASX-OBJS-$(CONFIG_H264CHROMA)+= loongarch/h264chroma_lasx.o LASX-OBJS-$(CONFIG_H264QPEL) += loongarch/h264qpel_lasx.o LASX-OBJS-$(CONFIG_H264DSP) += loongarch/h264dsp_lasx.o \ - loongarch/h264idct_lasx.o + loongarch/h264idct_lasx.o \ + loongarch/h264_deblock_lasx.o diff --git a/libavcodec/loongarch/h264_deblock_lasx.c b/libavcodec/loongarch/h264_deblock_lasx.c new file mode 100644 index 00..c89bea9a84 --- /dev/null +++ b/libavcodec/loongarch/h264_deblock_lasx.c @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + * Contributed by Xiwei Gu + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/bit_depth_template.c" +#include "h264dsp_lasx.h" +#include "libavutil/loongarch/loongson_intrinsics.h" + +#define H264_LOOP_FILTER_STRENGTH_ITERATION_LASX(edges, step, mask_mv, dir, \ + d_idx, mask_dir) \ +do {\ +int b_idx = 0; \ +int step_x4 = step << 2; \ +int d_idx_12 = d_idx + 12; \ +int d_idx_52 = d_idx + 52; \ +int d_idx_x4 = d_idx << 2; \ +int d_idx_x4_48 = d_idx_x4 + 48; \ +int dir_x32 = dir * 32; \ +uint8_t *ref_t = (uint8_t*)ref; \ +uint8_t *mv_t = (uint8_t*)mv; \ +uint8_t *nnz_t = (uint8_t*)nnz; \ +uint8_t *bS_t = (uint8_t*)bS; \ +mask_mv <<= 3; \ +for (; b_idx < edges; b_idx += step) { \ +out &= mask_dir; \ +if (!(mask_mv & b_idx)) { \ +if (bidir) { \ +ref2 = __lasx_xvldx(ref_t, d_idx_12); \ +ref3 = __lasx_xvldx(ref_t, d_idx_52); \ +ref0 = __lasx_xvld(ref_t, 12); \ +ref1 = __lasx_xvld(ref_t, 52); \ +ref2 = __lasx_xvilvl_w(ref3, ref2); \ +ref0 = __lasx_xvilvl_w(ref0, ref0); \ +ref1 = __lasx_xvilvl_w(ref1, ref1); \ +ref3 = __lasx_xvshuf4i_w(ref2, 0xB1); \ +ref0 = __lasx_xvsub_b(ref0, ref2); \ +ref1 = __lasx_xvsub_b(ref1, ref3); \ +ref0 = __lasx_xvor_v(ref0, ref1); \ +\ +tmp2 = __lasx_xvldx(mv_t, d_idx_x4_48); \ +tmp3 = __lasx_xvld(mv_t, 48); \ +tmp4 = __lasx_xvld(mv_t, 208); \ +tmp5 = __lasx_xvld(mv_t + d_idx_x4, 208); \ +DUP2_ARG3(__lasx_xvpermi_q, tmp2, tmp2, 0x20, tmp5, tmp5, \ + 0x20, tmp2, tmp5); \ +tmp3 = __lasx_xvpermi_q(tmp4, tmp3, 0x20); \ +tmp2 = __lasx_xvsub_h(tmp2, tmp3); \ +tmp5 = __lasx_xvsub_h(tmp5, tmp3); \ +DUP2_ARG2(__lasx_xvsat_h, tmp2, 7, 
tmp5, 7, tmp2, tmp5); \ +tmp0 = __lasx_xvpickev_b(tmp5, tmp2); \ +tmp0 = __lasx_xvpermi_d(tmp0, 0xd8); \ +tmp0 = __lasx_xvadd_b(tmp0, cn
[FFmpeg-cvslog] libavcodec/mips: Fix build errors reported by clang
ffmpeg | branch: master | Jin Bo | Tue Jun 1 14:22:09 2021 +0800| [fd5fd48659a956b1b890c217d04f77f2bdab6a44] | committer: Michael Niedermayer libavcodec/mips: Fix build errors reported by clang Clang is more strict on the type of asm operands, float or double type variable should use constraint 'f', integer variable should use constraint 'r'. Signed-off-by: Jin Bo Reviewed-by: yinshiyou...@loongson.cn Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fd5fd48659a956b1b890c217d04f77f2bdab6a44 --- libavcodec/mips/constants.c | 89 +++-- libavcodec/mips/constants.h | 88 +++-- libavcodec/mips/h264chroma_mmi.c | 157 +++ libavcodec/mips/h264dsp_mmi.c| 20 +-- libavcodec/mips/h264pred_mmi.c | 23 ++-- libavcodec/mips/h264qpel_mmi.c | 34 ++--- libavcodec/mips/hevcdsp_mmi.c| 59 + libavcodec/mips/idctdsp_mmi.c| 2 +- libavcodec/mips/mpegvideo_mmi.c | 20 +-- libavcodec/mips/vc1dsp_mmi.c | 176 +- libavcodec/mips/vp8dsp_mmi.c | 263 +-- libavutil/mips/asmdefs.h | 8 ++ 12 files changed, 536 insertions(+), 403 deletions(-) diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c index 8c990b6119..6a60dd3451 100644 --- a/libavcodec/mips/constants.c +++ b/libavcodec/mips/constants.c @@ -19,50 +19,49 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "config.h" -#include "libavutil/mem_internal.h" +#include "libavutil/intfloat.h" #include "constants.h" -DECLARE_ALIGNED(8, const uint64_t, ff_pw_1) = {0x0001000100010001ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_2) = {0x0002000200020002ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) = {0x0003000300030003ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) = {0x0004000400040004ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) = {0x0005000500050005ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_6) = {0x0006000600060006ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_8) = {0x0008000800080008ULL}; -DECLARE_ALIGNED(8, const uint64_t, 
ff_pw_9) = {0x0009000900090009ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_10) = {0x000A000A000A000AULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_12) = {0x000C000C000C000CULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = {0x000F000F000F000FULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_16) = {0x0010001000100010ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_17) = {0x0011001100110011ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_18) = {0x0012001200120012ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) = {0x0014001400140014ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_22) = {0x0016001600160016ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_28) = {0x001C001C001C001CULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_32) = {0x0020002000200020ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = {0x0035003500350035ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_64) = {0x0040004000400040ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = {0x0080008000800080ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_512) = {0x0200020002000200ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_m8tom5) = {0xFFFBFFFAFFF9FFF8ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_m4tom1) = {0xFFFFFFFEFFFDFFFCULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) ={0x0004000300020001ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) ={0x0008000700060005ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) ={0x0003000200010000ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) ={0x0007000600050004ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_8tob) ={0x000b000a00090008ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_ctof) ={0x000f000e000d000cULL}; - -DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) = {0x0101010101010101ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) = {0x0303030303030303ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pb_80) = {0x8080808080808080ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) = {0xA1A1A1A1A1A1A1A1ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_pb_FE) = 
{0xFEFEFEFEFEFEFEFEULL}; - -DECLARE_ALIGNED(8, const uint64_t, ff_rnd) ={0x0004000400040004ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) = {0x0040004000400040ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) = {0x0020002000200020ULL}; - -DECLARE_ALIGNED(8, const uint64_t, ff_wm1010) = {0xFFFF0000FFFF0000ULL}; -DECLARE_ALIGNED(8, const uint64_t, ff_d4) = {0x0000000000000004ULL}; +const union av_intfloat64 ff_pw_1 = {0x0001000100010001ULL}; +const union av_intfloat64 ff_pw_2 = {0x0002000200020002ULL}; +const union av_
[FFmpeg-cvslog] libavcodec/mips: Fix fate errors reported by clang
ffmpeg | branch: master | Jin Bo | Fri May 28 10:04:41 2021 +0800| [2fac1e370c9cf48b2e9d4a7f2c0d7236017f9bbe] | committer: Michael Niedermayer libavcodec/mips: Fix fate errors reported by clang The data width of gsldrc1/gsldlc1 should be 8 bytes wide. Signed-off-by: Jin Bo Reviewed-by: yinshiyou...@loongson.cn Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2fac1e370c9cf48b2e9d4a7f2c0d7236017f9bbe --- libavcodec/mips/vp9_mc_mmi.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libavcodec/mips/vp9_mc_mmi.c b/libavcodec/mips/vp9_mc_mmi.c index fa65ff5116..812f7a6994 100644 --- a/libavcodec/mips/vp9_mc_mmi.c +++ b/libavcodec/mips/vp9_mc_mmi.c @@ -83,9 +83,9 @@ static void convolve_horiz_mmi(const uint8_t *src, int32_t src_stride, __asm__ volatile ( "move %[tmp1],%[width] \n\t" "pxor %[ftmp0], %[ftmp0],%[ftmp0] \n\t" -"gsldlc1%[filter1], 0x03(%[filter])\n\t" +"gsldlc1%[filter1], 0x07(%[filter])\n\t" "gsldrc1%[filter1], 0x00(%[filter])\n\t" -"gsldlc1%[filter2], 0x0b(%[filter])\n\t" +"gsldlc1%[filter2], 0x0f(%[filter])\n\t" "gsldrc1%[filter2], 0x08(%[filter])\n\t" "li %[tmp0],0x07 \n\t" "dmtc1 %[tmp0],%[ftmp13] \n\t" @@ -158,9 +158,9 @@ static void convolve_vert_mmi(const uint8_t *src, int32_t src_stride, __asm__ volatile ( "pxor %[ftmp0],%[ftmp0], %[ftmp0] \n\t" -"gsldlc1%[ftmp4],0x03(%[filter]) \n\t" +"gsldlc1%[ftmp4],0x07(%[filter]) \n\t" "gsldrc1%[ftmp4],0x00(%[filter]) \n\t" -"gsldlc1%[ftmp5],0x0b(%[filter]) \n\t" +"gsldlc1%[ftmp5],0x0f(%[filter]) \n\t" "gsldrc1%[ftmp5],0x08(%[filter]) \n\t" "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t" "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t" @@ -254,9 +254,9 @@ static void convolve_avg_horiz_mmi(const uint8_t *src, int32_t src_stride, __asm__ volatile ( "move %[tmp1],%[width] \n\t" "pxor %[ftmp0], %[ftmp0],%[ftmp0] \n\t" -"gsldlc1%[filter1], 0x03(%[filter])\n\t" +"gsldlc1%[filter1], 0x07(%[filter])\n\t" "gsldrc1%[filter1], 0x00(%[filter])\n\t" 
-"gsldlc1%[filter2], 0x0b(%[filter])\n\t" +"gsldlc1%[filter2], 0x0f(%[filter])\n\t" "gsldrc1%[filter2], 0x08(%[filter])\n\t" "li %[tmp0],0x07 \n\t" "dmtc1 %[tmp0],%[ftmp13] \n\t" @@ -340,9 +340,9 @@ static void convolve_avg_vert_mmi(const uint8_t *src, int32_t src_stride, __asm__ volatile ( "pxor %[ftmp0],%[ftmp0], %[ftmp0] \n\t" -"gsldlc1%[ftmp4],0x03(%[filter]) \n\t" +"gsldlc1%[ftmp4],0x07(%[filter]) \n\t" "gsldrc1%[ftmp4],0x00(%[filter]) \n\t" -"gsldlc1%[ftmp5],0x0b(%[filter]) \n\t" +"gsldlc1%[ftmp5],0x0f(%[filter]) \n\t" "gsldrc1%[ftmp5],0x08(%[filter]) \n\t" "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t" "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t" ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-cvslog] configure: [loongson] adjust MMI check in configure
ffmpeg | branch: master | Jin Bo | Tue Aug 3 12:05:21 2021 +0800| [903c5d58f0311b12bd8127a545f1bf8549307f5c] | committer: Michael Niedermayer configure: [loongson] adjust MMI check in configure After standardizing the use of 'pxor' in commit 'ebedd26', FFmpeg build failed with upstream compiler, for 'pxor' is not supported in time. This patch helps to workaround the build failure by checking whether 'pxor' is supported during configuration, if not, MMI will be disabled. Reviewed-by: yinshiyou...@loongson.cn Signed-off-by: Michael Niedermayer > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=903c5d58f0311b12bd8127a545f1bf8549307f5c --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index f9fdf58bc3..82639ce057 100755 --- a/configure +++ b/configure @@ -5947,7 +5947,7 @@ elif enabled mips; then enabled loongson3 && check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"' '-mloongson-ext' && append MMIFLAGS '-mloongson-ext' # MMI can be detected at runtime too -enabled mmi && check_inline_asm mmi '"punpcklhw $f0, $f0, $f0"' '-mloongson-mmi' && append MMIFLAGS '-mloongson-mmi' +enabled mmi && check_inline_asm mmi '"pxor $f0, $f0, $f0"' '-mloongson-mmi' && append MMIFLAGS '-mloongson-mmi' if enabled bigendian && enabled msa; then disable msa ___ ffmpeg-cvslog mailing list ffmpeg-cvslog@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog To unsubscribe, visit link above, or email ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".